diff options
Diffstat (limited to 'thirdparty/BLAKE3/c/blake3_dispatch.c')
| -rw-r--r-- | thirdparty/BLAKE3/c/blake3_dispatch.c | 276 |
1 files changed, 0 insertions, 276 deletions
diff --git a/thirdparty/BLAKE3/c/blake3_dispatch.c b/thirdparty/BLAKE3/c/blake3_dispatch.c deleted file mode 100644 index 6518478e5..000000000 --- a/thirdparty/BLAKE3/c/blake3_dispatch.c +++ /dev/null @@ -1,276 +0,0 @@ -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> - -#include "blake3_impl.h" - -#if defined(IS_X86) -#if defined(_MSC_VER) -#include <intrin.h> -#elif defined(__GNUC__) -#include <immintrin.h> -#else -#error "Unimplemented!" -#endif -#endif - -#define MAYBE_UNUSED(x) (void)((x)) - -#if defined(IS_X86) -static uint64_t xgetbv() { -#if defined(_MSC_VER) - return _xgetbv(0); -#else - uint32_t eax = 0, edx = 0; - __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0)); - return ((uint64_t)edx << 32) | eax; -#endif -} - -static void cpuid(uint32_t out[4], uint32_t id) { -#if defined(_MSC_VER) - __cpuid((int *)out, id); -#elif defined(__i386__) || defined(_M_IX86) - __asm__ __volatile__("movl %%ebx, %1\n" - "cpuid\n" - "xchgl %1, %%ebx\n" - : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) - : "a"(id)); -#else - __asm__ __volatile__("cpuid\n" - : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) - : "a"(id)); -#endif -} - -static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { -#if defined(_MSC_VER) - __cpuidex((int *)out, id, sid); -#elif defined(__i386__) || defined(_M_IX86) - __asm__ __volatile__("movl %%ebx, %1\n" - "cpuid\n" - "xchgl %1, %%ebx\n" - : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) - : "a"(id), "c"(sid)); -#else - __asm__ __volatile__("cpuid\n" - : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) - : "a"(id), "c"(sid)); -#endif -} - -#endif - -enum cpu_feature { - SSE2 = 1 << 0, - SSSE3 = 1 << 1, - SSE41 = 1 << 2, - AVX = 1 << 3, - AVX2 = 1 << 4, - AVX512F = 1 << 5, - AVX512VL = 1 << 6, - /* ... */ - UNDEFINED = 1 << 30 -}; - -#if !defined(BLAKE3_TESTING) -static /* Allow the variable to be controlled manually for testing */ -#endif - enum cpu_feature g_cpu_features = UNDEFINED; - -#if !defined(BLAKE3_TESTING) -static -#endif - enum cpu_feature - get_cpu_features() { - - if (g_cpu_features != UNDEFINED) { - return g_cpu_features; - } else { -#if defined(IS_X86) - uint32_t regs[4] = {0}; - uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3]; - (void)edx; - enum cpu_feature features = 0; - cpuid(regs, 0); - const int max_id = *eax; - cpuid(regs, 1); -#if defined(__amd64__) || defined(_M_X64) - features |= SSE2; -#else - if (*edx & (1UL << 26)) - features |= SSE2; -#endif - if (*ecx & (1UL << 0)) - features |= SSSE3; - if (*ecx & (1UL << 19)) - features |= SSE41; - - if (*ecx & (1UL << 27)) { // OSXSAVE - const uint64_t mask = xgetbv(); - if ((mask & 6) == 6) { // SSE and AVX states - if (*ecx & (1UL << 28)) - features |= AVX; - if (max_id >= 7) { - cpuidex(regs, 7, 0); - if (*ebx & (1UL << 5)) - features |= AVX2; - if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm - if (*ebx & (1UL << 31)) - features |= AVX512VL; - if (*ebx & (1UL << 16)) - features |= AVX512F; - } - } - } - } - g_cpu_features = features; - return features; -#else - /* How to detect NEON? */ - return 0; -#endif - } -} - -void blake3_compress_in_place(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags) { -#if defined(IS_X86) - const enum cpu_feature features = get_cpu_features(); - MAYBE_UNUSED(features); -#if !defined(BLAKE3_NO_AVX512) - if (features & AVX512VL) { - blake3_compress_in_place_avx512(cv, block, block_len, counter, flags); - return; - } -#endif -#if !defined(BLAKE3_NO_SSE41) - if (features & SSE41) { - blake3_compress_in_place_sse41(cv, block, block_len, counter, flags); - return; - } -#endif -#if !defined(BLAKE3_NO_SSE2) - if (features & SSE2) { - blake3_compress_in_place_sse2(cv, block, block_len, counter, flags); - return; - } -#endif -#endif - blake3_compress_in_place_portable(cv, block, block_len, counter, flags); -} - -void blake3_compress_xof(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, uint8_t flags, - uint8_t out[64]) { -#if defined(IS_X86) - const enum cpu_feature features = get_cpu_features(); - MAYBE_UNUSED(features); -#if !defined(BLAKE3_NO_AVX512) - if (features & AVX512VL) { - blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out); - return; - } -#endif -#if !defined(BLAKE3_NO_SSE41) - if (features & SSE41) { - blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out); - return; - } -#endif -#if !defined(BLAKE3_NO_SSE2) - if (features & SSE2) { - blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out); - return; - } -#endif -#endif - blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); -} - -void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], uint64_t counter, - bool increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { -#if defined(IS_X86) - const enum cpu_feature features = get_cpu_features(); - MAYBE_UNUSED(features); -#if !defined(BLAKE3_NO_AVX512) - if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { - blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, - out); - return; - } -#endif -#if !defined(BLAKE3_NO_AVX2) - if (features & AVX2) { - blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, - out); - return; - } -#endif -#if !defined(BLAKE3_NO_SSE41) - if (features & SSE41) { - blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, - out); - return; - } -#endif -#if !defined(BLAKE3_NO_SSE2) - if (features & SSE2) { - blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, - out); - return; - } -#endif -#endif - -#if defined(BLAKE3_USE_NEON) - blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, out); - return; -#endif - - blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, - increment_counter, flags, flags_start, flags_end, - out); -} - -// The dynamically detected SIMD degree of the current platform. -size_t blake3_simd_degree(void) { -#if defined(IS_X86) - const enum cpu_feature features = get_cpu_features(); - MAYBE_UNUSED(features); -#if !defined(BLAKE3_NO_AVX512) - if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) { - return 16; - } -#endif -#if !defined(BLAKE3_NO_AVX2) - if (features & AVX2) { - return 8; - } -#endif -#if !defined(BLAKE3_NO_SSE41) - if (features & SSE41) { - return 4; - } -#endif -#if !defined(BLAKE3_NO_SSE2) - if (features & SSE2) { - return 4; - } -#endif -#endif -#if defined(BLAKE3_USE_NEON) - return 4; -#endif - return 1; -} |