diff options
| author | Jack O'Connor <[email protected]> | 2020-01-21 16:03:27 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2020-01-22 21:32:35 -0500 |
| commit | 087d72e08feeb1513759f92afd92b836fa17c130 (patch) | |
| tree | f06159f1a3981170136193a6713794aa1395b68f /c/blake3_dispatch.c | |
| parent | 92d421dea1a89e2f079f4dbd93b0dab41234b279 (diff) | |
clang-format
Diffstat (limited to 'c/blake3_dispatch.c')
| -rw-r--r-- | c/blake3_dispatch.c | 264 |
1 files changed, 135 insertions, 129 deletions
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c index 6b43970..782139a 100644 --- a/c/blake3_dispatch.c +++ b/c/blake3_dispatch.c @@ -1,11 +1,12 @@ +#include <stdbool.h> #include <stddef.h> #include <stdint.h> -#include <stdbool.h> #include "blake3.h" -#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64) -#define IS_X86 +#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \ + defined(_M_X64) +#define IS_X86 #endif #if defined(__arm__) @@ -22,7 +23,6 @@ #endif #endif - // Declarations for implementation-specific functions. void blake3_compress_in_place_portable(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], @@ -40,7 +40,6 @@ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); - #if defined(IS_X86) #if !defined(BLAKE3_NO_SSE41) void blake3_compress_in_place_sse41(uint32_t cv[8], @@ -56,7 +55,7 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs, uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); -#endif +#endif #if !defined(BLAKE3_NO_AVX2) void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], @@ -70,7 +69,6 @@ void blake3_compress_in_place_avx512(uint32_t cv[8], uint8_t block_len, uint64_t counter, uint8_t flags); - void blake3_compress_xof_avx512(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, @@ -93,39 +91,44 @@ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs, #endif #if defined(IS_X86) -static uint64_t xgetbv() -{ +static uint64_t xgetbv() { #if defined(_MSC_VER) - return _xgetbv(0); + return _xgetbv(0); #else - uint32_t eax=0, edx=0; - __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0)); - return ((uint64_t)edx << 32) | eax; + uint32_t eax = 0, edx = 0; + __asm__ 
__volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0)); + return ((uint64_t)edx << 32) | eax; #endif } -static void cpuid(uint32_t out[4], uint32_t id) -{ +static void cpuid(uint32_t out[4], uint32_t id) { #if defined(_MSC_VER) - __cpuid((int*)out, id); + __cpuid((int *)out, id); #else #if defined(__i386__) || defined(_M_IX86) - __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id)); + __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" + : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) + : "a"(id)); #else - __asm__ __volatile__("cpuid\n" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id)); + __asm__ __volatile__("cpuid\n" + : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) + : "a"(id)); #endif #endif } -static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) -{ +static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { #if defined(_MSC_VER) - __cpuidex((int*)out, id, sid); + __cpuidex((int *)out, id, sid); #else #if defined(__i386__) || defined(_M_IX86) - __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id), "c"(sid)); + __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" + : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) + : "a"(id), "c"(sid)); #else - __asm__ __volatile__("cpuid\n" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id), "c"(sid)); + __asm__ __volatile__("cpuid\n" + : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) + : "a"(id), "c"(sid)); #endif #endif } @@ -133,152 +136,155 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) #endif enum cpu_feature { - SSE2 = 1 << 0, - SSSE3 = 1 << 1, - SSE41 = 1 << 2, - AVX = 1 << 3, - AVX2 = 1 << 4, - AVX512F = 1 << 5, - AVX512VL = 1 << 6, - /* ... 
*/ - UNDEFINED = 1 << 30 + SSE2 = 1 << 0, + SSSE3 = 1 << 1, + SSE41 = 1 << 2, + AVX = 1 << 3, + AVX2 = 1 << 4, + AVX512F = 1 << 5, + AVX512VL = 1 << 6, + /* ... */ + UNDEFINED = 1 << 30 }; #if !defined(BLAKE3_TESTING) static /* Allow the variable to be controlled manually for testing */ #endif -enum cpu_feature g_cpu_features = UNDEFINED; + enum cpu_feature g_cpu_features = UNDEFINED; #if !defined(BLAKE3_TESTING) -static +static #endif -enum cpu_feature get_cpu_features() -{ - - if( g_cpu_features != UNDEFINED ) { - return g_cpu_features; - } else { + enum cpu_feature + get_cpu_features() { + + if (g_cpu_features != UNDEFINED) { + return g_cpu_features; + } else { #if defined(IS_X86) - uint32_t regs[4] = {0}; - uint32_t * eax = &regs[0], * ebx = &regs[1], * ecx = &regs[2], * edx = &regs[3]; - (void)edx; - enum cpu_feature features = 0; - cpuid(regs, 0); - const int max_id = *eax; - cpuid(regs, 1); - #if defined(__amd64__) || defined(_M_X64) - features |= SSE2; - #else - if(*edx & (1UL << 26)) - features |= SSE2; - #endif - if(*ecx & (1UL << 0)) - features |= SSSE3; - if(*ecx & (1UL << 19)) - features |= SSE41; + uint32_t regs[4] = {0}; + uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3]; + (void)edx; + enum cpu_feature features = 0; + cpuid(regs, 0); + const int max_id = *eax; + cpuid(regs, 1); +#if defined(__amd64__) || defined(_M_X64) + features |= SSE2; +#else + if (*edx & (1UL << 26)) + features |= SSE2; +#endif + if (*ecx & (1UL << 0)) + features |= SSSE3; + if (*ecx & (1UL << 19)) + features |= SSE41; - if( *ecx & (1UL << 27) ) { // 
OSXSAVE + const uint64_t mask = xgetbv(); + if ((mask & 6) == 6) { // SSE and AVX states + if (*ecx & (1UL << 28)) + features |= AVX; + if (max_id >= 7) { + cpuidex(regs, 7, 0); + if (*ebx & (1UL << 5)) + features |= AVX2; + if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm + if (*ebx & (1UL << 31)) + features |= AVX512VL; + if (*ebx & (1UL << 16)) + features |= AVX512F; + } } - g_cpu_features = features; - return features; + } + } + g_cpu_features = features; + return features; #elif defined(IS_ARM) - /* How to detect NEON? */ - return 0; + /* How to detect NEON? */ + return 0; #else - return 0; + return 0; #endif - } + } } -void blake3_compress_in_place(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags) -{ - const enum cpu_feature features = get_cpu_features(); +void blake3_compress_in_place(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags) { + const enum cpu_feature features = get_cpu_features(); #if defined(IS_X86) #if !defined(BLAKE3_NO_AVX512) - if(features & AVX512VL) { - blake3_compress_in_place_avx512(cv, block, block_len, counter, flags); - return; - } + if (features & AVX512VL) { + blake3_compress_in_place_avx512(cv, block, block_len, counter, flags); + return; + } #endif #if !defined(BLAKE3_NO_SSE41) - if(features & SSE41) { - blake3_compress_in_place_sse41(cv, block, block_len, counter, flags); - return; - } + if (features & SSE41) { + blake3_compress_in_place_sse41(cv, block, block_len, counter, flags); + return; + } #endif #endif - blake3_compress_in_place_portable(cv, block, block_len, counter, flags); + blake3_compress_in_place_portable(cv, block, block_len, counter, flags); } void blake3_compress_xof(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]) -{ - const enum cpu_feature features = get_cpu_features(); + const uint8_t 
block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, + uint8_t out[64]) { + const enum cpu_feature features = get_cpu_features(); #if defined(IS_X86) #if !defined(BLAKE3_NO_AVX512) - if(features & AVX512VL) { - blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out); - return; - } + if (features & AVX512VL) { + blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out); + return; + } #endif #if !defined(BLAKE3_NO_SSE41) - if(features & SSE41) { - blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out); - return; - } + if (features & SSE41) { + blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out); + return; + } #endif #endif - blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); + blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); } void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out) -{ - const enum cpu_feature features = get_cpu_features(); + size_t blocks, const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + const enum cpu_feature features = get_cpu_features(); #if defined(IS_X86) #if !defined(BLAKE3_NO_AVX512) - if(features & AVX512F) { - blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); - return; - } + if (features & AVX512F) { + blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, + out); + return; + } #endif #if !defined(BLAKE3_NO_AVX2) - if(features & AVX2) { - blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); - return; - } + if (features & AVX2) { + 
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, + out); + return; + } #endif #if !defined(BLAKE3_NO_SSE41) - if(features & SSE41) { - blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); - return; - } + if (features & SSE41) { + blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, + out); + return; + } #endif #endif - blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); + blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, + out); } - |
