diff options
| -rw-r--r-- | c/blake3.c | 9 | ||||
| -rw-r--r-- | c/blake3_dispatch.c | 69 | ||||
| -rw-r--r-- | c/blake3_impl.h | 70 | ||||
| -rw-r--r-- | c/main.c | 4 |
4 files changed, 77 insertions, 75 deletions
@@ -259,10 +259,11 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values, // Why not just have the caller split the input on the first update(), instead // of implementing this special rule? Because we don't want to limit SIMD or // multi-threading parallelism for that update(). -size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, uint8_t flags, - uint8_t *out) { +static size_t blake3_compress_subtree_wide(const uint8_t *input, + size_t input_len, + const uint32_t key[8], + uint64_t chunk_counter, + uint8_t flags, uint8_t *out) { // Note that the single chunk case does *not* bump the SIMD degree up to 2 // when it is 1. If this implementation adds multi-threading in the future, // this gives us the option of multi-threading even the 2-chunk case, which diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c index 2fbf43d..add8bef 100644 --- a/c/blake3_dispatch.c +++ b/c/blake3_dispatch.c @@ -14,73 +14,6 @@ #endif #endif -// Declarations for implementation-specific functions. -void blake3_compress_in_place_portable(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags); - -void blake3_compress_xof_portable(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]); - -void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out); - -#if defined(IS_X86) -#if !defined(BLAKE3_NO_SSE41) -void blake3_compress_in_place_sse41(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags); -void blake3_compress_xof_sse41(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]); -void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out); -#endif -#if !defined(BLAKE3_NO_AVX2) -void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out); -#endif -#if !defined(BLAKE3_NO_AVX512) -void blake3_compress_in_place_avx512(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags); - -void blake3_compress_xof_avx512(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]); - -void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out); -#endif -#endif - -#if defined(BLAKE3_USE_NEON) -void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out); -#endif - #if defined(IS_X86) static uint64_t xgetbv() { #if defined(_MSC_VER) @@ -286,7 +219,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, } // The dynamically detected SIMD degree of the current platform. -size_t blake3_simd_degree() { +size_t blake3_simd_degree(void) { #if defined(IS_X86) const enum cpu_feature features = get_cpu_features(); #if !defined(BLAKE3_NO_AVX512) diff --git a/c/blake3_impl.h b/c/blake3_impl.h index d8954e4..c384671 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -161,7 +161,75 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out); -size_t blake3_simd_degree(); +size_t blake3_simd_degree(void); + + +// Declarations for implementation-specific functions. +void blake3_compress_in_place_portable(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags); + +void blake3_compress_xof_portable(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags, uint8_t out[64]); + +void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, + size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, + uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t *out); + +#if defined(IS_X86) +#if !defined(BLAKE3_NO_SSE41) +void blake3_compress_in_place_sse41(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags); +void blake3_compress_xof_sse41(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags, uint8_t out[64]); +void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs, + size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, + uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t *out); +#endif +#if !defined(BLAKE3_NO_AVX2) +void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs, + size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, + uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t *out); +#endif +#if !defined(BLAKE3_NO_AVX512) +void blake3_compress_in_place_avx512(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags); + +void blake3_compress_xof_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags, uint8_t out[64]); + +void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, + size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, + uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t *out); +#endif +#endif + +#if defined(BLAKE3_USE_NEON) +void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs, + size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, + uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t *out); +#endif #endif /* BLAKE3_IMPL_H */ @@ -18,7 +18,7 @@ #define KEYED_HASH_MODE 1 #define DERIVE_KEY_MODE 2 -void hex_char_value(uint8_t c, uint8_t *value, bool *valid) { +static void hex_char_value(uint8_t c, uint8_t *value, bool *valid) { if ('0' <= c && c <= '9') { *value = c - '0'; *valid = true; @@ -30,7 +30,7 @@ void hex_char_value(uint8_t c, uint8_t *value, bool *valid) { } } -int parse_key(char *hex_key, uint8_t out[BLAKE3_KEY_LEN]) { +static int parse_key(char *hex_key, uint8_t out[BLAKE3_KEY_LEN]) { size_t hex_len = strlen(hex_key); if (hex_len != 64) { fprintf(stderr, "Expected a 64-char hexadecimal key, got %zu chars.\n", |
