diff options
| -rw-r--r-- | c/blake3.c | 30 | ||||
| -rw-r--r-- | c/blake3_dispatch.c | 17 | ||||
| -rw-r--r-- | c/blake3_impl.h | 16 | ||||
| -rw-r--r-- | c/blake3_portable.c | 10 |
4 files changed, 59 insertions, 14 deletions
@@ -89,21 +89,23 @@ INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out, uint64_t output_block_counter = seek / 64; size_t offset_within_block = seek % 64; uint8_t wide_buf[64]; - while (out_len > 0) { - blake3_compress_xof(self->input_cv, self->block, self->block_len, - output_block_counter, self->flags | ROOT, wide_buf); - size_t available_bytes = 64 - offset_within_block; - size_t memcpy_len; - if (out_len > available_bytes) { - memcpy_len = available_bytes; - } else { - memcpy_len = out_len; - } - memcpy(out, wide_buf + offset_within_block, memcpy_len); - out += memcpy_len; - out_len -= memcpy_len; + if(offset_within_block) { + blake3_compress_xof(self->input_cv, self->block, self->block_len, output_block_counter, self->flags | ROOT, wide_buf); + const size_t available_bytes = 64 - offset_within_block; + const size_t bytes = out_len > available_bytes ? available_bytes : out_len; + memcpy(out, wide_buf + offset_within_block, bytes); + out += bytes; + out_len -= bytes; output_block_counter += 1; - offset_within_block = 0; + } + if(out_len / 64) + blake3_xof_many(self->input_cv, self->block, self->block_len, output_block_counter, self->flags | ROOT, out, out_len / 64); + output_block_counter += out_len / 64; + out += out_len & -64; + out_len -= out_len & -64; + if(out_len) { + blake3_compress_xof(self->input_cv, self->block, self->block_len, output_block_counter, self->flags | ROOT, wide_buf); + memcpy(out, wide_buf, out_len); } } diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c index 6847725..a748968 100644 --- a/c/blake3_dispatch.c +++ b/c/blake3_dispatch.c @@ -175,6 +175,23 @@ void blake3_compress_xof(const uint32_t cv[8], blake3_compress_xof_portable(cv, block, block_len, counter, flags, out); } + +void blake3_xof_many(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, + uint8_t out[64], size_t outblocks) { +#if defined(IS_X86) + const enum cpu_feature features = get_cpu_features(); +#if !defined(BLAKE3_NO_AVX512) + if (features & AVX512VL) { + blake3_xof_many_avx512(cv, block, block_len, counter, flags, out, outblocks); + return; + } +#endif +#endif + blake3_xof_many_portable(cv, block, block_len, counter, flags, out, outblocks); +} + void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, diff --git a/c/blake3_impl.h b/c/blake3_impl.h index c384671..c825825 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -156,6 +156,11 @@ void blake3_compress_xof(const uint32_t cv[8], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]); +void blake3_xof_many(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, + uint8_t out[64], size_t outblocks); + void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, @@ -175,6 +180,11 @@ void blake3_compress_xof_portable(const uint32_t cv[8], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]); +void blake3_xof_many_portable(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, + uint8_t out[64], size_t outblocks); + void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, @@ -215,6 +225,12 @@ void blake3_compress_xof_avx512(const uint32_t cv[8], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]); + +void blake3_xof_many_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, + uint8_t out[64], size_t outblocks); + void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, diff --git a/c/blake3_portable.c b/c/blake3_portable.c index 9ee2f4a..ffdd935 100644 --- a/c/blake3_portable.c +++ b/c/blake3_portable.c @@ -130,6 +130,16 @@ void blake3_compress_xof_portable(const uint32_t cv[8], store32(&out[15 * 4], state[15] ^ cv[7]); } +void blake3_xof_many_portable(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, + uint8_t out[BLAKE3_BLOCK_LEN], size_t outblocks) +{ + for(size_t i = 0; i < outblocks; ++i) { + blake3_compress_xof_portable(cv, block, block_len, counter + i, flags, out + 64*i); + } +} + INLINE void hash_one_portable(const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, |
