diff options
| author | Jack O'Connor <[email protected]> | 2019-12-10 14:20:09 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2019-12-11 18:05:26 -0500 |
| commit | 52ea6487f88a0e5cbc2f784f3095539afe6c91e4 (patch) | |
| tree | 181508c1840c2961e530e982c4525029d79e5685 /benches | |
| parent | d68882da0d897c93a271a7c0f6d6b9b13d13aa16 (diff) | |
switch to representing CVs (chaining values) as words for the compression function
The portable implementation was getting slowed down by converting back
and forth between words and bytes.
I made the corresponding change on the C side first
(https://github.com/veorq/BLAKE3-c/commit/12a37be8b50922a358c016ba07f46816a3da4a31),
and as part of this commit I'm re-vendoring the C code. I'm also
exposing a small FFI interface to C so that blake3_neon.c can link
against portable.rs rather than blake3_portable.c; see c_neon.rs.
Diffstat (limited to 'benches')
| -rw-r--r-- | benches/bench.rs | 48 |
1 file changed, 12 insertions, 36 deletions
diff --git a/benches/bench.rs b/benches/bench.rs index ec7658c..da0f7d3 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -5,7 +5,7 @@ extern crate test; use arrayref::array_ref; use arrayvec::ArrayVec; use blake3::platform::MAX_SIMD_DEGREE; -use blake3::{BLOCK_LEN, CHUNK_LEN, KEY_LEN, OUT_LEN}; +use blake3::{BLOCK_LEN, CHUNK_LEN, OUT_LEN}; use rand::prelude::*; use test::Bencher; @@ -48,27 +48,21 @@ impl RandomInput { } } -type CompressFn = unsafe fn( - cv: &[u8; 32], - block: &[u8; BLOCK_LEN], - block_len: u8, - offset: u64, - flags: u8, -) -> [u8; 64]; +type CompressInPlaceFn = + unsafe fn(cv: &mut [u32; 8], block: &[u8; BLOCK_LEN], block_len: u8, offset: u64, flags: u8); -fn bench_single_compression_fn(b: &mut Bencher, f: CompressFn) { - let state: [u8; 32]; +fn bench_single_compression_fn(b: &mut Bencher, f: CompressInPlaceFn) { + let mut state = [1u32; 8]; let mut r = RandomInput::new(b, 64); - state = *array_ref!(r.get(), 0, 32); let input = array_ref!(r.get(), 0, 64); unsafe { - b.iter(|| f(&state, input, 64 as u8, 0, 0)); + b.iter(|| f(&mut state, input, 64 as u8, 0, 0)); } } #[bench] fn bench_single_compression_portable(b: &mut Bencher) { - bench_single_compression_fn(b, blake3::portable::compress); + bench_single_compression_fn(b, blake3::portable::compress_in_place); } #[bench] @@ -77,7 +71,7 @@ fn bench_single_compression_sse41(b: &mut Bencher) { if !blake3::platform::sse41_detected() { return; } - bench_single_compression_fn(b, blake3::sse41::compress); + bench_single_compression_fn(b, blake3::sse41::compress_in_place); } #[bench] @@ -86,12 +80,12 @@ fn bench_single_compression_avx512(b: &mut Bencher) { if !blake3::platform::avx512_detected() { return; } - bench_single_compression_fn(b, blake3::c_avx512::compress); + bench_single_compression_fn(b, blake3::c_avx512::compress_in_place); } type HashManyFn<A> = unsafe fn( inputs: &[&A], - key: &[u8; blake3::KEY_LEN], + key: &[u32; 8], offset: u64, offset_deltas: &[u64; 17], flags: u8, @@ -113,16 
+107,7 @@ fn bench_many_chunks_fn(b: &mut Bencher, f: HashManyFn<[u8; CHUNK_LEN]>, degree: .map(|i| array_ref!(i.get(), 0, CHUNK_LEN)) .collect(); let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN]; - f( - &input_arrays[..], - &[0; KEY_LEN], - 0, - &[0; 17], - 0, - 0, - 0, - &mut out, - ); + f(&input_arrays[..], &[0; 8], 0, &[0; 17], 0, 0, 0, &mut out); }); } } @@ -175,16 +160,7 @@ fn bench_many_parents_fn(b: &mut Bencher, f: HashManyFn<[u8; BLOCK_LEN]>, degree .map(|i| array_ref!(i.get(), 0, BLOCK_LEN)) .collect(); let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN]; - f( - &input_arrays[..], - &[0; KEY_LEN], - 0, - &[0; 17], - 0, - 0, - 0, - &mut out, - ); + f(&input_arrays[..], &[0; 8], 0, &[0; 17], 0, 0, 0, &mut out); }); } } |
