aboutsummaryrefslogtreecommitdiff
path: root/benches
diff options
context:
space:
mode:
author: Jack O'Connor <[email protected]> 2019-12-10 14:20:09 -0500
committer: Jack O'Connor <[email protected]> 2019-12-11 18:05:26 -0500
commit: 52ea6487f88a0e5cbc2f784f3095539afe6c91e4 (patch)
tree: 181508c1840c2961e530e982c4525029d79e5685 /benches
parent: d68882da0d897c93a271a7c0f6d6b9b13d13aa16 (diff)
switch to representing CVs as words for the compression function
The portable implementation was getting slowed down by converting back and forth between words and bytes. I made the corresponding change on the C side first (https://github.com/veorq/BLAKE3-c/commit/12a37be8b50922a358c016ba07f46816a3da4a31), and as part of this commit I'm re-vendoring the C code. I'm also exposing a small FFI interface to C so that blake3_neon.c can link against portable.rs rather than blake3_portable.c; see c_neon.rs.
Diffstat (limited to 'benches')
-rw-r--r-- benches/bench.rs 48
1 files changed, 12 insertions, 36 deletions
diff --git a/benches/bench.rs b/benches/bench.rs
index ec7658c..da0f7d3 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -5,7 +5,7 @@ extern crate test;
use arrayref::array_ref;
use arrayvec::ArrayVec;
use blake3::platform::MAX_SIMD_DEGREE;
-use blake3::{BLOCK_LEN, CHUNK_LEN, KEY_LEN, OUT_LEN};
+use blake3::{BLOCK_LEN, CHUNK_LEN, OUT_LEN};
use rand::prelude::*;
use test::Bencher;
@@ -48,27 +48,21 @@ impl RandomInput {
}
}
-type CompressFn = unsafe fn(
- cv: &[u8; 32],
- block: &[u8; BLOCK_LEN],
- block_len: u8,
- offset: u64,
- flags: u8,
-) -> [u8; 64];
+type CompressInPlaceFn =
+ unsafe fn(cv: &mut [u32; 8], block: &[u8; BLOCK_LEN], block_len: u8, offset: u64, flags: u8);
-fn bench_single_compression_fn(b: &mut Bencher, f: CompressFn) {
- let state: [u8; 32];
+fn bench_single_compression_fn(b: &mut Bencher, f: CompressInPlaceFn) {
+ let mut state = [1u32; 8];
let mut r = RandomInput::new(b, 64);
- state = *array_ref!(r.get(), 0, 32);
let input = array_ref!(r.get(), 0, 64);
unsafe {
- b.iter(|| f(&state, input, 64 as u8, 0, 0));
+ b.iter(|| f(&mut state, input, 64 as u8, 0, 0));
}
}
#[bench]
fn bench_single_compression_portable(b: &mut Bencher) {
- bench_single_compression_fn(b, blake3::portable::compress);
+ bench_single_compression_fn(b, blake3::portable::compress_in_place);
}
#[bench]
@@ -77,7 +71,7 @@ fn bench_single_compression_sse41(b: &mut Bencher) {
if !blake3::platform::sse41_detected() {
return;
}
- bench_single_compression_fn(b, blake3::sse41::compress);
+ bench_single_compression_fn(b, blake3::sse41::compress_in_place);
}
#[bench]
@@ -86,12 +80,12 @@ fn bench_single_compression_avx512(b: &mut Bencher) {
if !blake3::platform::avx512_detected() {
return;
}
- bench_single_compression_fn(b, blake3::c_avx512::compress);
+ bench_single_compression_fn(b, blake3::c_avx512::compress_in_place);
}
type HashManyFn<A> = unsafe fn(
inputs: &[&A],
- key: &[u8; blake3::KEY_LEN],
+ key: &[u32; 8],
offset: u64,
offset_deltas: &[u64; 17],
flags: u8,
@@ -113,16 +107,7 @@ fn bench_many_chunks_fn(b: &mut Bencher, f: HashManyFn<[u8; CHUNK_LEN]>, degree:
.map(|i| array_ref!(i.get(), 0, CHUNK_LEN))
.collect();
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
- f(
- &input_arrays[..],
- &[0; KEY_LEN],
- 0,
- &[0; 17],
- 0,
- 0,
- 0,
- &mut out,
- );
+ f(&input_arrays[..], &[0; 8], 0, &[0; 17], 0, 0, 0, &mut out);
});
}
}
@@ -175,16 +160,7 @@ fn bench_many_parents_fn(b: &mut Bencher, f: HashManyFn<[u8; BLOCK_LEN]>, degree
.map(|i| array_ref!(i.get(), 0, BLOCK_LEN))
.collect();
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
- f(
- &input_arrays[..],
- &[0; KEY_LEN],
- 0,
- &[0; 17],
- 0,
- 0,
- 0,
- &mut out,
- );
+ f(&input_arrays[..], &[0; 8], 0, &[0; 17], 0, 0, 0, &mut out);
});
}
}