about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2022-03-07 14:56:46 -0500
committer Jack O'Connor <[email protected]> 2022-03-08 22:23:09 -0500
commit d9b803304c3ffe1fb865eb9cbe9140f7c63c3bf9 (patch)
tree d6201c5cedd5ee9a9b99d51c048eea6fb52275d8
parent e4397683ef2b87ad27e9aaa447f2051940526a8c (diff)
add a benchmark
-rw-r--r-- benches/bench.rs 9
-rw-r--r-- src/kernel.rs 3
2 files changed, 11 insertions, 1 deletion
diff --git a/benches/bench.rs b/benches/bench.rs
index 5efb9e6..95b67e1 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -151,6 +151,15 @@ fn bench_many_chunks_neon(b: &mut Bencher) {
}
}
+#[bench]
+fn bench_many_chunks_kernel(b: &mut Bencher) {
+ let mut input = RandomInput::new(b, 16 * CHUNK_LEN);
+ let mut out = [blake3::kernel::Words16([0; 16]); 8];
+ b.iter(|| unsafe {
+ blake3::kernel::chunks16(input.get().try_into().unwrap(), &[0; 8], 0, 0, &mut out);
+ });
+}
+
// TODO: When we get const generics we can unify this with the chunks code.
fn bench_many_parents_fn(b: &mut Bencher, platform: Platform) {
let degree = platform.simd_degree();
diff --git a/src/kernel.rs b/src/kernel.rs
index a9c58ab..4ac76eb 100644
--- a/src/kernel.rs
+++ b/src/kernel.rs
@@ -964,6 +964,7 @@ global_asm!(
// CVs in zmm0-zmm7 are written to the out pointer.
// --------------------------------------------------------------------------------------------
"blake3_avx512_chunks_16:",
+ // TODO: Prefetches
// Broadcast the key into zmm0-zmm7. Use ecx as scratch.
"mov ecx, dword ptr [rsi + 0 * 4]",
"vpbroadcastd zmm0, ecx",
@@ -1038,7 +1039,7 @@ global_asm!(
#[repr(C, align(64))]
#[derive(Copy, Clone, Debug)]
-pub struct Words16([u32; 16]);
+pub struct Words16(pub [u32; 16]);
#[no_mangle]
static BLAKE3_IV0_16: Words16 = Words16([crate::IV[0]; 16]);