diff options
| author | Jack O'Connor <[email protected]> | 2022-03-07 14:56:46 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2022-03-08 22:23:09 -0500 |
| commit | d9b803304c3ffe1fb865eb9cbe9140f7c63c3bf9 (patch) | |
| tree | d6201c5cedd5ee9a9b99d51c048eea6fb52275d8 | |
| parent | e4397683ef2b87ad27e9aaa447f2051940526a8c (diff) | |
add a benchmark
| -rw-r--r-- | benches/bench.rs | 9 | ||||
| -rw-r--r-- | src/kernel.rs | 3 |
2 files changed, 11 insertions, 1 deletion
```diff
diff --git a/benches/bench.rs b/benches/bench.rs
index 5efb9e6..95b67e1 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -151,6 +151,15 @@ fn bench_many_chunks_neon(b: &mut Bencher) {
     }
 }
 
+#[bench]
+fn bench_many_chunks_kernel(b: &mut Bencher) {
+    let mut input = RandomInput::new(b, 16 * CHUNK_LEN);
+    let mut out = [blake3::kernel::Words16([0; 16]); 8];
+    b.iter(|| unsafe {
+        blake3::kernel::chunks16(input.get().try_into().unwrap(), &[0; 8], 0, 0, &mut out);
+    });
+}
+
 // TODO: When we get const generics we can unify this with the chunks code.
 fn bench_many_parents_fn(b: &mut Bencher, platform: Platform) {
     let degree = platform.simd_degree();
diff --git a/src/kernel.rs b/src/kernel.rs
index a9c58ab..4ac76eb 100644
--- a/src/kernel.rs
+++ b/src/kernel.rs
@@ -964,6 +964,7 @@ global_asm!(
     // CVs in zmm0-zmm7 are written to the out pointer.
     // --------------------------------------------------------------------------------------------
     "blake3_avx512_chunks_16:",
+    // TODO: Prefetches
     // Broadcast the key into zmm0-zmm7. Use ecx as scratch.
     "mov ecx, dword ptr [rsi + 0 * 4]",
     "vpbroadcastd zmm0, ecx",
@@ -1038,7 +1039,7 @@ global_asm!(
 
 #[repr(C, align(64))]
 #[derive(Copy, Clone, Debug)]
-pub struct Words16([u32; 16]);
+pub struct Words16(pub [u32; 16]);
 
 #[no_mangle]
 static BLAKE3_IV0_16: Words16 = Words16([crate::IV[0]; 16]);
```
