diff options
| author | Jack O'Connor <[email protected]> | 2023-07-18 22:30:55 -0700 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2023-07-18 22:30:55 -0700 |
| commit | e9643f4eb20efd516b5b4b03443b77631dcff264 (patch) | |
| tree | d0130e9a6a923f24e35e5562bbbb735aadae7756 | |
| parent | e56c6a814f3ea736f57e7b8983d19d310fba6020 (diff) | |
get rid of loops in impl functions
| -rw-r--r-- | rust/guts/src/avx512.rs | 26 | ||||
| -rw-r--r-- | rust/guts/src/lib.rs | 64 |
2 files changed, 52 insertions, 38 deletions
```diff
diff --git a/rust/guts/src/avx512.rs b/rust/guts/src/avx512.rs
index e61949e..f5797ba 100644
--- a/rust/guts/src/avx512.rs
+++ b/rust/guts/src/avx512.rs
@@ -122,16 +122,15 @@ unsafe extern "C" fn xof(
     block: *const BlockBytes,
     block_len: u32,
     cv: *const CVBytes,
-    mut counter: u64,
+    counter: u64,
     flags: u32,
-    mut out: *mut u8,
-    mut out_len: usize,
+    out: *mut u8,
+    out_len: usize,
 ) {
-    while out_len >= 16 * BLOCK_LEN {
+    debug_assert!(out_len <= 16 * BLOCK_LEN);
+    if out_len == 16 * BLOCK_LEN {
         blake3_guts_avx512_xof_16_exact(block, block_len, cv, counter, flags, out);
-        counter += 16;
-        out = out.add(16 * BLOCK_LEN);
-        out_len -= 16 * BLOCK_LEN;
+        return;
     }
     crate::xof_using_compress_xof(
         blake3_guts_avx512_compress_xof,
@@ -149,16 +148,15 @@ unsafe extern "C" fn xof_xor(
     block: *const BlockBytes,
     block_len: u32,
     cv: *const CVBytes,
-    mut counter: u64,
+    counter: u64,
     flags: u32,
-    mut out: *mut u8,
-    mut out_len: usize,
+    out: *mut u8,
+    out_len: usize,
 ) {
-    while out_len >= 16 * BLOCK_LEN {
+    debug_assert!(out_len <= 16 * BLOCK_LEN);
+    if out_len == 16 * BLOCK_LEN {
         blake3_guts_avx512_xof_xor_16_exact(block, block_len, cv, counter, flags, out);
-        counter += 16;
-        out = out.add(16 * BLOCK_LEN);
-        out_len -= 16 * BLOCK_LEN;
+        return;
     }
     crate::xof_xor_using_compress_xof(
         blake3_guts_avx512_compress_xof,
diff --git a/rust/guts/src/lib.rs b/rust/guts/src/lib.rs
index 30a6c53..89e6126 100644
--- a/rust/guts/src/lib.rs
+++ b/rust/guts/src/lib.rs
@@ -284,20 +284,27 @@ impl Implementation {
         block: &BlockBytes,
         block_len: u32,
         cv: &CVBytes,
-        counter: u64,
+        mut counter: u64,
         flags: u32,
-        out: &mut [u8],
+        mut out: &mut [u8],
     ) {
-        unsafe {
-            self.xof_fn()(
-                block,
-                block_len,
-                cv,
-                counter,
-                flags | ROOT,
-                out.as_mut_ptr(),
-                out.len(),
-            );
+        let degree = self.degree();
+        let simd_len = degree * BLOCK_LEN;
+        while !out.is_empty() {
+            let take = cmp::min(simd_len, out.len());
+            unsafe {
+                self.xof_fn()(
+                    block,
+                    block_len,
+                    cv,
+                    counter,
+                    flags | ROOT,
+                    out.as_mut_ptr(),
+                    take,
+                );
+            }
+            out = &mut out[take..];
+            counter += degree as u64;
         }
     }
 
@@ -312,20 +319,27 @@ impl Implementation {
         block: &BlockBytes,
         block_len: u32,
         cv: &CVBytes,
-        counter: u64,
+        mut counter: u64,
         flags: u32,
-        out: &mut [u8],
+        mut out: &mut [u8],
     ) {
-        unsafe {
-            self.xof_xor_fn()(
-                block,
-                block_len,
-                cv,
-                counter,
-                flags | ROOT,
-                out.as_mut_ptr(),
-                out.len(),
-            );
+        let degree = self.degree();
+        let simd_len = degree * BLOCK_LEN;
+        while !out.is_empty() {
+            let take = cmp::min(simd_len, out.len());
+            unsafe {
+                self.xof_xor_fn()(
+                    block,
+                    block_len,
+                    cv,
+                    counter,
+                    flags | ROOT,
+                    out.as_mut_ptr(),
+                    take,
+                );
+            }
+            out = &mut out[take..];
+            counter += degree as u64;
         }
     }
 
@@ -608,6 +622,7 @@ unsafe fn xof_using_compress_xof(
     mut out: *mut u8,
     mut out_len: usize,
 ) {
+    debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
     while out_len > 0 {
         let mut block_output = [0u8; 64];
         compress_xof(block, block_len, cv, counter, flags, &mut block_output);
@@ -630,6 +645,7 @@ unsafe fn xof_xor_using_compress_xof(
     mut out: *mut u8,
     mut out_len: usize,
 ) {
+    debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
     while out_len > 0 {
         let mut block_output = [0u8; 64];
         compress_xof(block, block_len, cv, counter, flags, &mut block_output);
```
