about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2023-07-18 22:30:55 -0700
committer Jack O'Connor <[email protected]> 2023-07-18 22:30:55 -0700
commit e9643f4eb20efd516b5b4b03443b77631dcff264 (patch)
tree d0130e9a6a923f24e35e5562bbbb735aadae7756
parent e56c6a814f3ea736f57e7b8983d19d310fba6020 (diff)
get rid of loops in impl functions
-rw-r--r-- rust/guts/src/avx512.rs 26
-rw-r--r-- rust/guts/src/lib.rs 64
2 files changed, 52 insertions, 38 deletions
diff --git a/rust/guts/src/avx512.rs b/rust/guts/src/avx512.rs
index e61949e..f5797ba 100644
--- a/rust/guts/src/avx512.rs
+++ b/rust/guts/src/avx512.rs
@@ -122,16 +122,15 @@ unsafe extern "C" fn xof(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
- mut counter: u64,
+ counter: u64,
flags: u32,
- mut out: *mut u8,
- mut out_len: usize,
+ out: *mut u8,
+ out_len: usize,
) {
- while out_len >= 16 * BLOCK_LEN {
+ debug_assert!(out_len <= 16 * BLOCK_LEN);
+ if out_len == 16 * BLOCK_LEN {
blake3_guts_avx512_xof_16_exact(block, block_len, cv, counter, flags, out);
- counter += 16;
- out = out.add(16 * BLOCK_LEN);
- out_len -= 16 * BLOCK_LEN;
+ return;
}
crate::xof_using_compress_xof(
blake3_guts_avx512_compress_xof,
@@ -149,16 +148,15 @@ unsafe extern "C" fn xof_xor(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
- mut counter: u64,
+ counter: u64,
flags: u32,
- mut out: *mut u8,
- mut out_len: usize,
+ out: *mut u8,
+ out_len: usize,
) {
- while out_len >= 16 * BLOCK_LEN {
+ debug_assert!(out_len <= 16 * BLOCK_LEN);
+ if out_len == 16 * BLOCK_LEN {
blake3_guts_avx512_xof_xor_16_exact(block, block_len, cv, counter, flags, out);
- counter += 16;
- out = out.add(16 * BLOCK_LEN);
- out_len -= 16 * BLOCK_LEN;
+ return;
}
crate::xof_xor_using_compress_xof(
blake3_guts_avx512_compress_xof,
diff --git a/rust/guts/src/lib.rs b/rust/guts/src/lib.rs
index 30a6c53..89e6126 100644
--- a/rust/guts/src/lib.rs
+++ b/rust/guts/src/lib.rs
@@ -284,20 +284,27 @@ impl Implementation {
block: &BlockBytes,
block_len: u32,
cv: &CVBytes,
- counter: u64,
+ mut counter: u64,
flags: u32,
- out: &mut [u8],
+ mut out: &mut [u8],
) {
- unsafe {
- self.xof_fn()(
- block,
- block_len,
- cv,
- counter,
- flags | ROOT,
- out.as_mut_ptr(),
- out.len(),
- );
+ let degree = self.degree();
+ let simd_len = degree * BLOCK_LEN;
+ while !out.is_empty() {
+ let take = cmp::min(simd_len, out.len());
+ unsafe {
+ self.xof_fn()(
+ block,
+ block_len,
+ cv,
+ counter,
+ flags | ROOT,
+ out.as_mut_ptr(),
+ take,
+ );
+ }
+ out = &mut out[take..];
+ counter += degree as u64;
}
}
@@ -312,20 +319,27 @@ impl Implementation {
block: &BlockBytes,
block_len: u32,
cv: &CVBytes,
- counter: u64,
+ mut counter: u64,
flags: u32,
- out: &mut [u8],
+ mut out: &mut [u8],
) {
- unsafe {
- self.xof_xor_fn()(
- block,
- block_len,
- cv,
- counter,
- flags | ROOT,
- out.as_mut_ptr(),
- out.len(),
- );
+ let degree = self.degree();
+ let simd_len = degree * BLOCK_LEN;
+ while !out.is_empty() {
+ let take = cmp::min(simd_len, out.len());
+ unsafe {
+ self.xof_xor_fn()(
+ block,
+ block_len,
+ cv,
+ counter,
+ flags | ROOT,
+ out.as_mut_ptr(),
+ take,
+ );
+ }
+ out = &mut out[take..];
+ counter += degree as u64;
}
}
@@ -608,6 +622,7 @@ unsafe fn xof_using_compress_xof(
mut out: *mut u8,
mut out_len: usize,
) {
+ debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
while out_len > 0 {
let mut block_output = [0u8; 64];
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
@@ -630,6 +645,7 @@ unsafe fn xof_xor_using_compress_xof(
mut out: *mut u8,
mut out_len: usize,
) {
+ debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
while out_len > 0 {
let mut block_output = [0u8; 64];
compress_xof(block, block_len, cv, counter, flags, &mut block_output);