about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .github/workflows/ci.yml 20
-rw-r--r-- c/blake3_dispatch.c 4
-rw-r--r-- c/blake3_impl.h 3
-rw-r--r-- src/platform.rs 13
-rw-r--r-- src/portable.rs 4
5 files changed, 41 insertions, 3 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d7c5de4..b031602 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -126,6 +126,26 @@ jobs:
- name: cargo test C bindings intrinsics
run: cargo test --features=prefer_intrinsics
working-directory: ./c/blake3_c_rust_bindings
+ - name: cargo test C bindings no AVX-512
+ run: cargo test
+ working-directory: ./c/blake3_c_rust_bindings
+ env:
+ CFLAGS: -DBLAKE3_NO_AVX512
+ - name: cargo test C bindings no AVX2
+ run: cargo test
+ working-directory: ./c/blake3_c_rust_bindings
+ env:
+ CFLAGS: -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
+ - name: cargo test C bindings no SSE41
+ run: cargo test
+ working-directory: ./c/blake3_c_rust_bindings
+ env:
+ CFLAGS: -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_SSE41
+ - name: cargo test C bindings no SSE2
+ run: cargo test
+ working-directory: ./c/blake3_c_rust_bindings
+ env:
+ CFLAGS: -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_SSE2
# Reference impl doc test.
- name: reference impl doc test
run: cargo test
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c
index cf5bad7..5c76b14 100644
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -241,7 +241,9 @@ void blake3_xof_many(const uint32_t cv[8],
}
#endif
#endif
- blake3_xof_many_portable(cv, block, block_len, counter, flags, out, outblocks);
+ for(size_t i = 0; i < outblocks; ++i) {
+ blake3_compress_xof(cv, block, block_len, counter + i, flags, out + 64*i);
+ }
}
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index abd7546..3da773b 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -222,6 +222,9 @@ void blake3_compress_xof_portable(const uint32_t cv[8],
uint8_t block_len, uint64_t counter,
uint8_t flags, uint8_t out[64]);
+// This function is test-only. When blake3_xof_many doesn't have an optimized implementation,
+// it loops over blake3_compress_xof instead of falling back to this, so it still benefits
+// from compress optimizations.
void blake3_xof_many_portable(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter, uint8_t flags,
diff --git a/src/platform.rs b/src/platform.rs
index 590a77c..cd8ef63 100644
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -282,7 +282,7 @@ impl Platform {
cv: &CVWords,
block: &[u8; BLOCK_LEN],
block_len: u8,
- counter: u64,
+ mut counter: u64,
flags: u8,
out: &mut [u8],
) {
@@ -299,7 +299,16 @@ impl Platform {
Platform::AVX512 => unsafe {
crate::avx512::xof_many(cv, block, block_len, counter, flags, out)
},
- _ => crate::portable::xof_many(cv, block, block_len, counter, flags, out),
+ _ => {
+ // For platforms without an optimized xof_many, fall back to a loop over
+ // compress_xof. This is still faster than portable code.
+ for out_block in out.chunks_exact_mut(BLOCK_LEN) {
+ // TODO: Use array_chunks_mut here once that's stable.
+ let out_array: &mut [u8; BLOCK_LEN] = out_block.try_into().unwrap();
+ *out_array = self.compress_xof(cv, block, block_len, counter, flags);
+ counter += 1;
+ }
+ }
}
}
diff --git a/src/portable.rs b/src/portable.rs
index 4181f27..35b5f5d 100644
--- a/src/portable.rs
+++ b/src/portable.rs
@@ -177,6 +177,10 @@ pub fn hash_many<const N: usize>(
}
}
+// This function is test-only. When platform::xof_many() doesn't have an optimized implementation,
+// it loops over platform::compress_xof() instead of falling back to this, so it still benefits
+// from compress optimizations.
+#[cfg(test)]
pub fn xof_many(
cv: &CVWords,
block: &[u8; BLOCK_LEN],