aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2022-03-12 01:14:10 -0500
committer Jack O'Connor <[email protected]> 2022-03-15 14:03:02 -0400
commit ee558b2f3218cf77b58afb65a9e493fa99642080 (patch)
tree 60a910ca7bba8095536e45afd14ef3683007c14d /src
parent 2e5eb837e53722dc121cca50ff4fc27d3413dcea (diff)
generate blake3_{avx512,sse41,sse2}_compress with asm.py
Diffstat (limited to 'src')
-rw-r--r-- src/kernel.rs 85
1 files changed, 85 insertions, 0 deletions
diff --git a/src/kernel.rs b/src/kernel.rs
index 68c478d..0be9df3 100644
--- a/src/kernel.rs
+++ b/src/kernel.rs
@@ -1,6 +1,91 @@
use crate::CHUNK_LEN;
use std::arch::{asm, global_asm};
+global_asm!(include_str!("../asm/out.S")); // Embeds the text of ../asm/out.S as module-level assembly at compile time; presumably this is where the blake3_*_compress symbols declared below are defined — confirm.
+
+extern "C" { // Foreign declarations only: no bodies here, so these symbols must be supplied at link time (presumably by the global_asm! input — confirm).
+ pub fn blake3_sse2_compress( // Compression routine, SSE2 variant (guarded by an "sse2" feature check in the tests).
+ cv: &[u32; 8], // NOTE(review): the test in this file passes `&mut` and asserts on the overwritten value, so the routine appears to write through this shared reference — confirm whether this should be `&mut [u32; 8]`.
+ block: &[u8; 64], // Fixed 64-byte input block.
+ counter: u64,
+ block_len: u32, // presumably the number of meaningful bytes in `block` — confirm
+ flags: u32,
+ );
+ pub fn blake3_sse41_compress( // Same parameter list, SSE4.1 variant.
+ cv: &[u32; 8], // NOTE(review): same mutability caveat as on blake3_sse2_compress.
+ block: &[u8; 64],
+ counter: u64,
+ block_len: u32,
+ flags: u32,
+ );
+ pub fn blake3_avx512_compress( // Same parameter list, AVX-512 variant (the tests require both avx512f and avx512vl).
+ cv: &[u32; 8], // NOTE(review): same mutability caveat as on blake3_sse2_compress.
+ block: &[u8; 64],
+ counter: u64,
+ block_len: u32,
+ flags: u32,
+ );
+}
+
+pub type CompressionFn = // Function-pointer type matching the blake3_*_compress declarations, so any of them can be passed as a value. NOTE(review): `cv` is a shared reference here although the tests rely on it being written — keep in sync with the extern declarations if that changes.
+ unsafe extern "C" fn(cv: &[u32; 8], block: &[u8; 64], counter: u64, block_len: u32, flags: u32);
+
+#[cfg(test)]
+mod test { // Checks each assembly compression routine against the portable implementation.
+ use super::*;
+
+ fn test_compression_function(f: CompressionFn) { // Runs `f` and Platform::Portable.compress_in_place on identical inputs and asserts that the resulting chaining values are equal.
+ let mut block = [0; 64];
+ let block_len = 53; // Partial block: only the first 53 bytes are painted below, the remaining bytes stay zero.
+ crate::test::paint_test_input(&mut block[..block_len]);
+ let counter = u64::MAX - 42; // Large value, so the upper 32 bits of the counter are non-zero.
+ let flags = crate::CHUNK_START | crate::CHUNK_END | crate::ROOT; // Several flag bits set at once.
+
+ let mut expected = *crate::IV; // Reference result: starts from IV and is updated by the portable implementation.
+ crate::platform::Platform::Portable.compress_in_place(
+ &mut expected,
+ &block,
+ block_len as u8,
+ counter,
+ flags,
+ );
+
+ let mut found = *crate::IV; // Same starting state for the routine under test.
+ unsafe { // `f` is a foreign function; the #[test] callers below return early unless the required CPU feature is detected.
+ f(&mut found, &block, counter, block_len as u32, flags as u32); // NOTE(review): `found` is passed as `&mut` but CompressionFn takes `&[u32; 8]`; the assertion below depends on `f` writing through it — confirm the intended mutability.
+ }
+
+ assert_eq!(expected, found);
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn test_sse2_compress() {
+ if !is_x86_feature_detected!("sse2") { // Without SSE2 the test returns early and passes vacuously.
+ return;
+ }
+ test_compression_function(blake3_sse2_compress);
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn test_sse41_compress() {
+ if !is_x86_feature_detected!("sse4.1") { // Without SSE4.1 the test returns early and passes vacuously.
+ return;
+ }
+ test_compression_function(blake3_sse41_compress);
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ fn test_avx512_compress() {
+ if !is_x86_feature_detected!("avx512f") || !is_x86_feature_detected!("avx512vl") { // Both AVX-512F and AVX-512VL must be present; otherwise return early.
+ return;
+ }
+ test_compression_function(blake3_avx512_compress);
+ }
+}
+
global_asm!(
// --------------------------------------------------------------------------------------------
// blake3_avx512_kernel_16