diff options
| author | Jack O'Connor <[email protected]> | 2022-03-12 01:14:10 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2022-03-15 14:03:02 -0400 |
| commit | ee558b2f3218cf77b58afb65a9e493fa99642080 (patch) | |
| tree | 60a910ca7bba8095536e45afd14ef3683007c14d /src | |
| parent | 2e5eb837e53722dc121cca50ff4fc27d3413dcea (diff) | |
generate blake3_{avx512,sse41,sse2}_compress with asm.py
Diffstat (limited to 'src')
| -rw-r--r-- | src/kernel.rs | 85 |
1 file changed, 85 insertions, 0 deletions
diff --git a/src/kernel.rs b/src/kernel.rs index 68c478d..0be9df3 100644 --- a/src/kernel.rs +++ b/src/kernel.rs @@ -1,6 +1,91 @@ use crate::CHUNK_LEN; use std::arch::{asm, global_asm}; +global_asm!(include_str!("../asm/out.S")); + +extern "C" { + pub fn blake3_sse2_compress( + cv: &[u32; 8], + block: &[u8; 64], + counter: u64, + block_len: u32, + flags: u32, + ); + pub fn blake3_sse41_compress( + cv: &[u32; 8], + block: &[u8; 64], + counter: u64, + block_len: u32, + flags: u32, + ); + pub fn blake3_avx512_compress( + cv: &[u32; 8], + block: &[u8; 64], + counter: u64, + block_len: u32, + flags: u32, + ); +} + +pub type CompressionFn = + unsafe extern "C" fn(cv: &[u32; 8], block: &[u8; 64], counter: u64, block_len: u32, flags: u32); + +#[cfg(test)] +mod test { + use super::*; + + fn test_compression_function(f: CompressionFn) { + let mut block = [0; 64]; + let block_len = 53; + crate::test::paint_test_input(&mut block[..block_len]); + let counter = u64::MAX - 42; + let flags = crate::CHUNK_START | crate::CHUNK_END | crate::ROOT; + + let mut expected = *crate::IV; + crate::platform::Platform::Portable.compress_in_place( + &mut expected, + &block, + block_len as u8, + counter, + flags, + ); + + let mut found = *crate::IV; + unsafe { + f(&mut found, &block, counter, block_len as u32, flags as u32); + } + + assert_eq!(expected, found); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_sse2_compress() { + if !is_x86_feature_detected!("sse2") { + return; + } + test_compression_function(blake3_sse2_compress); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_sse41_compress() { + if !is_x86_feature_detected!("sse4.1") { + return; + } + test_compression_function(blake3_sse41_compress); + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_avx512_compress() { + if !is_x86_feature_detected!("avx512f") || !is_x86_feature_detected!("avx512vl") { + return; + } + test_compression_function(blake3_avx512_compress); + } +} + global_asm!( // 
-------------------------------------------------------------------------------------------- // blake3_avx512_kernel_16 |
