diff options
| author | Jack O'Connor <[email protected]> | 2020-02-11 14:13:30 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2020-02-12 10:23:17 -0500 |
| commit | efbfa0463c793dc1319db10ca4e3b809937b227d (patch) | |
| tree | b643427eb38da8dc9b6548814e7e34966b604791 /src | |
| parent | b6b3c27824e665a73f77fd147da2052efff0ab8a (diff) | |
integrate assembly implementations into the blake3 crate
Diffstat (limited to 'src')
| -rw-r--r-- | src/c_avx2.rs | 63 | ||||
| -rw-r--r-- | src/c_avx512.rs | 3 | ||||
| -rw-r--r-- | src/c_neon.rs | 2 | ||||
| -rw-r--r-- | src/c_sse41.rs | 114 | ||||
| -rw-r--r-- | src/lib.rs | 38 | ||||
| -rw-r--r-- | src/platform.rs | 89 | ||||
| -rw-r--r-- | src/rust_avx2.rs (renamed from src/avx2.rs) | 0 | ||||
| -rw-r--r-- | src/rust_sse41.rs (renamed from src/sse41.rs) | 0 |
8 files changed, 261 insertions, 48 deletions
diff --git a/src/c_avx2.rs b/src/c_avx2.rs new file mode 100644 index 0000000..d805e86 --- /dev/null +++ b/src/c_avx2.rs @@ -0,0 +1,63 @@ +use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN}; + +// Note that there is no AVX2 implementation of compress_in_place or +// compress_xof. + +// Unsafe because this may only be called on platforms supporting AVX2. +pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>( + inputs: &[&A], + key: &CVWords, + counter: u64, + increment_counter: IncrementCounter, + flags: u8, + flags_start: u8, + flags_end: u8, + out: &mut [u8], +) { + // The Rust hash_many implementations do bounds checking on the `out` + // array, but the C implementations don't. Even though this is an unsafe + // function, assert the bounds here. + assert!(out.len() >= inputs.len() * OUT_LEN); + ffi::blake3_hash_many_avx2( + inputs.as_ptr() as *const *const u8, + inputs.len(), + A::CAPACITY / BLOCK_LEN, + key.as_ptr(), + counter, + increment_counter.yes(), + flags, + flags_start, + flags_end, + out.as_mut_ptr(), + ) +} + +pub mod ffi { + extern "C" { + pub fn blake3_hash_many_avx2( + inputs: *const *const u8, + num_inputs: usize, + blocks: usize, + key: *const u32, + counter: u64, + increment_counter: bool, + flags: u8, + flags_start: u8, + flags_end: u8, + out: *mut u8, + ); + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_hash_many() { + if !crate::platform::avx2_detected() { + return; + } + crate::test::test_hash_many_fn(hash_many, hash_many); + } +} diff --git a/src/c_avx512.rs b/src/c_avx512.rs index f20de2c..c1b9f64 100644 --- a/src/c_avx512.rs +++ b/src/c_avx512.rs @@ -1,7 +1,5 @@ use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN}; -pub const DEGREE: usize = 16; - // Unsafe because this may only be called on platforms supporting AVX-512. pub unsafe fn compress_in_place( cv: &mut CVWords, @@ -91,7 +89,6 @@ pub mod ffi { flags_end: u8, out: *mut u8, ); - } } diff --git a/src/c_neon.rs b/src/c_neon.rs index 34ef074..77b9654 100644 --- a/src/c_neon.rs +++ b/src/c_neon.rs @@ -1,7 +1,5 @@ use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN}; -pub const DEGREE: usize = 4; - // Unsafe because this may only be called on platforms supporting NEON. pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>( inputs: &[&A], diff --git a/src/c_sse41.rs b/src/c_sse41.rs new file mode 100644 index 0000000..0b64c90 --- /dev/null +++ b/src/c_sse41.rs @@ -0,0 +1,114 @@ +use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN}; + +// Unsafe because this may only be called on platforms supporting SSE4.1. +pub unsafe fn compress_in_place( + cv: &mut CVWords, + block: &[u8; BLOCK_LEN], + block_len: u8, + counter: u64, + flags: u8, +) { + ffi::blake3_compress_in_place_sse41(cv.as_mut_ptr(), block.as_ptr(), block_len, counter, flags) +} + +// Unsafe because this may only be called on platforms supporting SSE4.1. +pub unsafe fn compress_xof( + cv: &CVWords, + block: &[u8; BLOCK_LEN], + block_len: u8, + counter: u64, + flags: u8, +) -> [u8; 64] { + let mut out = [0u8; 64]; + ffi::blake3_compress_xof_sse41( + cv.as_ptr(), + block.as_ptr(), + block_len, + counter, + flags, + out.as_mut_ptr(), + ); + out +} + +// Unsafe because this may only be called on platforms supporting SSE4.1. +pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>( + inputs: &[&A], + key: &CVWords, + counter: u64, + increment_counter: IncrementCounter, + flags: u8, + flags_start: u8, + flags_end: u8, + out: &mut [u8], +) { + // The Rust hash_many implementations do bounds checking on the `out` + // array, but the C implementations don't. Even though this is an unsafe + // function, assert the bounds here. + assert!(out.len() >= inputs.len() * OUT_LEN); + ffi::blake3_hash_many_sse41( + inputs.as_ptr() as *const *const u8, + inputs.len(), + A::CAPACITY / BLOCK_LEN, + key.as_ptr(), + counter, + increment_counter.yes(), + flags, + flags_start, + flags_end, + out.as_mut_ptr(), + ) +} + +pub mod ffi { + extern "C" { + pub fn blake3_compress_in_place_sse41( + cv: *mut u32, + block: *const u8, + block_len: u8, + counter: u64, + flags: u8, + ); + pub fn blake3_compress_xof_sse41( + cv: *const u32, + block: *const u8, + block_len: u8, + counter: u64, + flags: u8, + out: *mut u8, + ); + pub fn blake3_hash_many_sse41( + inputs: *const *const u8, + num_inputs: usize, + blocks: usize, + key: *const u32, + counter: u64, + increment_counter: bool, + flags: u8, + flags_start: u8, + flags_end: u8, + out: *mut u8, + ); + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_compress() { + if !crate::platform::sse41_detected() { + return; + } + crate::test::test_compress_fn(compress_in_place, compress_xof); + } + + #[test] + fn test_hash_many() { + if !crate::platform::sse41_detected() { + return; + } + crate::test::test_hash_many_fn(hash_many, hash_many); + } +} @@ -39,24 +39,32 @@ mod test; #[doc(hidden)] pub mod guts; -// These modules are pub for benchmarks only. They are not stable. -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -#[doc(hidden)] -pub mod avx2; -#[cfg(feature = "c_avx512")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -#[doc(hidden)] -pub mod c_avx512; -#[cfg(feature = "c_neon")] -#[doc(hidden)] -pub mod c_neon; +// The platform module is pub for benchmarks only. It is not stable. #[doc(hidden)] pub mod platform; -#[doc(hidden)] -pub mod portable; + +// Platform-specific implementations of the compression function. +mod portable; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -#[doc(hidden)] -pub mod sse41; +cfg_if::cfg_if! { + if #[cfg(feature = "c")] { + #[path = "c_sse41.rs"] + mod sse41; + #[path = "c_avx2.rs"] + mod avx2; + #[path = "c_avx512.rs"] + mod avx512; + } else { + #[path = "rust_sse41.rs"] + mod sse41; + #[path = "rust_avx2.rs"] + mod avx2; + // Stable Rust does not currently support AVX-512. + } +} +#[cfg(feature = "c_neon")] +#[path = "c_neon.rs"] +mod neon; pub mod traits; diff --git a/src/platform.rs b/src/platform.rs index b453a6e..163cbbb 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -1,18 +1,10 @@ use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN}; use arrayref::{array_mut_ref, array_ref}; -#[cfg(feature = "c_avx512")] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use crate::c_avx512; -#[cfg(feature = "c_neon")] -use crate::c_neon; -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use crate::{avx2, sse41}; - cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { cfg_if::cfg_if! { - if #[cfg(feature = "c_avx512")] { + if #[cfg(feature = "c")] { pub const MAX_SIMD_DEGREE: usize = 16; } else { pub const MAX_SIMD_DEGREE: usize = 8; @@ -32,7 +24,7 @@ cfg_if::cfg_if! { cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { cfg_if::cfg_if! { - if #[cfg(feature = "c_avx512")] { + if #[cfg(feature = "c")] { pub const MAX_SIMD_DEGREE_OR_2: usize = 16; } else { pub const MAX_SIMD_DEGREE_OR_2: usize = 8; @@ -52,7 +44,7 @@ pub enum Platform { SSE41, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] AVX2, - #[cfg(feature = "c_avx512")] + #[cfg(feature = "c")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] AVX512, #[cfg(feature = "c_neon")] @@ -64,7 +56,7 @@ impl Platform { pub fn detect() -> Self { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - #[cfg(feature = "c_avx512")] + #[cfg(feature = "c")] { if avx512_detected() { return Platform::AVX512; @@ -93,7 +85,7 @@ impl Platform { Platform::SSE41 => 4, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX2 => 8, - #[cfg(feature = "c_avx512")] + #[cfg(feature = "c")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => 16, #[cfg(feature = "c_neon")] @@ -103,7 +95,7 @@ impl Platform { degree } - pub(crate) fn compress_in_place( + pub fn compress_in_place( &self, cv: &mut CVWords, block: &[u8; BLOCK_LEN], @@ -116,13 +108,13 @@ impl Platform { // Safe because detect() checked for platform support. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::SSE41 | Platform::AVX2 => unsafe { - sse41::compress_in_place(cv, block, block_len, counter, flags) + crate::sse41::compress_in_place(cv, block, block_len, counter, flags) }, // Safe because detect() checked for platform support. - #[cfg(feature = "c_avx512")] + #[cfg(feature = "c")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => unsafe { - c_avx512::compress_in_place(cv, block, block_len, counter, flags) + crate::avx512::compress_in_place(cv, block, block_len, counter, flags) }, // No NEON compress_in_place() implementation yet. #[cfg(feature = "c_neon")] @@ -130,7 +122,7 @@ impl Platform { } } - pub(crate) fn compress_xof( + pub fn compress_xof( &self, cv: &CVWords, block: &[u8; BLOCK_LEN], @@ -143,13 +135,13 @@ impl Platform { // Safe because detect() checked for platform support. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::SSE41 | Platform::AVX2 => unsafe { - sse41::compress_xof(cv, block, block_len, counter, flags) + crate::sse41::compress_xof(cv, block, block_len, counter, flags) }, // Safe because detect() checked for platform support. - #[cfg(feature = "c_avx512")] + #[cfg(feature = "c")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => unsafe { - c_avx512::compress_xof(cv, block, block_len, counter, flags) + crate::avx512::compress_xof(cv, block, block_len, counter, flags) }, // No NEON compress_xof() implementation yet. #[cfg(feature = "c_neon")] @@ -167,7 +159,7 @@ impl Platform { // after every block, there's a small but measurable performance loss. // Compressing chunks with a dedicated loop avoids this. - pub(crate) fn hash_many<A: arrayvec::Array<Item = u8>>( + pub fn hash_many<A: arrayvec::Array<Item = u8>>( &self, inputs: &[&A], key: &CVWords, @@ -192,7 +184,7 @@ impl Platform { // Safe because detect() checked for platform support. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::SSE41 => unsafe { - sse41::hash_many( + crate::sse41::hash_many( inputs, key, counter, @@ -206,7 +198,7 @@ impl Platform { // Safe because detect() checked for platform support. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX2 => unsafe { - avx2::hash_many( + crate::avx2::hash_many( inputs, key, counter, @@ -218,10 +210,10 @@ impl Platform { ) }, // Safe because detect() checked for platform support. - #[cfg(feature = "c_avx512")] + #[cfg(feature = "c")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => unsafe { - c_avx512::hash_many( + crate::avx512::hash_many( inputs, key, counter, @@ -235,7 +227,7 @@ impl Platform { // Assumed to be safe if the "c_neon" feature is on. #[cfg(feature = "c_neon")] Platform::NEON => unsafe { - c_neon::hash_many( + crate::neon::hash_many( inputs, key, counter, @@ -248,11 +240,52 @@ impl Platform { }, } } + + // Explicit platform constructors, for benchmarks. + + pub fn portable() -> Self { + Self::Portable + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn sse41() -> Option<Self> { + if sse41_detected() { + Some(Self::SSE41) + } else { + None + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn avx2() -> Option<Self> { + if avx2_detected() { + Some(Self::AVX2) + } else { + None + } + } + + #[cfg(feature = "c")] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn avx512() -> Option<Self> { + if avx512_detected() { + Some(Self::AVX512) + } else { + None + } + } + + #[cfg(feature = "c_neon")] + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + pub fn neon() -> Option<Self> { + // Assumed to be safe if the "c_neon" feature is on. + Some(Self::NEON) + } } // Note that AVX-512 is divided into multiple featuresets, and we use two of // them, F and VL. -#[cfg(feature = "c_avx512")] +#[cfg(feature = "c")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[inline(always)] pub fn avx512_detected() -> bool { diff --git a/src/avx2.rs b/src/rust_avx2.rs index 7f36072..7f36072 100644 --- a/src/avx2.rs +++ b/src/rust_avx2.rs diff --git a/src/sse41.rs b/src/rust_sse41.rs index fcf2f98..fcf2f98 100644 --- a/src/sse41.rs +++ b/src/rust_sse41.rs |
