aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2020-02-11 14:13:30 -0500
committer Jack O'Connor <[email protected]> 2020-02-12 10:23:17 -0500
commit efbfa0463c793dc1319db10ca4e3b809937b227d (patch)
tree b643427eb38da8dc9b6548814e7e34966b604791 /src
parent b6b3c27824e665a73f77fd147da2052efff0ab8a (diff)
integrate assembly implementations into the blake3 crate
Diffstat (limited to 'src')
-rw-r--r-- src/c_avx2.rs 63
-rw-r--r-- src/c_avx512.rs 3
-rw-r--r-- src/c_neon.rs 2
-rw-r--r-- src/c_sse41.rs 114
-rw-r--r-- src/lib.rs 38
-rw-r--r-- src/platform.rs 89
-rw-r--r-- src/rust_avx2.rs (renamed from src/avx2.rs) 0
-rw-r--r-- src/rust_sse41.rs (renamed from src/sse41.rs) 0
8 files changed, 261 insertions, 48 deletions
diff --git a/src/c_avx2.rs b/src/c_avx2.rs
new file mode 100644
index 0000000..d805e86
--- /dev/null
+++ b/src/c_avx2.rs
@@ -0,0 +1,63 @@
+use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
+
+// Note that there is no AVX2 implementation of compress_in_place or
+// compress_xof.
+
+// Unsafe because this may only be called on platforms supporting AVX2.
+pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
+ inputs: &[&A],
+ key: &CVWords,
+ counter: u64,
+ increment_counter: IncrementCounter,
+ flags: u8,
+ flags_start: u8,
+ flags_end: u8,
+ out: &mut [u8],
+) {
+ // The Rust hash_many implementations do bounds checking on the `out`
+ // array, but the C implementations don't. Even though this is an unsafe
+ // function, assert the bounds here.
+ assert!(out.len() >= inputs.len() * OUT_LEN);
+ ffi::blake3_hash_many_avx2(
+ inputs.as_ptr() as *const *const u8,
+ inputs.len(),
+ A::CAPACITY / BLOCK_LEN,
+ key.as_ptr(),
+ counter,
+ increment_counter.yes(),
+ flags,
+ flags_start,
+ flags_end,
+ out.as_mut_ptr(),
+ )
+}
+
+pub mod ffi {
+ extern "C" {
+ pub fn blake3_hash_many_avx2(
+ inputs: *const *const u8,
+ num_inputs: usize,
+ blocks: usize,
+ key: *const u32,
+ counter: u64,
+ increment_counter: bool,
+ flags: u8,
+ flags_start: u8,
+ flags_end: u8,
+ out: *mut u8,
+ );
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_hash_many() {
+ if !crate::platform::avx2_detected() {
+ return;
+ }
+ crate::test::test_hash_many_fn(hash_many, hash_many);
+ }
+}
diff --git a/src/c_avx512.rs b/src/c_avx512.rs
index f20de2c..c1b9f64 100644
--- a/src/c_avx512.rs
+++ b/src/c_avx512.rs
@@ -1,7 +1,5 @@
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
-pub const DEGREE: usize = 16;
-
// Unsafe because this may only be called on platforms supporting AVX-512.
pub unsafe fn compress_in_place(
cv: &mut CVWords,
@@ -91,7 +89,6 @@ pub mod ffi {
flags_end: u8,
out: *mut u8,
);
-
}
}
diff --git a/src/c_neon.rs b/src/c_neon.rs
index 34ef074..77b9654 100644
--- a/src/c_neon.rs
+++ b/src/c_neon.rs
@@ -1,7 +1,5 @@
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
-pub const DEGREE: usize = 4;
-
// Unsafe because this may only be called on platforms supporting NEON.
pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
inputs: &[&A],
diff --git a/src/c_sse41.rs b/src/c_sse41.rs
new file mode 100644
index 0000000..0b64c90
--- /dev/null
+++ b/src/c_sse41.rs
@@ -0,0 +1,114 @@
+use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
+
+// Unsafe because this may only be called on platforms supporting SSE4.1.
+pub unsafe fn compress_in_place(
+ cv: &mut CVWords,
+ block: &[u8; BLOCK_LEN],
+ block_len: u8,
+ counter: u64,
+ flags: u8,
+) {
+ ffi::blake3_compress_in_place_sse41(cv.as_mut_ptr(), block.as_ptr(), block_len, counter, flags)
+}
+
+// Unsafe because this may only be called on platforms supporting SSE4.1.
+pub unsafe fn compress_xof(
+ cv: &CVWords,
+ block: &[u8; BLOCK_LEN],
+ block_len: u8,
+ counter: u64,
+ flags: u8,
+) -> [u8; 64] {
+ let mut out = [0u8; 64];
+ ffi::blake3_compress_xof_sse41(
+ cv.as_ptr(),
+ block.as_ptr(),
+ block_len,
+ counter,
+ flags,
+ out.as_mut_ptr(),
+ );
+ out
+}
+
+// Unsafe because this may only be called on platforms supporting SSE4.1.
+pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
+ inputs: &[&A],
+ key: &CVWords,
+ counter: u64,
+ increment_counter: IncrementCounter,
+ flags: u8,
+ flags_start: u8,
+ flags_end: u8,
+ out: &mut [u8],
+) {
+ // The Rust hash_many implementations do bounds checking on the `out`
+ // array, but the C implementations don't. Even though this is an unsafe
+ // function, assert the bounds here.
+ assert!(out.len() >= inputs.len() * OUT_LEN);
+ ffi::blake3_hash_many_sse41(
+ inputs.as_ptr() as *const *const u8,
+ inputs.len(),
+ A::CAPACITY / BLOCK_LEN,
+ key.as_ptr(),
+ counter,
+ increment_counter.yes(),
+ flags,
+ flags_start,
+ flags_end,
+ out.as_mut_ptr(),
+ )
+}
+
+pub mod ffi {
+ extern "C" {
+ pub fn blake3_compress_in_place_sse41(
+ cv: *mut u32,
+ block: *const u8,
+ block_len: u8,
+ counter: u64,
+ flags: u8,
+ );
+ pub fn blake3_compress_xof_sse41(
+ cv: *const u32,
+ block: *const u8,
+ block_len: u8,
+ counter: u64,
+ flags: u8,
+ out: *mut u8,
+ );
+ pub fn blake3_hash_many_sse41(
+ inputs: *const *const u8,
+ num_inputs: usize,
+ blocks: usize,
+ key: *const u32,
+ counter: u64,
+ increment_counter: bool,
+ flags: u8,
+ flags_start: u8,
+ flags_end: u8,
+ out: *mut u8,
+ );
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_compress() {
+ if !crate::platform::sse41_detected() {
+ return;
+ }
+ crate::test::test_compress_fn(compress_in_place, compress_xof);
+ }
+
+ #[test]
+ fn test_hash_many() {
+ if !crate::platform::sse41_detected() {
+ return;
+ }
+ crate::test::test_hash_many_fn(hash_many, hash_many);
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7fa3510..58d2dbe 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -39,24 +39,32 @@ mod test;
#[doc(hidden)]
pub mod guts;
-// These modules are pub for benchmarks only. They are not stable.
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[doc(hidden)]
-pub mod avx2;
-#[cfg(feature = "c_avx512")]
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[doc(hidden)]
-pub mod c_avx512;
-#[cfg(feature = "c_neon")]
-#[doc(hidden)]
-pub mod c_neon;
+// The platform module is pub for benchmarks only. It is not stable.
#[doc(hidden)]
pub mod platform;
-#[doc(hidden)]
-pub mod portable;
+
+// Platform-specific implementations of the compression function.
+mod portable;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[doc(hidden)]
-pub mod sse41;
+cfg_if::cfg_if! {
+ if #[cfg(feature = "c")] {
+ #[path = "c_sse41.rs"]
+ mod sse41;
+ #[path = "c_avx2.rs"]
+ mod avx2;
+ #[path = "c_avx512.rs"]
+ mod avx512;
+ } else {
+ #[path = "rust_sse41.rs"]
+ mod sse41;
+ #[path = "rust_avx2.rs"]
+ mod avx2;
+ // Stable Rust does not currently support AVX-512.
+ }
+}
+#[cfg(feature = "c_neon")]
+#[path = "c_neon.rs"]
+mod neon;
pub mod traits;
diff --git a/src/platform.rs b/src/platform.rs
index b453a6e..163cbbb 100644
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -1,18 +1,10 @@
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
use arrayref::{array_mut_ref, array_ref};
-#[cfg(feature = "c_avx512")]
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-use crate::c_avx512;
-#[cfg(feature = "c_neon")]
-use crate::c_neon;
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-use crate::{avx2, sse41};
-
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! {
- if #[cfg(feature = "c_avx512")] {
+ if #[cfg(feature = "c")] {
pub const MAX_SIMD_DEGREE: usize = 16;
} else {
pub const MAX_SIMD_DEGREE: usize = 8;
@@ -32,7 +24,7 @@ cfg_if::cfg_if! {
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! {
- if #[cfg(feature = "c_avx512")] {
+ if #[cfg(feature = "c")] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
} else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
@@ -52,7 +44,7 @@ pub enum Platform {
SSE41,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX2,
- #[cfg(feature = "c_avx512")]
+ #[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX512,
#[cfg(feature = "c_neon")]
@@ -64,7 +56,7 @@ impl Platform {
pub fn detect() -> Self {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
- #[cfg(feature = "c_avx512")]
+ #[cfg(feature = "c")]
{
if avx512_detected() {
return Platform::AVX512;
@@ -93,7 +85,7 @@ impl Platform {
Platform::SSE41 => 4,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX2 => 8,
- #[cfg(feature = "c_avx512")]
+ #[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => 16,
#[cfg(feature = "c_neon")]
@@ -103,7 +95,7 @@ impl Platform {
degree
}
- pub(crate) fn compress_in_place(
+ pub fn compress_in_place(
&self,
cv: &mut CVWords,
block: &[u8; BLOCK_LEN],
@@ -116,13 +108,13 @@ impl Platform {
// Safe because detect() checked for platform support.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 | Platform::AVX2 => unsafe {
- sse41::compress_in_place(cv, block, block_len, counter, flags)
+ crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
},
// Safe because detect() checked for platform support.
- #[cfg(feature = "c_avx512")]
+ #[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
- c_avx512::compress_in_place(cv, block, block_len, counter, flags)
+ crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
},
// No NEON compress_in_place() implementation yet.
#[cfg(feature = "c_neon")]
@@ -130,7 +122,7 @@ impl Platform {
}
}
- pub(crate) fn compress_xof(
+ pub fn compress_xof(
&self,
cv: &CVWords,
block: &[u8; BLOCK_LEN],
@@ -143,13 +135,13 @@ impl Platform {
// Safe because detect() checked for platform support.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 | Platform::AVX2 => unsafe {
- sse41::compress_xof(cv, block, block_len, counter, flags)
+ crate::sse41::compress_xof(cv, block, block_len, counter, flags)
},
// Safe because detect() checked for platform support.
- #[cfg(feature = "c_avx512")]
+ #[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
- c_avx512::compress_xof(cv, block, block_len, counter, flags)
+ crate::avx512::compress_xof(cv, block, block_len, counter, flags)
},
// No NEON compress_xof() implementation yet.
#[cfg(feature = "c_neon")]
@@ -167,7 +159,7 @@ impl Platform {
// after every block, there's a small but measurable performance loss.
// Compressing chunks with a dedicated loop avoids this.
- pub(crate) fn hash_many<A: arrayvec::Array<Item = u8>>(
+ pub fn hash_many<A: arrayvec::Array<Item = u8>>(
&self,
inputs: &[&A],
key: &CVWords,
@@ -192,7 +184,7 @@ impl Platform {
// Safe because detect() checked for platform support.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::SSE41 => unsafe {
- sse41::hash_many(
+ crate::sse41::hash_many(
inputs,
key,
counter,
@@ -206,7 +198,7 @@ impl Platform {
// Safe because detect() checked for platform support.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX2 => unsafe {
- avx2::hash_many(
+ crate::avx2::hash_many(
inputs,
key,
counter,
@@ -218,10 +210,10 @@ impl Platform {
)
},
// Safe because detect() checked for platform support.
- #[cfg(feature = "c_avx512")]
+ #[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
- c_avx512::hash_many(
+ crate::avx512::hash_many(
inputs,
key,
counter,
@@ -235,7 +227,7 @@ impl Platform {
// Assumed to be safe if the "c_neon" feature is on.
#[cfg(feature = "c_neon")]
Platform::NEON => unsafe {
- c_neon::hash_many(
+ crate::neon::hash_many(
inputs,
key,
counter,
@@ -248,11 +240,52 @@ impl Platform {
},
}
}
+
+ // Explicit platform constructors, for benchmarks.
+
+ pub fn portable() -> Self {
+ Self::Portable
+ }
+
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub fn sse41() -> Option<Self> {
+ if sse41_detected() {
+ Some(Self::SSE41)
+ } else {
+ None
+ }
+ }
+
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub fn avx2() -> Option<Self> {
+ if avx2_detected() {
+ Some(Self::AVX2)
+ } else {
+ None
+ }
+ }
+
+ #[cfg(feature = "c")]
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ pub fn avx512() -> Option<Self> {
+ if avx512_detected() {
+ Some(Self::AVX512)
+ } else {
+ None
+ }
+ }
+
+ // Gated on "c_neon" only, like the neon module and Platform::NEON arms; an x86 target_arch gate would hide this on ARM.
+ #[cfg(feature = "c_neon")]
+ pub fn neon() -> Option<Self> {
+ // Assumed to be safe if the "c_neon" feature is on.
+ Some(Self::NEON)
+ }
}
// Note that AVX-512 is divided into multiple featuresets, and we use two of
// them, F and VL.
-#[cfg(feature = "c_avx512")]
+#[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn avx512_detected() -> bool {
diff --git a/src/avx2.rs b/src/rust_avx2.rs
index 7f36072..7f36072 100644
--- a/src/avx2.rs
+++ b/src/rust_avx2.rs
diff --git a/src/sse41.rs b/src/rust_sse41.rs
index fcf2f98..fcf2f98 100644
--- a/src/sse41.rs
+++ b/src/rust_sse41.rs