aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJack O'Connor <[email protected]>2020-03-31 12:36:41 -0400
committerJack O'Connor <[email protected]>2020-04-01 19:13:15 -0400
commitb8cdcb1f84cf794c7072bbfaacac71f6a5857e3c (patch)
tree8c79aa18cf9ae1ee4908b88a546b4283646ca890 /src
parenteec458d03ee7828225dda4f08138563d4ff8bb6d (diff)
automatically fall back to the pure Rust build
There are two scenarios where compiling AVX-512 C or assembly code might not work: 1. There might not be a C compiler installed at all. Most commonly this is either in cross-compiling situations, or with the Windows GNU target. 2. The installed C compiler might not support e.g. -mavx512f, because it's too old. In both of these cases, print a relevant warning, and then automatically fall back to using the pure Rust intrinsics build. Note that this only affects x86 targets. Other targets always use pure Rust, unless the "neon" feature is enabled.
Diffstat (limited to 'src')
-rw-r--r--src/lib.rs70
-rw-r--r--src/platform.rs28
2 files changed, 45 insertions, 53 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 0a0d640..e9480e8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -33,17 +33,25 @@
//! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It
//! is disabled by default, but enabled for [docs.rs].
//!
-//! The `pure` feature disables all FFI to C and assembly implementations,
-//! leaving only the Rust intrinsics implementations for SSE4.1 and AVX2. This
-//! removes the dependency on a C compiler/assembler. Library crates should
-//! generally avoid this feature, so that each binary crate is free to make its
-//! own decision about build dependencies.
+//! The `neon` feature enables ARM NEON support. Currently there is no runtime
+//! CPU feature detection for NEON, so you must only enable this feature for
+//! targets that are known to have NEON support. In particular, some ARMv7
+//! targets support NEON, and some don't.
+//!
+//! The `std` feature (enabled by default) is required for implementations of
+//! the [`Write`] and [`Seek`] traits, and also for runtime CPU feature
+//! detection. If this feature is disabled, the only way to use the SIMD
+//! implementations in this crate is to enable the corresponding instruction
+//! sets statically for the entire build, with e.g. `RUSTFLAGS="-C
+//! target-cpu=native"`. The resulting binary will not be portable to other machines.
//!
//! [BLAKE3]: https://blake3.io
//! [Rayon]: https://github.com/rayon-rs/rayon
//! [`join::RayonJoin`]: join/enum.RayonJoin.html
//! [`Hasher::update_with_join`]: struct.Hasher.html#method.update_with_join
//! [docs.rs]: https://docs.rs/
+//! [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
+//! [`Seek`]: https://doc.rust-lang.org/std/io/trait.Seek.html
#![cfg_attr(not(feature = "std"), no_std)]
@@ -61,43 +69,27 @@ pub mod guts;
#[doc(hidden)]
pub mod platform;
-// Platform-specific implementations of the compression function.
-mod portable;
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-cfg_if::cfg_if! {
- if #[cfg(feature = "pure")] {
- // When "pure" is enabled, use only Rust intrinsics. Stable Rust
- // doesn't currently support AVX-512.
- #[path = "rust_sse41.rs"]
- mod sse41;
- #[path = "rust_avx2.rs"]
- mod avx2;
- } else if #[cfg(any(target_arch = "x86", feature = "prefer_intrinsics"))] {
- // When "prefer_intrinsics" is enabled, or on 32-bit x86 (which our
- // assembly implementations don't support), use Rust intrinsics for
- // SSE4.1 and AVX2, and use C intrinsics for AVX-512. In this case,
- // build.rs will compile and link c/blake3_avx512.c.
- #[path = "rust_sse41.rs"]
- mod sse41;
- #[path = "rust_avx2.rs"]
- mod avx2;
- #[path = "ffi_avx512.rs"]
- mod avx512;
- } else {
- // Otherwise on x86_64, use assembly implementations for everything. In
- // this case, build.rs will compile and link all the assembly files for
- // the target platform (Unix, Windows MSVC, or Windows GNU).
- #[path = "ffi_sse41.rs"]
- mod sse41;
- #[path = "ffi_avx2.rs"]
- mod avx2;
- #[path = "ffi_avx512.rs"]
- mod avx512;
- }
-}
+// Platform-specific implementations of the compression function. These
+// BLAKE3-specific cfg flags are set in build.rs.
+#[cfg(blake3_avx2_rust)]
+#[path = "rust_avx2.rs"]
+mod avx2;
+#[cfg(blake3_avx2_ffi)]
+#[path = "ffi_avx2.rs"]
+mod avx2;
+#[cfg(blake3_avx512_ffi)]
+#[path = "ffi_avx512.rs"]
+mod avx512;
#[cfg(feature = "neon")]
#[path = "ffi_neon.rs"]
mod neon;
+mod portable;
+#[cfg(blake3_sse41_rust)]
+#[path = "rust_sse41.rs"]
+mod sse41;
+#[cfg(blake3_sse41_ffi)]
+#[path = "ffi_sse41.rs"]
+mod sse41;
pub mod traits;
diff --git a/src/platform.rs b/src/platform.rs
index 0cc8d49..d2790a6 100644
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -4,10 +4,10 @@ use arrayref::{array_mut_ref, array_ref};
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! {
- if #[cfg(feature = "pure")] {
- pub const MAX_SIMD_DEGREE: usize = 8;
- } else {
+ if #[cfg(blake3_avx512_ffi)] {
pub const MAX_SIMD_DEGREE: usize = 16;
+ } else {
+ pub const MAX_SIMD_DEGREE: usize = 8;
}
}
} else if #[cfg(feature = "neon")] {
@@ -24,10 +24,10 @@ cfg_if::cfg_if! {
cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! {
- if #[cfg(feature = "pure")] {
- pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
- } else {
+ if #[cfg(blake3_avx512_ffi)] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
+ } else {
+ pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
}
}
} else if #[cfg(feature = "neon")] {
@@ -44,7 +44,7 @@ pub enum Platform {
SSE41,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX2,
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX512,
#[cfg(feature = "neon")]
@@ -56,7 +56,7 @@ impl Platform {
pub fn detect() -> Self {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
{
if avx512_detected() {
return Platform::AVX512;
@@ -85,7 +85,7 @@ impl Platform {
Platform::SSE41 => 4,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX2 => 8,
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => 16,
#[cfg(feature = "neon")]
@@ -111,7 +111,7 @@ impl Platform {
crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
},
// Safe because detect() checked for platform support.
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
@@ -138,7 +138,7 @@ impl Platform {
crate::sse41::compress_xof(cv, block, block_len, counter, flags)
},
// Safe because detect() checked for platform support.
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
@@ -210,7 +210,7 @@ impl Platform {
)
},
// Safe because detect() checked for platform support.
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe {
crate::avx512::hash_many(
@@ -265,7 +265,7 @@ impl Platform {
}
}
- #[cfg(not(feature = "pure"))]
+ #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx512() -> Option<Self> {
if avx512_detected() {
@@ -285,7 +285,7 @@ impl Platform {
// Note that AVX-512 is divided into multiple featuresets, and we use two of
// them, F and VL.
-#[cfg(not(feature = "pure"))]
+#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn avx512_detected() -> bool {