aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJack O'Connor <[email protected]>2021-10-14 21:48:02 -0400
committerGitHub <[email protected]>2021-10-14 21:48:02 -0400
commit5957d7d48f0626bfa9cd21339b0db61c72391ebe (patch)
tree409772ca28e0b8e3631bae5a4f914d1c65cf5249
parentdb436a50c2385550e7a0d54b24c7c6b47a6ca037 (diff)
parent2aa7c963be41eaee47f3ba5af1d5a7d7f1a9658d (diff)
Merge pull request #201 from symmetree-labs/master
Improve compile-time target detection for NEON
-rw-r--r--.github/workflows/ci.yml3
-rw-r--r--Cargo.toml1
-rw-r--r--build.rs19
-rw-r--r--c/Makefile.testing6
-rw-r--r--c/README.md17
-rw-r--r--c/blake3_c_rust_bindings/build.rs16
-rw-r--r--c/blake3_dispatch.c4
-rw-r--r--c/blake3_impl.h17
-rw-r--r--src/lib.rs2
-rw-r--r--src/platform.rs18
10 files changed, 79 insertions, 24 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7ce4b59..715fe15 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -149,6 +149,9 @@ jobs:
# Test the NEON implementation on ARM targets.
- run: cross test --target ${{ matrix.arch }} --features=neon
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
+ # NEON is enabled by default on aarch64, disabling it through the no_neon feature.
+ - run: cross test --target ${{ matrix.arch }} --features=no_neon
+ if: startsWith(matrix.arch, 'aarch64-')
# Test vectors. Note that this uses a hacky script due to path dependency limitations.
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
# C code. Same issue with the hacky script.
diff --git a/Cargo.toml b/Cargo.toml
index a43f000..b2d8d0c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,6 +73,7 @@ no_sse2 = []
no_sse41 = []
no_avx2 = []
no_avx512 = []
+no_neon = []
[package.metadata.docs.rs]
# Document Hasher::update_rayon on docs.rs.
diff --git a/build.rs b/build.rs
index 4fd3bae..ac1d6a6 100644
--- a/build.rs
+++ b/build.rs
@@ -17,6 +17,10 @@ fn is_neon() -> bool {
defined("CARGO_FEATURE_NEON")
}
+fn is_no_neon() -> bool {
+ defined("CARGO_FEATURE_NO_NEON")
+}
+
fn is_ci() -> bool {
defined("BLAKE3_CI")
}
@@ -44,6 +48,14 @@ fn is_x86_32() -> bool {
arch == "i386" || arch == "i586" || arch == "i686"
}
+fn is_arm() -> bool {
+ is_armv7() || is_aarch64() || target_components()[0] == "arm"
+}
+
+fn is_aarch64() -> bool {
+ target_components()[0] == "aarch64"
+}
+
fn is_armv7() -> bool {
target_components()[0] == "armv7"
}
@@ -218,6 +230,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
}
+ if is_no_neon() && is_neon() {
+ panic!("It doesn't make sense to enable both \"no_neon\" and \"neon\".");
+ }
+
if is_x86_64() || is_x86_32() {
let support = c_compiler_support();
if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
@@ -237,7 +253,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
}
- if is_neon() {
+ if (is_arm() && is_neon()) || (!is_no_neon() && !is_pure() && is_aarch64()) {
+ println!("cargo:rustc-cfg=blake3_neon");
build_neon_c_intrinsics();
}
diff --git a/c/Makefile.testing b/c/Makefile.testing
index 41e6b82..b540528 100644
--- a/c/Makefile.testing
+++ b/c/Makefile.testing
@@ -38,10 +38,14 @@ ASM_TARGETS += blake3_avx512_x86-64_unix.S
endif
ifdef BLAKE3_USE_NEON
-EXTRAFLAGS += -DBLAKE3_USE_NEON
+EXTRAFLAGS += -DBLAKE3_USE_NEON=1
TARGETS += blake3_neon.o
endif
+ifdef BLAKE3_NO_NEON
+EXTRAFLAGS += -DBLAKE3_USE_NEON=0
+endif
+
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
diff --git a/c/README.md b/c/README.md
index 5268818..eacdc54 100644
--- a/c/README.md
+++ b/c/README.md
@@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A
## ARM NEON
-The NEON implementation is not enabled by default on ARM, since not all
-ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an
-example of building a shared library on ARM Linux with NEON support:
+The NEON implementation is enabled by default on AARCH64, but not on
+other ARM targets, since not all of them support it. To enable it, set
+`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
+ARM Linux with NEON support:
```bash
-gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \
+gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
blake3_portable.c blake3_neon.c
```
+To explicitiy disable using NEON instructions on AARCH64, set
+`BLAKE3_USE_NEON=0`.
+
+```bash
+gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
+ blake3_portable.c
+```
+
Note that on some targets (ARMv7 in particular), extra flags may be
required to activate NEON support in the compiler. If you see an error
like...
diff --git a/c/blake3_c_rust_bindings/build.rs b/c/blake3_c_rust_bindings/build.rs
index d5dc47a..98f8396 100644
--- a/c/blake3_c_rust_bindings/build.rs
+++ b/c/blake3_c_rust_bindings/build.rs
@@ -22,6 +22,10 @@ fn is_armv7() -> bool {
target_components()[0] == "armv7"
}
+fn is_aarch64() -> bool {
+ target_components()[0] == "aarch64"
+}
+
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
// right compiler flags to use depend on the toolchain. (And we don't want to
// use flag_if_supported, because we don't want features to be silently
@@ -148,10 +152,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
avx512_build.compile("blake3_avx512");
}
- // We only build NEON code here if 1) it's requested and 2) the root crate
- // is not already building it. The only time this will really happen is if
- // you build this crate by hand with the "neon" feature for some reason.
- if defined("CARGO_FEATURE_NEON") {
+ // We only build NEON code here if
+ // 1) it's requested
+ // and 2) the root crate is not already building it.
+ // The only time this will really happen is if you build this
+ // crate by hand with the "neon" feature for some reason.
+ //
+ // In addition, 3) if the target is aarch64, NEON is on by default.
+ if defined("CARGO_FEATURE_NEON") || is_aarch64() {
let mut neon_build = new_build();
neon_build.file(c_dir_path("blake3_neon.c"));
// ARMv7 platforms that support NEON generally need the following
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c
index 6518478..b498058 100644
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -232,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
#endif
#endif
-#if defined(BLAKE3_USE_NEON)
+#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
@@ -269,7 +269,7 @@ size_t blake3_simd_degree(void) {
}
#endif
#endif
-#if defined(BLAKE3_USE_NEON)
+#if BLAKE3_USE_NEON == 1
return 4;
#endif
return 1;
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 86ab6aa..ba2e91c 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -38,6 +38,10 @@ enum blake3_flags {
#define IS_X86_32
#endif
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define IS_AARCH64
+#endif
+
#if defined(IS_X86)
#if defined(_MSC_VER)
#include <intrin.h>
@@ -45,9 +49,18 @@ enum blake3_flags {
#include <immintrin.h>
#endif
+#if !defined(BLAKE3_USE_NEON)
+ // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
+ #if defined(IS_AARCH64)
+ #define BLAKE3_USE_NEON 1
+ #else
+ #define BLAKE3_USE_NEON 0
+ #endif
+#endif
+
#if defined(IS_X86)
#define MAX_SIMD_DEGREE 16
-#elif defined(BLAKE3_USE_NEON)
+#elif BLAKE3_USE_NEON == 1
#define MAX_SIMD_DEGREE 4
#else
#define MAX_SIMD_DEGREE 1
@@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
#endif
#endif
-#if defined(BLAKE3_USE_NEON)
+#if BLAKE3_USE_NEON == 1
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
diff --git a/src/lib.rs b/src/lib.rs
index 4123a0b..31e5cd6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -94,7 +94,7 @@ mod avx2;
#[cfg(blake3_avx512_ffi)]
#[path = "ffi_avx512.rs"]
mod avx512;
-#[cfg(feature = "neon")]
+#[cfg(blake3_neon)]
#[path = "ffi_neon.rs"]
mod neon;
mod portable;
diff --git a/src/platform.rs b/src/platform.rs
index 1a732c5..00058b1 100644
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -10,7 +10,7 @@ cfg_if::cfg_if! {
pub const MAX_SIMD_DEGREE: usize = 8;
}
}
- } else if #[cfg(feature = "neon")] {
+ } else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE: usize = 4;
} else {
pub const MAX_SIMD_DEGREE: usize = 1;
@@ -30,7 +30,7 @@ cfg_if::cfg_if! {
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
}
}
- } else if #[cfg(feature = "neon")] {
+ } else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
} else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
@@ -49,7 +49,7 @@ pub enum Platform {
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX512,
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
NEON,
}
@@ -76,7 +76,7 @@ impl Platform {
}
// We don't use dynamic feature detection for NEON. If the "neon"
// feature is on, NEON is assumed to be supported.
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
{
return Platform::NEON;
}
@@ -95,7 +95,7 @@ impl Platform {
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => 16,
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
Platform::NEON => 4,
};
debug_assert!(degree <= MAX_SIMD_DEGREE);
@@ -129,7 +129,7 @@ impl Platform {
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
},
// No NEON compress_in_place() implementation yet.
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
}
}
@@ -161,7 +161,7 @@ impl Platform {
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
},
// No NEON compress_xof() implementation yet.
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
}
}
@@ -256,7 +256,7 @@ impl Platform {
)
},
// Assumed to be safe if the "neon" feature is on.
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
Platform::NEON => unsafe {
crate::neon::hash_many(
inputs,
@@ -315,7 +315,7 @@ impl Platform {
}
}
- #[cfg(feature = "neon")]
+ #[cfg(blake3_neon)]
pub fn neon() -> Option<Self> {
// Assumed to be safe if the "neon" feature is on.
Some(Self::NEON)