diff options
| author | Jack O'Connor <[email protected]> | 2020-01-27 13:46:00 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2020-01-28 15:59:16 -0500 |
| commit | 37e153cc607ba5b975ce8bd7f2017f01624781bb (patch) | |
| tree | 20b39b41a9bfad9c6d45e2d92912bd9fc9d6703d /c | |
| parent | d7a37fa54d67e67c19027928ae524292318c9021 (diff) | |
add NEON support to blake3_dispatch.c
Currently this requires setting the BLAKE3_USE_NEON preprocessor flag.
In the future we may enable this automatically on AArch32/64 or include
some kind of dynamic feature detection. (Though ARM makes this harder
than x86.)
As part of this, get rid of the IS_ARM flag. It wasn't being set
properly when I tried it on a Raspberry Pi.
Closes #30.
Diffstat (limited to 'c')
| -rw-r--r-- | c/README.md | 23 | ||||
| -rw-r--r-- | c/blake3_dispatch.c | 13 | ||||
| -rw-r--r-- | c/blake3_impl.h | 4 |
3 files changed, 30 insertions, 10 deletions
diff --git a/c/README.md b/c/README.md index 21be216..a0d87af 100644 --- a/c/README.md +++ b/c/README.md @@ -88,9 +88,28 @@ gcc -shared -O3 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512 \ blake3.c blake3_dispatch.c blake3_portable.c -o libblake3.so ``` -### ARM +### ARM NEON -TODO: add NEON support to `blake3_dispatch.c`. +The NEON implementation is not enabled by default on ARM, since not all +ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an +example of building a shared library on ARM Linux with NEON support: + +```bash +gcc -shared -O3 -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \ + blake3_portable.c blake3_neon.c -o libblake3.so +``` + +Note that on some targets (ARMv7 in particular), extra flags may be +required to activate NEON support in the compiler. If you see an error +like... + +``` +/usr/lib/gcc/armv7l-unknown-linux-gnueabihf/9.2.0/include/arm_neon.h:635:1: error: inlining failed +in call to always_inline ‘vaddq_u32’: target specific option mismatch +``` + +...then you may need to add something like `-mfpu=neon-vfpv4 +-mfloat-abi=hard`. ### Other Platforms diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c index 4d033db..7daf43e 100644 --- a/c/blake3_dispatch.c +++ b/c/blake3_dispatch.c @@ -73,7 +73,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, #endif #endif -#if defined(IS_ARM) && defined(BLAKE3_USE_NEON) +#if defined(BLAKE3_USE_NEON) void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, @@ -191,10 +191,8 @@ static } g_cpu_features = features; return features; -#elif defined(IS_ARM) - /* How to detect NEON? */ - return 0; #else + /* How to detect NEON? */ return 0; #endif } @@ -275,6 +273,13 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, } #endif #endif + +#if defined(BLAKE3_USE_NEON) + blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + return; +#endif + blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); diff --git a/c/blake3_impl.h b/c/blake3_impl.h index 269dd67..d8954e4 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -38,10 +38,6 @@ enum blake3_flags { #define IS_X86_32 #endif -#if defined(__arm__) -#define IS_ARM -#endif - #if defined(IS_X86) #if defined(_MSC_VER) #include <intrin.h> |
