about | summary | refs | log | tree | commit | diff
path: root/c
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2020-01-27 13:46:00 -0500
committer Jack O'Connor <[email protected]> 2020-01-28 15:59:16 -0500
commit 37e153cc607ba5b975ce8bd7f2017f01624781bb (patch)
tree 20b39b41a9bfad9c6d45e2d92912bd9fc9d6703d /c
parent d7a37fa54d67e67c19027928ae524292318c9021 (diff)
add NEON support to blake3_dispatch.c
Currently this requires setting the BLAKE3_USE_NEON preprocessor flag. In the future we may enable this automatically on AArch32/64 or include some kind of dynamic feature detection. (Though ARM makes this harder than x86.)

As part of this, get rid of the IS_ARM flag. It wasn't being set properly when I tried it on a Raspberry Pi.

Closes #30.
Diffstat (limited to 'c')
-rw-r--r-- c/README.md 23
-rw-r--r-- c/blake3_dispatch.c 13
-rw-r--r-- c/blake3_impl.h 4
3 files changed, 30 insertions, 10 deletions
diff --git a/c/README.md b/c/README.md
index 21be216..a0d87af 100644
--- a/c/README.md
+++ b/c/README.md
@@ -88,9 +88,28 @@ gcc -shared -O3 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512 \
blake3.c blake3_dispatch.c blake3_portable.c -o libblake3.so
```
-### ARM
+### ARM NEON
-TODO: add NEON support to `blake3_dispatch.c`.
+The NEON implementation is not enabled by default on ARM, since not all
+ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an
+example of building a shared library on ARM Linux with NEON support:
+
+```bash
+gcc -shared -O3 -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \
+ blake3_portable.c blake3_neon.c -o libblake3.so
+```
+
+Note that on some targets (ARMv7 in particular), extra flags may be
+required to activate NEON support in the compiler. If you see an error
+like...
+
+```
+/usr/lib/gcc/armv7l-unknown-linux-gnueabihf/9.2.0/include/arm_neon.h:635:1: error: inlining failed
+in call to always_inline ‘vaddq_u32’: target specific option mismatch
+```
+
+...then you may need to add something like `-mfpu=neon-vfpv4
+-mfloat-abi=hard`.
### Other Platforms
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c
index 4d033db..7daf43e 100644
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -73,7 +73,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
#endif
#endif
-#if defined(IS_ARM) && defined(BLAKE3_USE_NEON)
+#if defined(BLAKE3_USE_NEON)
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
@@ -191,10 +191,8 @@ static
}
g_cpu_features = features;
return features;
-#elif defined(IS_ARM)
- /* How to detect NEON? */
- return 0;
#else
+ /* How to detect NEON? */
return 0;
#endif
}
@@ -275,6 +273,13 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
}
#endif
#endif
+
+#if defined(BLAKE3_USE_NEON)
+ blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
+ increment_counter, flags, flags_start, flags_end, out);
+ return;
+#endif
+
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 269dd67..d8954e4 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -38,10 +38,6 @@ enum blake3_flags {
#define IS_X86_32
#endif
-#if defined(__arm__)
-#define IS_ARM
-#endif
-
#if defined(IS_X86)
#if defined(_MSC_VER)
#include <intrin.h>