diff options
| author | Jack O'Connor <[email protected]> | 2023-07-05 10:28:07 -0700 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2023-07-05 10:29:02 -0700 |
| commit | f7e1a7429ff7727144a67a06ac210d2831f392a2 (patch) | |
| tree | 87fd6a9afa96ed75369f7d8061719d850d975c79 | |
| parent | 7038dad280eb5dcd622bf54336207b683388e8cc (diff) | |
retain the old NEON rotations in inline comments
| -rw-r--r-- | c/blake3_neon.c | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/c/blake3_neon.c b/c/blake3_neon.c index 1d4559e..8a818fc 100644 --- a/c/blake3_neon.c +++ b/c/blake3_neon.c @@ -36,14 +36,22 @@ INLINE uint32x4_t set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { } INLINE uint32x4_t rot16_128(uint32x4_t x) { + // The straightforward implementation would be two shifts and an or, but that's + // slower on microarchitectures we've tested. See + // https://github.com/BLAKE3-team/BLAKE3/pull/319. + // return vorrq_u32(vshrq_n_u32(x, 16), vshlq_n_u32(x, 32 - 16)); return vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x))); } INLINE uint32x4_t rot12_128(uint32x4_t x) { + // See comment in rot16_128. + // return vorrq_u32(vshrq_n_u32(x, 12), vshlq_n_u32(x, 32 - 12)); return vsriq_n_u32(vshlq_n_u32(x, 32-12), x, 12); } INLINE uint32x4_t rot8_128(uint32x4_t x) { + // See comment in rot16_128. + // return vorrq_u32(vshrq_n_u32(x, 8), vshlq_n_u32(x, 32 - 8)); #if defined(__clang__) return vreinterpretq_u32_u8(__builtin_shufflevector(vreinterpretq_u8_u32(x), vreinterpretq_u8_u32(x), 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12)); #elif __GNUC__ * 10000 + __GNUC_MINOR__ * 100 >=40700 @@ -55,6 +63,8 @@ INLINE uint32x4_t rot8_128(uint32x4_t x) { } INLINE uint32x4_t rot7_128(uint32x4_t x) { + // See comment in rot16_128. + // return vorrq_u32(vshrq_n_u32(x, 7), vshlq_n_u32(x, 32 - 7)); return vsriq_n_u32(vshlq_n_u32(x, 32-7), x, 7); } |
