diff options
| author | sdlyyxy <[email protected]> | 2023-06-24 15:14:47 +0800 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2023-07-05 13:28:45 -0400 |
| commit | 38a06e78d3f39cff749a8230d94394d62e00251d (patch) | |
| tree | 9ae803524708757a8917707cc4026805a0587209 | |
| parent | e47e5706912dee41416d54b8193260b90f059b50 (diff) | |
Improve NEON rot16/rot8
| -rw-r--r-- | c/blake3_neon.c | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/c/blake3_neon.c b/c/blake3_neon.c
index a6f6da9..3e58703 100644
--- a/c/blake3_neon.c
+++ b/c/blake3_neon.c
@@ -36,7 +36,7 @@ INLINE uint32x4_t set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
 }
 
 INLINE uint32x4_t rot16_128(uint32x4_t x) {
-  return vorrq_u32(vshrq_n_u32(x, 16), vshlq_n_u32(x, 32 - 16));
+  return vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x)));
 }
 
 INLINE uint32x4_t rot12_128(uint32x4_t x) {
@@ -44,7 +44,7 @@ INLINE uint32x4_t rot12_128(uint32x4_t x) {
 }
 
 INLINE uint32x4_t rot8_128(uint32x4_t x) {
-  return vorrq_u32(vshrq_n_u32(x, 8), vshlq_n_u32(x, 32 - 8));
+  return vreinterpretq_u32_u8(__builtin_shufflevector(vreinterpretq_u8_u32(x), vreinterpretq_u8_u32(x), 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12));
 }
 
 INLINE uint32x4_t rot7_128(uint32x4_t x) {
```
