diff options
| -rw-r--r-- | src/rust_avx2.rs | 5 | ||||
| -rw-r--r-- | src/rust_sse2.rs | 5 | ||||
| -rw-r--r-- | src/rust_sse41.rs | 5 |
3 files changed, 12 insertions, 3 deletions
diff --git a/src/rust_avx2.rs b/src/rust_avx2.rs index a37a4ca..7fe69e6 100644 --- a/src/rust_avx2.rs +++ b/src/rust_avx2.rs @@ -262,7 +262,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize) loadu(inputs[7].add(block_offset + 1 * 4 * DEGREE)), ]; for i in 0..DEGREE { - _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0); + _mm_prefetch( + inputs[i].wrapping_add(block_offset + 256) as *const i8, + _MM_HINT_T0, + ); } let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE); transpose_vecs(squares.0); diff --git a/src/rust_sse2.rs b/src/rust_sse2.rs index bd2be69..2a9eed6 100644 --- a/src/rust_sse2.rs +++ b/src/rust_sse2.rs @@ -522,7 +522,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize) loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)), ]; for i in 0..DEGREE { - _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0); + _mm_prefetch( + inputs[i].wrapping_add(block_offset + 256) as *const i8, + _MM_HINT_T0, + ); } let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE); transpose_vecs(squares.0); diff --git a/src/rust_sse41.rs b/src/rust_sse41.rs index 1ebadc4..baffb7d 100644 --- a/src/rust_sse41.rs +++ b/src/rust_sse41.rs @@ -513,7 +513,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize) loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)), ]; for i in 0..DEGREE { - _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0); + _mm_prefetch( + inputs[i].wrapping_add(block_offset + 256) as *const i8, + _MM_HINT_T0, + ); } let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE); transpose_vecs(squares.0); |
