diff options
| author | Nazar Mokrynskyi <[email protected]> | 2025-08-12 14:05:37 +0300 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2025-08-12 18:33:35 -0700 |
| commit | 89537d968225007ee1490059c894fc89ab9b4863 (patch) | |
| tree | 6ca463bceb4bad49ed41cf2f0173f352f46cc93a | |
| parent | c7f0d216e6fc834b742456b39546c9835baa1277 (diff) | |
Fix prefetch pointer addition that resulted in UB
| -rw-r--r-- | src/rust_avx2.rs | 5 | ||||
| -rw-r--r-- | src/rust_sse2.rs | 5 | ||||
| -rw-r--r-- | src/rust_sse41.rs | 5 |
3 files changed, 12 insertions, 3 deletions
```diff
diff --git a/src/rust_avx2.rs b/src/rust_avx2.rs
index a37a4ca..7fe69e6 100644
--- a/src/rust_avx2.rs
+++ b/src/rust_avx2.rs
@@ -262,7 +262,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
         loadu(inputs[7].add(block_offset + 1 * 4 * DEGREE)),
     ];
     for i in 0..DEGREE {
-        _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+        _mm_prefetch(
+            inputs[i].wrapping_add(block_offset + 256) as *const i8,
+            _MM_HINT_T0,
+        );
     }
     let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE);
     transpose_vecs(squares.0);
diff --git a/src/rust_sse2.rs b/src/rust_sse2.rs
index bd2be69..2a9eed6 100644
--- a/src/rust_sse2.rs
+++ b/src/rust_sse2.rs
@@ -522,7 +522,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
         loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)),
     ];
     for i in 0..DEGREE {
-        _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+        _mm_prefetch(
+            inputs[i].wrapping_add(block_offset + 256) as *const i8,
+            _MM_HINT_T0,
+        );
     }
     let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE);
     transpose_vecs(squares.0);
diff --git a/src/rust_sse41.rs b/src/rust_sse41.rs
index 1ebadc4..baffb7d 100644
--- a/src/rust_sse41.rs
+++ b/src/rust_sse41.rs
@@ -513,7 +513,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
         loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)),
     ];
     for i in 0..DEGREE {
-        _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+        _mm_prefetch(
+            inputs[i].wrapping_add(block_offset + 256) as *const i8,
+            _MM_HINT_T0,
+        );
     }
     let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE);
     transpose_vecs(squares.0);
```
