From 89537d968225007ee1490059c894fc89ab9b4863 Mon Sep 17 00:00:00 2001
From: Nazar Mokrynskyi
Date: Tue, 12 Aug 2025 14:05:37 +0300
Subject: Fix prefetch pointer addition that resulted in UB

---
 src/rust_avx2.rs  | 5 ++++-
 src/rust_sse2.rs  | 5 ++++-
 src/rust_sse41.rs | 5 ++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/rust_avx2.rs b/src/rust_avx2.rs
index a37a4ca..7fe69e6 100644
--- a/src/rust_avx2.rs
+++ b/src/rust_avx2.rs
@@ -262,7 +262,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
         loadu(inputs[7].add(block_offset + 1 * 4 * DEGREE)),
     ];
     for i in 0..DEGREE {
-        _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+        _mm_prefetch(
+            inputs[i].wrapping_add(block_offset + 256) as *const i8,
+            _MM_HINT_T0,
+        );
     }
     let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE);
     transpose_vecs(squares.0);
diff --git a/src/rust_sse2.rs b/src/rust_sse2.rs
index bd2be69..2a9eed6 100644
--- a/src/rust_sse2.rs
+++ b/src/rust_sse2.rs
@@ -522,7 +522,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
         loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)),
     ];
     for i in 0..DEGREE {
-        _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+        _mm_prefetch(
+            inputs[i].wrapping_add(block_offset + 256) as *const i8,
+            _MM_HINT_T0,
+        );
     }
     let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE);
     transpose_vecs(squares.0);
diff --git a/src/rust_sse41.rs b/src/rust_sse41.rs
index 1ebadc4..baffb7d 100644
--- a/src/rust_sse41.rs
+++ b/src/rust_sse41.rs
@@ -513,7 +513,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
         loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)),
     ];
     for i in 0..DEGREE {
-        _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+        _mm_prefetch(
+            inputs[i].wrapping_add(block_offset + 256) as *const i8,
+            _MM_HINT_T0,
+        );
     }
     let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE);
     transpose_vecs(squares.0);
-- 
cgit v1.2.3