about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Nazar Mokrynskyi <[email protected]> 2025-08-12 14:05:37 +0300
committer: Jack O'Connor <[email protected]> 2025-08-12 18:33:35 -0700
commit: 89537d968225007ee1490059c894fc89ab9b4863 (patch)
tree: 6ca463bceb4bad49ed41cf2f0173f352f46cc93a /src
parent: c7f0d216e6fc834b742456b39546c9835baa1277 (diff)
Fix prefetch pointer addition that resulted in UB
Diffstat (limited to 'src')
-rw-r--r-- src/rust_avx2.rs | 5
-rw-r--r-- src/rust_sse2.rs | 5
-rw-r--r-- src/rust_sse41.rs | 5
3 files changed, 12 insertions, 3 deletions
diff --git a/src/rust_avx2.rs b/src/rust_avx2.rs
index a37a4ca..7fe69e6 100644
--- a/src/rust_avx2.rs
+++ b/src/rust_avx2.rs
@@ -262,7 +262,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
loadu(inputs[7].add(block_offset + 1 * 4 * DEGREE)),
];
for i in 0..DEGREE {
- _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+ _mm_prefetch(
+ inputs[i].wrapping_add(block_offset + 256) as *const i8,
+ _MM_HINT_T0,
+ );
}
let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE);
transpose_vecs(squares.0);
diff --git a/src/rust_sse2.rs b/src/rust_sse2.rs
index bd2be69..2a9eed6 100644
--- a/src/rust_sse2.rs
+++ b/src/rust_sse2.rs
@@ -522,7 +522,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)),
];
for i in 0..DEGREE {
- _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+ _mm_prefetch(
+ inputs[i].wrapping_add(block_offset + 256) as *const i8,
+ _MM_HINT_T0,
+ );
}
let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE);
transpose_vecs(squares.0);
diff --git a/src/rust_sse41.rs b/src/rust_sse41.rs
index 1ebadc4..baffb7d 100644
--- a/src/rust_sse41.rs
+++ b/src/rust_sse41.rs
@@ -513,7 +513,10 @@ unsafe fn transpose_msg_vecs(inputs: &[*const u8; DEGREE], block_offset: usize)
loadu(inputs[3].add(block_offset + 3 * 4 * DEGREE)),
];
for i in 0..DEGREE {
- _mm_prefetch(inputs[i].add(block_offset + 256) as *const i8, _MM_HINT_T0);
+ _mm_prefetch(
+ inputs[i].wrapping_add(block_offset + 256) as *const i8,
+ _MM_HINT_T0,
+ );
}
let squares = mut_array_refs!(&mut vecs, DEGREE, DEGREE, DEGREE, DEGREE);
transpose_vecs(squares.0);