aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJack O'Connor <[email protected]>2021-11-05 00:17:05 -0400
committerJack O'Connor <[email protected]>2021-11-05 12:25:44 -0400
commit371b5483c95be1e0250c5209d68a8536406152de (patch)
tree00bc4905f45079f9ba02cedb84e1d230eccf8ca3
parent04571021fb5490d0f0008c5b5a968f221de159a0 (diff)
fix incorrect output / undefined behavior in Windows SSE2 assembly
The SSE2 patch introduced xmm10 as a temporary register for one of the rotations, but xmm6-xmm15 are callee-save registers on Windows, and SSE4.1 was only saving the registers it used. The minimal fix is to use one of the saved registers instead of xmm10. See https://github.com/BLAKE3-team/BLAKE3/issues/206.
-rw-r--r--c/blake3_sse2_x86-64_windows_gnu.S12
-rw-r--r--c/blake3_sse2_x86-64_windows_msvc.asm12
-rw-r--r--src/test.rs30
3 files changed, 42 insertions, 12 deletions
diff --git a/c/blake3_sse2_x86-64_windows_gnu.S b/c/blake3_sse2_x86-64_windows_gnu.S
index 424b4f8..8852ba5 100644
--- a/c/blake3_sse2_x86-64_windows_gnu.S
+++ b/c/blake3_sse2_x86-64_windows_gnu.S
@@ -2137,10 +2137,10 @@ _blake3_compress_in_place_sse2:
por xmm9, xmm8
movdqa xmm8, xmm7
punpcklqdq xmm8, xmm5
- movdqa xmm10, xmm6
+ movdqa xmm14, xmm6
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
- por xmm8, xmm10
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm14
pshufd xmm8, xmm8, 0x78
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
@@ -2268,10 +2268,10 @@ blake3_compress_xof_sse2:
por xmm9, xmm8
movdqa xmm8, xmm7
punpcklqdq xmm8, xmm5
- movdqa xmm10, xmm6
+ movdqa xmm14, xmm6
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
- por xmm8, xmm10
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm14
pshufd xmm8, xmm8, 0x78
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
diff --git a/c/blake3_sse2_x86-64_windows_msvc.asm b/c/blake3_sse2_x86-64_windows_msvc.asm
index ff9bb4d..507502f 100644
--- a/c/blake3_sse2_x86-64_windows_msvc.asm
+++ b/c/blake3_sse2_x86-64_windows_msvc.asm
@@ -2139,10 +2139,10 @@ _blake3_compress_in_place_sse2 PROC
por xmm9, xmm8
movdqa xmm8, xmm7
punpcklqdq xmm8, xmm5
- movdqa xmm10, xmm6
+ movdqa xmm14, xmm6
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
- por xmm8, xmm10
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm8, xmm14
pshufd xmm8, xmm8, 78H
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
@@ -2271,10 +2271,10 @@ _blake3_compress_xof_sse2 PROC
por xmm9, xmm8
movdqa xmm8, xmm7
punpcklqdq xmm8, xmm5
- movdqa xmm10, xmm6
+ movdqa xmm14, xmm6
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
- pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
- por xmm8, xmm10
+ pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm8, xmm14
pshufd xmm8, xmm8, 78H
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
diff --git a/src/test.rs b/src/test.rs
index cbe0188..c2bea95 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -564,3 +564,33 @@ fn test_hex_encoding_decoding() {
#[cfg(feature = "std")]
assert_eq!(_result.to_string(), "invalid hex character: 0x80");
}
+
+// This test is a mimized failure case for the Windows SSE2 bug described in
+// https://github.com/BLAKE3-team/BLAKE3/issues/206.
+//
+// Before that issue was fixed, this test would fail on Windows in the following configuration:
+//
+// cargo test --features=no_avx512,no_avx2,no_sse41 --release
+//
+// Bugs like this one (stomping on a caller's register) are very sensitive to the details of
+// surrounding code, so it's not especially likely that this test will catch another bug (or even
+// the same bug) in the future. Still, there's no harm in keeping it.
+#[test]
+fn test_issue_206_windows_sse2() {
+ // This stupid loop has to be here to trigger the bug. I don't know why.
+ for _ in &[0] {
+ // The length 65 (two blocks) is significant. It doesn't repro with 64 (one block). It also
+ // doesn't repro with an all-zero input.
+ let input = &[0xff; 65];
+ let expected_hash = [
+ 183, 235, 50, 217, 156, 24, 190, 219, 2, 216, 176, 255, 224, 53, 28, 95, 57, 148, 179,
+ 245, 162, 90, 37, 121, 0, 142, 219, 62, 234, 204, 225, 161,
+ ];
+
+ // This throwaway call has to be here to trigger the bug.
+ crate::Hasher::new().update(input);
+
+ // This assert fails when the bug is triggered.
+ assert_eq!(crate::Hasher::new().update(input).finalize(), expected_hash);
+ }
+}