about summary refs log tree commit diff
diff options
context:
space:
mode:
author Samuel Neves <[email protected]> 2021-02-06 20:02:53 +0000
committer GitHub <[email protected]> 2021-02-06 20:02:53 +0000
commit 953654e25ece61a084c269169391eceec5235615 (patch)
tree 2c3c15e32aecb91aabb25fa747333dde79f3a29c
parent aea29ace2d4aa4943345155b333b264f625459c0 (diff)
More movd/movq discrepancies. Fixes #149. (#150)
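This should make no difference to the generated code, but some toolchains will not accept movd with 64-bit operands, so use movq wherever a 64-bit general-purpose register is moved into an xmm register.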
-rw-r--r-- c/blake3_sse2_x86-64_unix.S 4
-rw-r--r-- c/blake3_sse2_x86-64_windows_gnu.S 4
-rw-r--r-- c/blake3_sse2_x86-64_windows_msvc.asm 12
-rw-r--r-- c/blake3_sse41_x86-64_windows_msvc.asm 8
4 files changed, 14 insertions, 14 deletions
diff --git a/c/blake3_sse2_x86-64_unix.S b/c/blake3_sse2_x86-64_unix.S
index d144046..99f033f 100644
--- a/c/blake3_sse2_x86-64_unix.S
+++ b/c/blake3_sse2_x86-64_unix.S
@@ -1704,7 +1704,7 @@ blake3_hash_many_sse2:
pshufd xmm15, xmm11, 0x93
shl rax, 0x20
or rax, 0x40
- movd xmm3, rax
+ movq xmm3, rax
movdqa xmmword ptr [rsp+0x20], xmm3
movaps xmm3, xmmword ptr [rsp]
movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1917,7 +1917,7 @@ blake3_hash_many_sse2:
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
shl rax, 32
or rax, 64
- movd xmm12, rax
+ movq xmm12, rax
movdqa xmm3, xmm13
punpcklqdq xmm3, xmm12
movups xmm4, xmmword ptr [r8+rdx-0x40]
diff --git a/c/blake3_sse2_x86-64_windows_gnu.S b/c/blake3_sse2_x86-64_windows_gnu.S
index 494c0c6..424b4f8 100644
--- a/c/blake3_sse2_x86-64_windows_gnu.S
+++ b/c/blake3_sse2_x86-64_windows_gnu.S
@@ -1715,7 +1715,7 @@ blake3_hash_many_sse2:
pshufd xmm15, xmm11, 0x93
shl rax, 0x20
or rax, 0x40
- movd xmm3, rax
+ movq xmm3, rax
movdqa xmmword ptr [rsp+0x20], xmm3
movaps xmm3, xmmword ptr [rsp]
movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1928,7 +1928,7 @@ blake3_hash_many_sse2:
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
shl rax, 32
or rax, 64
- movd xmm12, rax
+ movq xmm12, rax
movdqa xmm3, xmm13
punpcklqdq xmm3, xmm12
movups xmm4, xmmword ptr [r8+rdx-0x40]
diff --git a/c/blake3_sse2_x86-64_windows_msvc.asm b/c/blake3_sse2_x86-64_windows_msvc.asm
index ff9bb4d..551dd5e 100644
--- a/c/blake3_sse2_x86-64_windows_msvc.asm
+++ b/c/blake3_sse2_x86-64_windows_msvc.asm
@@ -1716,7 +1716,7 @@ innerloop2:
pshufd xmm15, xmm11, 93H
shl rax, 20H
or rax, 40H
- movd xmm3, rax
+ movq xmm3, rax
movdqa xmmword ptr [rsp+20H], xmm3
movaps xmm3, xmmword ptr [rsp]
movaps xmm11, xmmword ptr [rsp+10H]
@@ -1929,7 +1929,7 @@ innerloop1:
movaps xmm2, xmmword ptr [BLAKE3_IV]
shl rax, 32
or rax, 64
- movd xmm12, rax
+ movq xmm12, rax
movdqa xmm3, xmm13
punpcklqdq xmm3, xmm12
movups xmm4, xmmword ptr [r8+rdx-40H]
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
movzx r8d, r8b
shl rax, 32
add r8, rax
- movd xmm3, r9
- movd xmm4, r8
+ movq xmm3, r9
+ movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
- movd xmm3, r9
- movd xmm4, r8
+ movq xmm3, r9
+ movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
diff --git a/c/blake3_sse41_x86-64_windows_msvc.asm b/c/blake3_sse41_x86-64_windows_msvc.asm
index 8966c7b..87001e4 100644
--- a/c/blake3_sse41_x86-64_windows_msvc.asm
+++ b/c/blake3_sse41_x86-64_windows_msvc.asm
@@ -1817,8 +1817,8 @@ _blake3_compress_in_place_sse41 PROC
movzx r8d, r8b
shl rax, 32
add r8, rax
- movd xmm3, r9
- movd xmm4, r8
+ movq xmm3, r9
+ movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
@@ -1938,8 +1938,8 @@ _blake3_compress_xof_sse41 PROC
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
- movd xmm3, r9
- movd xmm4, r8
+ movq xmm3, r9
+ movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]