about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Samuel Neves <[email protected]> 2020-08-31 18:45:43 +0100
committer Samuel Neves <[email protected]> 2020-08-31 19:11:58 +0100
commit bf705f2d5471900a4e032e0c12eb457ee05ede9e (patch)
tree 586e97f9e17b919343cd513dbbb79c50908fe830
parent 3340e32c7f4d89f7544b34b2b2924c5dbe5ee258 (diff)
remove avoidable spill
-rw-r--r-- c/blake3_sse2_x86-64_unix.S 10
-rw-r--r-- c/blake3_sse2_x86-64_windows_gnu.S 10
-rw-r--r-- c/blake3_sse2_x86-64_windows_msvc.asm 10
3 files changed, 12 insertions, 18 deletions
diff --git a/c/blake3_sse2_x86-64_unix.S b/c/blake3_sse2_x86-64_unix.S
index 245c519..d144046 100644
--- a/c/blake3_sse2_x86-64_unix.S
+++ b/c/blake3_sse2_x86-64_unix.S
@@ -1836,19 +1836,17 @@ blake3_hash_many_sse2:
pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
por xmm13, xmm12
+ movdqa xmmword ptr [rsp+0x20], xmm13
movdqa xmm12, xmm7
punpcklqdq xmm12, xmm5
- movdqa xmmword ptr [rsp+0x20], xmm2
- movdqa xmm2, xmm6
+ movdqa xmm13, xmm6
pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
- pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
- por xmm12, xmm2
- movdqa xmm2, xmmword ptr [rsp+0x20]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm12, xmm13
pshufd xmm12, xmm12, 0x78
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
pshufd xmm7, xmm6, 0x1E
- movdqa xmmword ptr [rsp+0x20], xmm13
movdqa xmmword ptr [rsp+0x40], xmm12
movdqa xmm5, xmmword ptr [rsp+0x30]
movdqa xmm13, xmmword ptr [rsp+0x50]
diff --git a/c/blake3_sse2_x86-64_windows_gnu.S b/c/blake3_sse2_x86-64_windows_gnu.S
index 0800f4a..494c0c6 100644
--- a/c/blake3_sse2_x86-64_windows_gnu.S
+++ b/c/blake3_sse2_x86-64_windows_gnu.S
@@ -1847,19 +1847,17 @@ blake3_hash_many_sse2:
pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
por xmm13, xmm12
+ movdqa xmmword ptr [rsp+0x20], xmm13
movdqa xmm12, xmm7
punpcklqdq xmm12, xmm5
- movdqa xmmword ptr [rsp+0x20], xmm2
- movdqa xmm2, xmm6
+ movdqa xmm13, xmm6
pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
- pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
- por xmm12, xmm2
- movdqa xmm2, xmmword ptr [rsp+0x20]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm12, xmm13
pshufd xmm12, xmm12, 0x78
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
pshufd xmm7, xmm6, 0x1E
- movdqa xmmword ptr [rsp+0x20], xmm13
movdqa xmmword ptr [rsp+0x40], xmm12
movdqa xmm5, xmmword ptr [rsp+0x30]
movdqa xmm13, xmmword ptr [rsp+0x50]
diff --git a/c/blake3_sse2_x86-64_windows_msvc.asm b/c/blake3_sse2_x86-64_windows_msvc.asm
index 0a2d9cb..72deb7b 100644
--- a/c/blake3_sse2_x86-64_windows_msvc.asm
+++ b/c/blake3_sse2_x86-64_windows_msvc.asm
@@ -1848,19 +1848,17 @@ roundloop2:
pand xmm13, xmmword ptr [PBLENDW_0x33_MASK]
pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK]
por xmm13, xmm12
+ movdqa xmmword ptr [rsp+20H], xmm13
movdqa xmm12, xmm7
punpcklqdq xmm12, xmm5
- movdqa xmmword ptr [rsp+20H], xmm2
- movdqa xmm2, xmm6
+ movdqa xmm13, xmm6
pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK]
- pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK]
- por xmm12, xmm2
- movdqa xmm2, xmmword ptr [rsp+20H]
+ pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK]
+ por xmm12, xmm13
pshufd xmm12, xmm12, 78H
punpckhdq xmm5, xmm7
punpckldq xmm6, xmm5
pshufd xmm7, xmm6, 1EH
- movdqa xmmword ptr [rsp+20H], xmm13
movdqa xmmword ptr [rsp+40H], xmm12
movdqa xmm5, xmmword ptr [rsp+30H]
movdqa xmm13, xmmword ptr [rsp+50H]