diff options
| author | Samuel Neves <[email protected]> | 2020-08-31 18:45:43 +0100 |
|---|---|---|
| committer | Samuel Neves <[email protected]> | 2020-08-31 19:11:58 +0100 |
| commit | bf705f2d5471900a4e032e0c12eb457ee05ede9e (patch) | |
| tree | 586e97f9e17b919343cd513dbbb79c50908fe830 | |
| parent | 3340e32c7f4d89f7544b34b2b2924c5dbe5ee258 (diff) | |
remove avoidable spill
| -rw-r--r-- | c/blake3_sse2_x86-64_unix.S | 10 | ||||
| -rw-r--r-- | c/blake3_sse2_x86-64_windows_gnu.S | 10 | ||||
| -rw-r--r-- | c/blake3_sse2_x86-64_windows_msvc.asm | 10 |
3 files changed, 12 insertions, 18 deletions
```diff
diff --git a/c/blake3_sse2_x86-64_unix.S b/c/blake3_sse2_x86-64_unix.S
index 245c519..d144046 100644
--- a/c/blake3_sse2_x86-64_unix.S
+++ b/c/blake3_sse2_x86-64_unix.S
@@ -1836,19 +1836,17 @@ blake3_hash_many_sse2:
         pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
         pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
         por xmm13, xmm12
+        movdqa xmmword ptr [rsp+0x20], xmm13
         movdqa xmm12, xmm7
         punpcklqdq xmm12, xmm5
-        movdqa xmmword ptr [rsp+0x20], xmm2
-        movdqa xmm2, xmm6
+        movdqa xmm13, xmm6
         pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
-        pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
-        por xmm12, xmm2
-        movdqa xmm2, xmmword ptr [rsp+0x20]
+        pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+        por xmm12, xmm13
         pshufd xmm12, xmm12, 0x78
         punpckhdq xmm5, xmm7
         punpckldq xmm6, xmm5
         pshufd xmm7, xmm6, 0x1E
-        movdqa xmmword ptr [rsp+0x20], xmm13
         movdqa xmmword ptr [rsp+0x40], xmm12
         movdqa xmm5, xmmword ptr [rsp+0x30]
         movdqa xmm13, xmmword ptr [rsp+0x50]
diff --git a/c/blake3_sse2_x86-64_windows_gnu.S b/c/blake3_sse2_x86-64_windows_gnu.S
index 0800f4a..494c0c6 100644
--- a/c/blake3_sse2_x86-64_windows_gnu.S
+++ b/c/blake3_sse2_x86-64_windows_gnu.S
@@ -1847,19 +1847,17 @@ blake3_hash_many_sse2:
         pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
         pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
         por xmm13, xmm12
+        movdqa xmmword ptr [rsp+0x20], xmm13
         movdqa xmm12, xmm7
         punpcklqdq xmm12, xmm5
-        movdqa xmmword ptr [rsp+0x20], xmm2
-        movdqa xmm2, xmm6
+        movdqa xmm13, xmm6
         pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
-        pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
-        por xmm12, xmm2
-        movdqa xmm2, xmmword ptr [rsp+0x20]
+        pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+        por xmm12, xmm13
         pshufd xmm12, xmm12, 0x78
         punpckhdq xmm5, xmm7
         punpckldq xmm6, xmm5
         pshufd xmm7, xmm6, 0x1E
-        movdqa xmmword ptr [rsp+0x20], xmm13
         movdqa xmmword ptr [rsp+0x40], xmm12
         movdqa xmm5, xmmword ptr [rsp+0x30]
         movdqa xmm13, xmmword ptr [rsp+0x50]
diff --git a/c/blake3_sse2_x86-64_windows_msvc.asm b/c/blake3_sse2_x86-64_windows_msvc.asm
index 0a2d9cb..72deb7b 100644
--- a/c/blake3_sse2_x86-64_windows_msvc.asm
+++ b/c/blake3_sse2_x86-64_windows_msvc.asm
@@ -1848,19 +1848,17 @@ roundloop2:
         pand xmm13, xmmword ptr [PBLENDW_0x33_MASK]
         pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK]
         por xmm13, xmm12
+        movdqa xmmword ptr [rsp+20H], xmm13
         movdqa xmm12, xmm7
         punpcklqdq xmm12, xmm5
-        movdqa xmmword ptr [rsp+20H], xmm2
-        movdqa xmm2, xmm6
+        movdqa xmm13, xmm6
         pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK]
-        pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK]
-        por xmm12, xmm2
-        movdqa xmm2, xmmword ptr [rsp+20H]
+        pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK]
+        por xmm12, xmm13
         pshufd xmm12, xmm12, 78H
         punpckhdq xmm5, xmm7
         punpckldq xmm6, xmm5
         pshufd xmm7, xmm6, 1EH
-        movdqa xmmword ptr [rsp+20H], xmm13
         movdqa xmmword ptr [rsp+40H], xmm12
         movdqa xmm5, xmmword ptr [rsp+30H]
         movdqa xmm13, xmmword ptr [rsp+50H]
```
