diff options
| field | value | date |
|---|---|---|
| author | Jack O'Connor <[email protected]> | 2022-03-08 14:46:13 -0500 |
| committer | Jack O'Connor <[email protected]> | 2022-03-08 22:23:09 -0500 |
| commit | ec669de03e8719b25a29b9b616057072c019e9da (patch) | |
| tree | ae405ad8ebc06750305466774839d97f2b491d99 | |
| parent | 9fdea0db7cee98343c920c8f28d8e88dc6a3a500 (diff) | |
use a memory argument for vpbroadcastd
| -rw-r--r-- | src/kernel.rs | 28 |
1 file changed, 10 insertions, 18 deletions
```diff
diff --git a/src/kernel.rs b/src/kernel.rs
index 115ec76..8e03e48 100644
--- a/src/kernel.rs
+++ b/src/kernel.rs
@@ -982,24 +982,16 @@ global_asm!(
     // --------------------------------------------------------------------------------------------
     "blake3_avx512_chunks_16:",
     // TODO: Prefetches
-    // Broadcast the key into zmm0-zmm7. Use ecx as scratch.
-    "mov ecx, dword ptr [rsi + 0 * 4]",
-    "vpbroadcastd zmm0, ecx",
-    "mov ecx, dword ptr [rsi + 1 * 4]",
-    "vpbroadcastd zmm1, ecx",
-    "mov ecx, dword ptr [rsi + 2 * 4]",
-    "vpbroadcastd zmm2, ecx",
-    "mov ecx, dword ptr [rsi + 3 * 4]",
-    "vpbroadcastd zmm3, ecx",
-    "mov ecx, dword ptr [rsi + 4 * 4]",
-    "vpbroadcastd zmm4, ecx",
-    "mov ecx, dword ptr [rsi + 5 * 4]",
-    "vpbroadcastd zmm5, ecx",
-    "mov ecx, dword ptr [rsi + 6 * 4]",
-    "vpbroadcastd zmm6, ecx",
-    "mov ecx, dword ptr [rsi + 7 * 4]",
-    "vpbroadcastd zmm7, ecx",
-    // ecx is the block length arg to blake3_avx512_blocks_16. It is always 64.
+    // Broadcast the key into zmm0-zmm7.
+    "vpbroadcastd zmm0, dword ptr [rsi + 0 * 4]",
+    "vpbroadcastd zmm1, dword ptr [rsi + 1 * 4]",
+    "vpbroadcastd zmm2, dword ptr [rsi + 2 * 4]",
+    "vpbroadcastd zmm3, dword ptr [rsi + 3 * 4]",
+    "vpbroadcastd zmm4, dword ptr [rsi + 4 * 4]",
+    "vpbroadcastd zmm5, dword ptr [rsi + 5 * 4]",
+    "vpbroadcastd zmm6, dword ptr [rsi + 6 * 4]",
+    "vpbroadcastd zmm7, dword ptr [rsi + 7 * 4]",
+    // ecx is the block length parameter for blake3_avx512_blocks_16. It is always 64.
     "mov ecx, 64",
     // Set the CHUNK_START flag.
     "or r8d, 1",
```
