about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2022-03-08 14:46:13 -0500
committer Jack O'Connor <[email protected]> 2022-03-08 22:23:09 -0500
commit ec669de03e8719b25a29b9b616057072c019e9da (patch)
tree ae405ad8ebc06750305466774839d97f2b491d99
parent 9fdea0db7cee98343c920c8f28d8e88dc6a3a500 (diff)
use a memory argument for vpbroadcastd
-rw-r--r-- src/kernel.rs 28
1 files changed, 10 insertions, 18 deletions
diff --git a/src/kernel.rs b/src/kernel.rs
index 115ec76..8e03e48 100644
--- a/src/kernel.rs
+++ b/src/kernel.rs
@@ -982,24 +982,16 @@ global_asm!(
// --------------------------------------------------------------------------------------------
"blake3_avx512_chunks_16:",
// TODO: Prefetches
- // Broadcast the key into zmm0-zmm7. Use ecx as scratch.
- "mov ecx, dword ptr [rsi + 0 * 4]",
- "vpbroadcastd zmm0, ecx",
- "mov ecx, dword ptr [rsi + 1 * 4]",
- "vpbroadcastd zmm1, ecx",
- "mov ecx, dword ptr [rsi + 2 * 4]",
- "vpbroadcastd zmm2, ecx",
- "mov ecx, dword ptr [rsi + 3 * 4]",
- "vpbroadcastd zmm3, ecx",
- "mov ecx, dword ptr [rsi + 4 * 4]",
- "vpbroadcastd zmm4, ecx",
- "mov ecx, dword ptr [rsi + 5 * 4]",
- "vpbroadcastd zmm5, ecx",
- "mov ecx, dword ptr [rsi + 6 * 4]",
- "vpbroadcastd zmm6, ecx",
- "mov ecx, dword ptr [rsi + 7 * 4]",
- "vpbroadcastd zmm7, ecx",
- // ecx is the block length arg to blake3_avx512_blocks_16. It is always 64.
+ // Broadcast the key into zmm0-zmm7.
+ "vpbroadcastd zmm0, dword ptr [rsi + 0 * 4]",
+ "vpbroadcastd zmm1, dword ptr [rsi + 1 * 4]",
+ "vpbroadcastd zmm2, dword ptr [rsi + 2 * 4]",
+ "vpbroadcastd zmm3, dword ptr [rsi + 3 * 4]",
+ "vpbroadcastd zmm4, dword ptr [rsi + 4 * 4]",
+ "vpbroadcastd zmm5, dword ptr [rsi + 5 * 4]",
+ "vpbroadcastd zmm6, dword ptr [rsi + 6 * 4]",
+ "vpbroadcastd zmm7, dword ptr [rsi + 7 * 4]",
+ // ecx is the block length parameter for blake3_avx512_blocks_16. It is always 64.
"mov ecx, 64",
// Set the CHUNK_START flag.
"or r8d, 1",