diff options
| author | Joel Rosdahl <[email protected]> | 2023-05-23 21:47:15 +0200 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2023-05-23 14:39:27 -0700 |
| commit | 2dd4e57f68d85f3983b1880b66250fc7bdf0b7c8 (patch) | |
| tree | 3bfbedaa7e893cca918aec1c5eb2f5994c3420f7 /c | |
| parent | 71a2646180c787e22f8681c5fec7655a0ad51e99 (diff) | |
Fix typos
Diffstat (limited to 'c')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | c/README.md | 2 |
| -rw-r--r-- | c/blake3_avx2.c | 2 |
| -rw-r--r-- | c/blake3_avx512.c | 6 |
| -rw-r--r-- | c/blake3_c_rust_bindings/build.rs | 4 |
| -rw-r--r-- | c/blake3_c_rust_bindings/src/test.rs | 2 |
| -rw-r--r-- | c/blake3_sse2.c | 2 |
| -rw-r--r-- | c/blake3_sse41.c | 2 |
| -rwxr-xr-x | c/test.py | 2 |
8 files changed, 11 insertions, 11 deletions
```diff
diff --git a/c/README.md b/c/README.md
index 259af2a..965d8c7 100644
--- a/c/README.md
+++ b/c/README.md
@@ -256,7 +256,7 @@ gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
 Note above that building `blake3_avx512.c` requires both `-mavx512f` and
 `-mavx512vl` under GCC and Clang. Under MSVC, the single `/arch:AVX512`
 flag is sufficient. The MSVC equivalent of `-mavx2` is `/arch:AVX2`.
-MSVC enables SSE2 and SSE4.1 by defaut, and it doesn't have a
+MSVC enables SSE2 and SSE4.1 by default, and it doesn't have a
 corresponding flag.
 
 If you want to omit SIMD code entirely, you need to explicitly disable
diff --git a/c/blake3_avx2.c b/c/blake3_avx2.c
index e76aa1a..381e7c4 100644
--- a/c/blake3_avx2.c
+++ b/c/blake3_avx2.c
@@ -167,7 +167,7 @@ INLINE void transpose_vecs(__m256i vecs[DEGREE]) {
   __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
   __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
 
-  // Interleave 64-bit lates. The low unpack is lanes 00/22 and the high is
+  // Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
   // 11/33.
   __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
   __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
diff --git a/c/blake3_avx512.c b/c/blake3_avx512.c
index 334d82d..d6b1ae9 100644
--- a/c/blake3_avx512.c
+++ b/c/blake3_avx512.c
@@ -429,7 +429,7 @@ INLINE void round_fn4(__m128i v[16], __m128i m[16], size_t r) {
 }
 
 INLINE void transpose_vecs_128(__m128i vecs[4]) {
-  // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
   // 22/33. Note that this doesn't split the vector into two lanes, as the
   // AVX2 counterparts do.
   __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
@@ -684,7 +684,7 @@ INLINE void transpose_vecs_256(__m256i vecs[8]) {
   __m256i gh_0145 = _mm256_unpacklo_epi32(vecs[6], vecs[7]);
   __m256i gh_2367 = _mm256_unpackhi_epi32(vecs[6], vecs[7]);
 
-  // Interleave 64-bit lates. The low unpack is lanes 00/22 and the high is
+  // Interleave 64-bit lanes. The low unpack is lanes 00/22 and the high is
   // 11/33.
   __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);
   __m256i abcd_15 = _mm256_unpackhi_epi64(ab_0145, cd_0145);
@@ -959,7 +959,7 @@ INLINE void transpose_vecs_512(__m512i vecs[16]) {
   __m512i op_0 = _mm512_unpacklo_epi32(vecs[14], vecs[15]);
   __m512i op_2 = _mm512_unpackhi_epi32(vecs[14], vecs[15]);
 
-  // Interleave 64-bit lates. The _0 unpack is lanes
+  // Interleave 64-bit lanes. The _0 unpack is lanes
   // 0/0/0/0/4/4/4/4/8/8/8/8/12/12/12/12, the _1 unpack is lanes
   // 1/1/1/1/5/5/5/5/9/9/9/9/13/13/13/13, the _2 unpack is lanes
   // 2/2/2/2/6/6/6/6/10/10/10/10/14/14/14/14, and the _3 unpack is lanes
diff --git a/c/blake3_c_rust_bindings/build.rs b/c/blake3_c_rust_bindings/build.rs
index 98f8396..624dbb9 100644
--- a/c/blake3_c_rust_bindings/build.rs
+++ b/c/blake3_c_rust_bindings/build.rs
@@ -110,7 +110,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         if is_windows_msvc() {
             // /arch:SSE2 is the default on x86 and undefined on x86_64:
             // https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
-            // It also includes SSE4.1 intrisincs:
+            // It also includes SSE4.1 intrinsics:
             // https://stackoverflow.com/a/32183222/823869
         } else {
             sse2_build.flag("-msse2");
@@ -122,7 +122,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         if is_windows_msvc() {
             // /arch:SSE2 is the default on x86 and undefined on x86_64:
             // https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
-            // It also includes SSE4.1 intrisincs:
+            // It also includes SSE4.1 intrinsics:
             // https://stackoverflow.com/a/32183222/823869
         } else {
             sse41_build.flag("-msse4.1");
diff --git a/c/blake3_c_rust_bindings/src/test.rs b/c/blake3_c_rust_bindings/src/test.rs
index 28ec34b..1fc077c 100644
--- a/c/blake3_c_rust_bindings/src/test.rs
+++ b/c/blake3_c_rust_bindings/src/test.rs
@@ -63,7 +63,7 @@ pub const TEST_KEY_WORDS: [u32; 8] = [
 ];
 
 // Paint the input with a repeating byte pattern. We use a cycle length of 251,
-// because that's the largets prime number less than 256. This makes it
+// because that's the largest prime number less than 256. This makes it
 // unlikely to swapping any two adjacent input blocks or chunks will give the
 // same answer.
 fn paint_test_input(buf: &mut [u8]) {
diff --git a/c/blake3_sse2.c b/c/blake3_sse2.c
index f4449ac..691e1c6 100644
--- a/c/blake3_sse2.c
+++ b/c/blake3_sse2.c
@@ -396,7 +396,7 @@ INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
 }
 
 INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
-  // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
   // 22/33. Note that this doesn't split the vector into two lanes, as the
   // AVX2 counterparts do.
   __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
diff --git a/c/blake3_sse41.c b/c/blake3_sse41.c
index 87a8dae..4653a85 100644
--- a/c/blake3_sse41.c
+++ b/c/blake3_sse41.c
@@ -390,7 +390,7 @@ INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
 }
 
 INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
-  // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is
+  // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
   // 22/33. Note that this doesn't split the vector into two lanes, as the
   // AVX2 counterparts do.
   __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
diff --git a/c/test.py b/c/test.py
--- a/c/test.py
+++ b/c/test.py
@@ -19,7 +19,7 @@ def run_blake3(args, input):
 
 
 # Fill the input with a repeating byte pattern. We use a cycle length of 251,
-# because that's the largets prime number less than 256. This makes it unlikely
+# because that's the largest prime number less than 256. This makes it unlikely
 # to swapping any two adjacent input blocks or chunks will give the same
 # answer.
 def make_test_input(length):
```
