| author | Jack O'Connor <[email protected]> | 2021-02-28 21:11:21 -0500 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2021-03-14 00:26:18 -0500 |
| commit | b228f46e0308d9c48d19ee077a2f73a402fa26c3 | |
| tree | c54ab2fbad167197e3a84eff1dd9ea2ec942d77c | |
| parent | ea72822620ba77e4f597bf6d6bd4bd8c3b4cc9dc | |
add *_rayon methods
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Cargo.toml | 7 |
| -rw-r--r-- | b3sum/src/main.rs | 2 |
| -rw-r--r-- | benches/bench.rs | 6 |
| -rw-r--r-- | src/join.rs | 11 |
| -rw-r--r-- | src/lib.rs | 107 |
| -rw-r--r-- | src/test.rs | 59 |
6 files changed, 137 insertions, 55 deletions
diff --git a/Cargo.toml b/Cargo.toml
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,10 +25,9 @@ neon = []
 # entire build, with e.g. RUSTFLAGS="-C target-cpu=native".
 std = ["digest/std"]
 
-# The "rayon" feature (defined below as an optional dependency) enables the
-# join::RayonJoin type, which can be used with Hasher::update_with_join to
-# perform multi-threaded hashing. However, even if this feature is enabled, all
-# other APIs remain single-threaded.
+# The "rayon" feature (defined below as an optional dependency) enables API
+# functions like `hash_rayon` and `update_rayon`. However, even if this feature
+# is enabled, all other APIs remain single-threaded.
 
 # ---------- Features below this line are for internal testing only. ----------
 
diff --git a/b3sum/src/main.rs b/b3sum/src/main.rs
index b01e5de..3810bfe 100644
--- a/b3sum/src/main.rs
+++ b/b3sum/src/main.rs
@@ -219,7 +219,7 @@ impl Input {
             // multiple threads. This doesn't work on stdin, or on some files,
             // and it can also be disabled with --no-mmap.
             Self::Mmap(cursor) => {
-                hasher.update_with_join::<blake3::join::RayonJoin>(cursor.get_ref());
+                hasher.update_rayon(cursor.get_ref());
             }
             // The slower paths, for stdin or files we didn't/couldn't mmap.
             // This is currently all single-threaded. Doing multi-threaded
diff --git a/benches/bench.rs b/benches/bench.rs
index ba5a404..832f0f8 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -421,11 +421,7 @@ fn bench_reference_1024_kib(b: &mut Bencher) {
 #[cfg(feature = "rayon")]
 fn bench_rayon(b: &mut Bencher, len: usize) {
     let mut input = RandomInput::new(b, len);
-    b.iter(|| {
-        blake3::Hasher::new()
-            .update_with_join::<blake3::join::RayonJoin>(input.get())
-            .finalize()
-    });
+    b.iter(|| blake3::hash_rayon(input.get()));
 }
 
 #[bench]
diff --git a/src/join.rs b/src/join.rs
index 2435bc6..227216a 100644
--- a/src/join.rs
+++ b/src/join.rs
@@ -1,11 +1,12 @@
-//! The multi-threading abstractions used by [`Hasher::update_with_join`].
+//! The multi-threading abstractions used by `Hasher::update_with_join`.
 //!
 //! Different implementations of the `Join` trait determine whether
-//! [`Hasher::update_with_join`] performs multi-threading on sufficiently large
+//! `Hasher::update_with_join` performs multi-threading on sufficiently large
 //! inputs. The `SerialJoin` implementation is single-threaded, and the
-//! `RayonJoin` implementation (gated by the `rayon` feature) is
-//! multi-threaded. Interfaces other than [`Hasher::update_with_join`], like
-//! [`hash`] and [`Hasher::update`], always use `SerialJoin` internally.
+//! `RayonJoin` implementation (gated by the `rayon` feature) is multi-threaded.
+//! Interfaces other than `Hasher::update_with_join`, like [`hash`](crate::hash)
+//! and [`Hasher::update`](crate::Hasher::update), always use `SerialJoin`
+//! internally.
 //!
 //! The `Join` trait is an almost exact copy of the [`rayon::join`] API, and
 //! `RayonJoin` is the only non-trivial implementation. Previously this trait
diff --git a/src/lib.rs b/src/lib.rs
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,9 +32,9 @@
 //!
 //! # Cargo Features
 //!
-//! The `rayon` feature provides [Rayon]-based multi-threading, in particular
-//! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It
-//! is disabled by default, but enabled for [docs.rs].
+//! The `rayon` feature provides [Rayon]-based multi-threading, via functions
+//! with the `_rayon` suffix. It is disabled by default, but enabled for
+//! [docs.rs].
 //!
 //! The `neon` feature enables ARM NEON support. Currently there is no runtime
 //! CPU feature detection for NEON, so you must only enable this feature for
@@ -107,7 +107,6 @@ use arrayref::{array_mut_ref, array_ref};
 use arrayvec::{ArrayString, ArrayVec};
 use core::cmp;
 use core::fmt;
-use join::{Join, SerialJoin};
 use platform::{Platform, MAX_SIMD_DEGREE, MAX_SIMD_DEGREE_OR_2};
 
 /// The number of bytes in a [`Hash`](struct.Hash.html), 32.
@@ -659,7 +658,7 @@ fn compress_parents_parallel(
 // Why not just have the caller split the input on the first update(), instead
 // of implementing this special rule? Because we don't want to limit SIMD or
 // multi-threading parallelism for that update().
-fn compress_subtree_wide<J: Join>(
+fn compress_subtree_wide<J: join::Join>(
     input: &[u8],
     key: &CVWords,
     chunk_counter: u64,
@@ -733,7 +732,7 @@ fn compress_subtree_wide<J: Join>(
 //
 // As with compress_subtree_wide(), this function is not used on inputs of 1
 // chunk or less. That's a different codepath.
-fn compress_subtree_to_parent_node<J: Join>(
+fn compress_subtree_to_parent_node<J: join::Join>(
     input: &[u8],
     key: &CVWords,
     chunk_counter: u64,
@@ -761,7 +760,7 @@ fn compress_subtree_to_parent_node<J: Join>(
 // Hash a complete input all at once. Unlike compress_subtree_wide() and
 // compress_subtree_to_parent_node(), this function handles the 1 chunk case.
 // Note that this we use SerialJoin here, so this is always single-threaded.
-fn hash_all_at_once(input: &[u8], key: &CVWords, flags: u8) -> Output {
+fn hash_all_at_once<J: join::Join>(input: &[u8], key: &CVWords, flags: u8) -> Output {
     let platform = Platform::detect();
 
     // If the whole subtree is one chunk, hash it directly with a ChunkState.
@@ -775,7 +774,7 @@ fn hash_all_at_once(input: &[u8], key: &CVWords, flags: u8) -> Output {
     // compress_subtree_to_parent_node().
     Output {
         input_chaining_value: *key,
-        block: compress_subtree_to_parent_node::<SerialJoin>(input, key, 0, flags, platform),
+        block: compress_subtree_to_parent_node::<J>(input, key, 0, flags, platform),
         block_len: BLOCK_LEN as u8,
         counter: 0,
         flags: flags | PARENT,
@@ -792,9 +791,22 @@ fn hash_all_at_once(input: &[u8], key: &CVWords, flags: u8) -> Output {
 /// [`OutputReader`].
 ///
 /// This function is always single-threaded. For multi-threading support, see
-/// [`Hasher::update_with_join`].
+/// the [`hash_rayon`](hash_rayon) function (enabled by the `rayon` Cargo
+/// feature).
 pub fn hash(input: &[u8]) -> Hash {
-    hash_all_at_once(input, IV, 0).root_hash()
+    hash_all_at_once::<join::SerialJoin>(input, IV, 0).root_hash()
+}
+
+/// Like [`hash`], but using Rayon-based multithreading as a performance
+/// optimization.
+///
+/// To get any performance benefit from multi-threading, the input needs to be
+/// very large. As a rule of thumb on x86_64, there is no benefit to
+/// multi-threading inputs less than 128 KiB. Other platforms have different
+/// thresholds, and in general you need to benchmark your specific use case.
+#[cfg(feature = "rayon")]
+pub fn hash_rayon(input: &[u8]) -> Hash {
+    hash_all_at_once::<join::RayonJoin>(input, IV, 0).root_hash()
 }
 
 /// The keyed hash function.
@@ -809,10 +821,24 @@ pub fn hash(input: &[u8]) -> Hash {
 /// [`Hasher::finalize_xof`], and [`OutputReader`].
 ///
 /// This function is always single-threaded. For multi-threading support, see
-/// [`Hasher::update_with_join`].
+/// the [`keyed_hash_rayon`](keyed_hash_rayon) function (enabled by the `rayon`
+/// Cargo feature).
 pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
     let key_words = platform::words_from_le_bytes_32(key);
-    hash_all_at_once(input, &key_words, KEYED_HASH).root_hash()
+    hash_all_at_once::<join::SerialJoin>(input, &key_words, KEYED_HASH).root_hash()
+}
+
+/// Like [`keyed_hash`], but using Rayon-based multithreading as a performance
+/// optimization.
+///
+/// To get any performance benefit from multi-threading, the input needs to be
+/// very large. As a rule of thumb on x86_64, there is no benefit to
+/// multi-threading inputs less than 128 KiB. Other platforms have different
+/// thresholds, and in general you need to benchmark your specific use case.
+#[cfg(feature = "rayon")]
+pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
+    let key_words = platform::words_from_le_bytes_32(key);
+    hash_all_at_once::<join::RayonJoin>(input, &key_words, KEYED_HASH).root_hash()
 }
 
 /// The key derivation function.
@@ -845,13 +871,36 @@ pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
 /// [`Hasher::finalize_xof`], and [`OutputReader`].
 ///
 /// This function is always single-threaded. For multi-threading support, see
-/// [`Hasher::update_with_join`].
+/// the [`derive_key_rayon`](derive_key_rayon) function (enabled by the `rayon`
+/// Cargo feature).
 ///
 /// [Argon2]: https://en.wikipedia.org/wiki/Argon2
 pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] {
-    let context_key = hash_all_at_once(context.as_bytes(), IV, DERIVE_KEY_CONTEXT).root_hash();
+    let context_key =
+        hash_all_at_once::<join::SerialJoin>(context.as_bytes(), IV, DERIVE_KEY_CONTEXT)
+            .root_hash();
     let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes());
-    hash_all_at_once(key_material, &context_key_words, DERIVE_KEY_MATERIAL)
+    hash_all_at_once::<join::SerialJoin>(key_material, &context_key_words, DERIVE_KEY_MATERIAL)
+        .root_hash()
+        .0
+}
+
+/// Like [`derive_key`], but using Rayon-based multithreading as a performance
+/// optimization.
+///
+/// To get any performance benefit from multi-threading, the input needs to be
+/// very large. As a rule of thumb on x86_64, there is no benefit to
+/// multi-threading inputs less than 128 KiB. Other platforms have different
+/// thresholds, and in general you need to benchmark your specific use case.
+#[cfg(feature = "rayon")]
+pub fn derive_key_rayon(context: &str, key_material: &[u8]) -> [u8; 32] {
+    // There is no conceivable reason anyone should use a context string long
+    // enough for multithreading to make a difference.
+    let context_key =
+        hash_all_at_once::<join::SerialJoin>(context.as_bytes(), IV, DERIVE_KEY_CONTEXT)
+            .root_hash();
+    let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes());
+    hash_all_at_once::<join::RayonJoin>(key_material, &context_key_words, DERIVE_KEY_MATERIAL)
         .root_hash()
         .0
 }
@@ -882,10 +931,10 @@ fn parent_node_output(
 /// used traits from the [`digest`](https://crates.io/crates/digest) and
 /// [`crypto_mac`](https://crates.io/crates/crypto-mac) crates.
 ///
-/// **Performance note:** The [`update`] and [`update_with_join`] methods
-/// perform poorly when the caller's input buffer is small. See their method
-/// docs below. A 16 KiB buffer is large enough to leverage all currently
-/// supported SIMD instruction sets.
+/// **Performance note:** The [`update`] and (if the `rayon` Cargo feature is
+/// enabled) [`update_rayon`] methods perform poorly when the caller's input
+/// buffer is small. See their method docs below. A 16 KiB buffer is large
+/// enough to leverage all currently supported SIMD instruction sets.
 ///
 /// # Examples
 ///
@@ -952,7 +1001,9 @@ impl Hasher {
     ///
     /// [`derive_key`]: fn.derive_key.html
     pub fn new_derive_key(context: &str) -> Self {
-        let context_key = hash_all_at_once(context.as_bytes(), IV, DERIVE_KEY_CONTEXT).root_hash();
+        let context_key =
+            hash_all_at_once::<join::SerialJoin>(context.as_bytes(), IV, DERIVE_KEY_CONTEXT)
+                .root_hash();
         let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes());
         Self::new_internal(&context_key_words, DERIVE_KEY_MATERIAL)
     }
@@ -1054,12 +1105,11 @@ impl Hasher {
     ///
     /// [`std::io::copy`]: https://doc.rust-lang.org/std/io/fn.copy.html
     pub fn update(&mut self, input: &[u8]) -> &mut Self {
-        self.update_with_join::<SerialJoin>(input)
+        self.update_with_join::<join::SerialJoin>(input)
     }
 
-    /// Add input bytes to the hash state, as with `update`, but potentially
-    /// using multi-threading. See the example below, and the
-    /// [`join`](join/index.html) module for a more detailed explanation.
+    /// Like [`update`](Hasher::update), but using Rayon-based multithreading as
+    /// a performance optimization.
     ///
     /// To get any performance benefit from multi-threading, the input buffer
     /// size needs to be very large. As a rule of thumb on x86_64, there is no
@@ -1087,11 +1137,16 @@ impl Hasher {
     /// # fn some_large_input() -> &'static [u8] { b"foo" }
     /// let input: &[u8] = some_large_input();
     /// let mut hasher = blake3::Hasher::new();
-    /// hasher.update_with_join::<blake3::join::RayonJoin>(input);
+    /// hasher.update_rayon(input);
    /// let hash = hasher.finalize();
     /// # }
     /// ```
-    pub fn update_with_join<J: Join>(&mut self, mut input: &[u8]) -> &mut Self {
+    #[cfg(feature = "rayon")]
+    pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self {
+        self.update_with_join::<join::RayonJoin>(input)
+    }
+
+    fn update_with_join<J: join::Join>(&mut self, mut input: &[u8]) -> &mut Self {
        // If we have some partial chunk bytes in the internal chunk_state, we
        // need to finish that chunk first.
        if self.chunk_state.len() > 0 {
diff --git a/src/test.rs b/src/test.rs
index b99892c..1ebf27f 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -283,12 +283,26 @@ fn test_compare_reference_impl() {
 
         // all at once
         let test_out = crate::hash(input);
-        assert_eq!(test_out, expected_out[..32]);
+        assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
+        // all at once (rayon)
+        #[cfg(feature = "rayon")]
+        {
+            let test_out = crate::hash_rayon(input);
+            assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
+        }
         // incremental
         let mut hasher = crate::Hasher::new();
         hasher.update(input);
         assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
         assert_eq!(hasher.finalize(), test_out);
+        // incremental (rayon)
+        #[cfg(feature = "rayon")]
+        {
+            let mut hasher = crate::Hasher::new();
+            hasher.update_rayon(input);
+            assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
+            assert_eq!(hasher.finalize(), test_out);
+        }
         // xof
         let mut extended = [0; OUT];
         hasher.finalize_xof().fill(&mut extended);
@@ -304,12 +318,26 @@ fn test_compare_reference_impl() {
 
         // all at once
         let test_out = crate::keyed_hash(&TEST_KEY, input);
-        assert_eq!(test_out, expected_out[..32]);
+        assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
+        // all at once (rayon)
+        #[cfg(feature = "rayon")]
+        {
+            let test_out = crate::keyed_hash_rayon(&TEST_KEY, input);
+            assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
+        }
         // incremental
         let mut hasher = crate::Hasher::new_keyed(&TEST_KEY);
         hasher.update(input);
         assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
         assert_eq!(hasher.finalize(), test_out);
+        // incremental (rayon)
+        #[cfg(feature = "rayon")]
+        {
+            let mut hasher = crate::Hasher::new_keyed(&TEST_KEY);
+            hasher.update_rayon(input);
+            assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
+            assert_eq!(hasher.finalize(), test_out);
+        }
         // xof
         let mut extended = [0; OUT];
         hasher.finalize_xof().fill(&mut extended);
@@ -326,12 +354,26 @@ fn test_compare_reference_impl() {
 
         // all at once
         let test_out = crate::derive_key(context, input);
-        assert_eq!(test_out[..], expected_out[..32]);
+        assert_eq!(test_out, expected_out[..32]);
+        // all at once (rayon)
+        #[cfg(feature = "rayon")]
+        {
+            let test_out = crate::derive_key_rayon(context, input);
+            assert_eq!(test_out, expected_out[..32]);
+        }
         // incremental
         let mut hasher = crate::Hasher::new_derive_key(context);
         hasher.update(input);
         assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
         assert_eq!(hasher.finalize(), *array_ref!(test_out, 0, 32));
+        // incremental (rayon)
+        #[cfg(feature = "rayon")]
+        {
+            let mut hasher = crate::Hasher::new_derive_key(context);
+            hasher.update_rayon(input);
+            assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
+            assert_eq!(hasher.finalize(), *array_ref!(test_out, 0, 32));
+        }
         // xof
         let mut extended = [0; OUT];
         hasher.finalize_xof().fill(&mut extended);
@@ -504,17 +546,6 @@ fn test_reset() {
 }
 
 #[test]
-#[cfg(feature = "rayon")]
-fn test_update_with_rayon_join() {
-    let mut input = [0; TEST_CASES_MAX];
-    paint_test_input(&mut input);
-    let rayon_hash = crate::Hasher::new()
-        .update_with_join::<crate::join::RayonJoin>(&input)
-        .finalize();
-    assert_eq!(crate::hash(&input), rayon_hash);
-}
-
-#[test]
 fn test_hex_encoding_decoding() {
     let digest_str = "04e0bb39f30b1a3feb89f536c93be15055482df748674b00d26e5a75777702e9";
     let mut hasher = crate::Hasher::new();
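For readers skimming the new API surface, here is a minimal usage sketch (not part of the commit) of the `*_rayon` functions added above. It assumes the crate is built with the `rayon` Cargo feature; the 1 MiB buffer, the key bytes, and the `derive_key` context string are made-up placeholders, and as the new docs note, multi-threading only pays off for inputs well above roughly 128 KiB on x86_64.

```rust
// Illustrative sketch only. Build with the `rayon` feature enabled, e.g.
// blake3 = { features = ["rayon"], ... } in the caller's Cargo.toml.
fn main() {
    // Hypothetical large input; small buffers see no multi-threading benefit.
    let input = vec![0xabu8; 1 << 20]; // 1 MiB

    // One-shot hashing, multi-threaded internally via Rayon.
    let hash = blake3::hash_rayon(&input);

    // Incremental hashing with the new Hasher::update_rayon method.
    let mut hasher = blake3::Hasher::new();
    hasher.update_rayon(&input);
    assert!(hasher.finalize() == hash);

    // The keyed and key-derivation modes get matching `_rayon` variants.
    let key = [0x42u8; blake3::KEY_LEN];
    let _keyed: blake3::Hash = blake3::keyed_hash_rayon(&key, &input);
    let _derived: [u8; 32] = blake3::derive_key_rayon("example.com 2021 demo v1", &input);

    println!("{}", hash.to_hex());
}
```

Design-wise, `update_with_join` loses its `pub` in this commit, so callers now choose between `update` and `update_rayon` rather than supplying a `Join` implementation themselves.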
