diff options
| author | Jack O'Connor <[email protected]> | 2021-03-21 16:21:20 -0400 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2021-03-21 21:14:13 -0400 |
| commit | 05292a018b25e47c2f094aa335a3bef5f4c0ac1a (patch) | |
| tree | 5ee1103ef6b3dad45792f436f3407f1a21a19444 | |
| parent | 07b746b1b4792f8885fe8749b22dd6242467b674 (diff) | |

get rid of the standalone "*_rayon" functions

These clutter the toplevel API, and their prominence might lead callers
to prefer them as a first resort, which probably isn't a good idea.
Restricting multithreading to `Hasher::update_rayon` feels better,
similar to what we've done with `Hasher::finalize_xof`. (But I think
`update_rayon` is still an improvement over the trait-based interface
that it replaced.)

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | .github/workflows/ci.yml | 2 |
| -rw-r--r-- | Cargo.toml | 8 |
| -rw-r--r-- | benches/bench.rs | 2 |
| -rw-r--r-- | src/lib.rs | 92 |
| -rw-r--r-- | src/test.rs | 18 |

5 files changed, 31 insertions, 91 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2c410f0..7ce4b59 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,7 +89,7 @@ jobs: - run: cargo test --features=no_avx512,no_avx2,no_sse41,no_sse2,pure --release # Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains. - - run: cargo test --benches + - run: cargo test --benches --features=rayon env: RUSTC_BOOTSTRAP: 1 # Test vectors. @@ -25,9 +25,9 @@ neon = [] # entire build, with e.g. RUSTFLAGS="-C target-cpu=native". std = ["digest/std"] -# The "rayon" feature (defined below as an optional dependency) enables API -# functions like `hash_rayon` and `update_rayon`. However, even if this feature -# is enabled, all other APIs remain single-threaded. +# The "rayon" feature (defined below as an optional dependency) enables the +# `Hasher::update_rayon` method, for multithreaded hashing. However, even if +# this feature is enabled, all other APIs remain single-threaded. # This crate implements traits from the RustCrypto project, exposed here as the # "traits-preview" feature. However, these traits aren't stable, and they're @@ -75,7 +75,7 @@ no_avx2 = [] no_avx512 = [] [package.metadata.docs.rs] -# Document blake3::join::RayonJoin on docs.rs. +# Document Hasher::update_rayon on docs.rs. features = ["rayon"] [dependencies] diff --git a/benches/bench.rs b/benches/bench.rs index 832f0f8..90dbc42 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -421,7 +421,7 @@ fn bench_reference_1024_kib(b: &mut Bencher) { #[cfg(feature = "rayon")] fn bench_rayon(b: &mut Bencher, len: usize) { let mut input = RandomInput::new(b, len); - b.iter(|| blake3::hash_rayon(input.get())); + b.iter(|| blake3::Hasher::new().update_rayon(input.get()).finalize()); } #[bench] @@ -40,9 +40,8 @@ //! resulting binary will not be portable to other machines. //! //! The `rayon` feature (disabled by default, but enabled for [docs.rs]) adds -//! 
new functions that use [Rayon]-based multithreading internally: -//! [`Hasher::update_rayon`], [`hash_rayon`], [`keyed_hash_rayon`], and -//! [`derive_key_rayon`]. +//! the [`Hasher::update_rayon`] method, for multithreaded hashing. However, +//! even if this feature is enabled, all other APIs remain single-threaded. //! //! The `neon` feature enables ARM NEON support. Currently there is no runtime //! CPU feature detection for NEON, so you must only enable this feature for @@ -59,9 +58,6 @@ //! RustCrypto [`signature`] crate.) //! //! [`Hasher::update_rayon`]: struct.Hasher.html#method.update_rayon -//! [`hash_rayon`]: fn.hash_rayon.html -//! [`keyed_hash_rayon`]: fn.keyed_hash_rayon.html -//! [`derive_key_rayon`]: fn.derive_key_rayon.html //! [BLAKE3]: https://blake3.io //! [Rayon]: https://github.com/rayon-rs/rayon //! [docs.rs]: https://docs.rs/ @@ -711,8 +707,8 @@ fn compress_subtree_wide<J: join::Join>( }; let (left_out, right_out) = cv_array.split_at_mut(degree * OUT_LEN); - // Recurse! For *_rayon functions, this is where we take advantage of RayonJoin and use - // multiple threads. + // Recurse! For update_rayon(), this is where we take advantage of RayonJoin and use multiple + // threads. let (left_n, right_n) = J::join( || compress_subtree_wide::<J>(left, key, chunk_counter, flags, platform, left_out), || compress_subtree_wide::<J>(right, key, right_chunk_counter, flags, platform, right_out), @@ -808,22 +804,11 @@ fn hash_all_at_once<J: join::Join>(input: &[u8], key: &CVWords, flags: u8) -> Ou /// [`OutputReader`]. /// /// This function is always single-threaded. For multithreading support, see -/// the [`hash_rayon`](fn.hash_rayon.html) function (enabled by the `rayon` -/// Cargo feature). +/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon). pub fn hash(input: &[u8]) -> Hash { hash_all_at_once::<join::SerialJoin>(input, IV, 0).root_hash() } -/// Identical to [`hash`], but using Rayon-based multithreading internally. 
-/// -/// Multithreading isn't always helpful for performance, and it's a good idea to -/// benchmark your specific use case. See performance notes on -/// [`Hasher::update_rayon`]. -#[cfg(feature = "rayon")] -pub fn hash_rayon(input: &[u8]) -> Hash { - hash_all_at_once::<join::RayonJoin>(input, IV, 0).root_hash() -} - /// The keyed hash function. /// /// This is suitable for use as a message authentication code, for example to @@ -836,25 +821,13 @@ pub fn hash_rayon(input: &[u8]) -> Hash { /// [`Hasher::finalize_xof`], and [`OutputReader`]. /// /// This function is always single-threaded. For multithreading support, see -/// the [`keyed_hash_rayon`](fn.keyed_hash_rayon.html) function (enabled by the -/// `rayon` Cargo feature). +/// [`Hasher::new_keyed`] and +/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon). pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash { let key_words = platform::words_from_le_bytes_32(key); hash_all_at_once::<join::SerialJoin>(input, &key_words, KEYED_HASH).root_hash() } -/// Identical to [`keyed_hash`], but using Rayon-based multithreading -/// internally. -/// -/// Multithreading isn't always helpful for performance, and it's a good idea to -/// benchmark your specific use case. See performance notes on -/// [`Hasher::update_rayon`]. -#[cfg(feature = "rayon")] -pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash { - let key_words = platform::words_from_le_bytes_32(key); - hash_all_at_once::<join::RayonJoin>(input, &key_words, KEYED_HASH).root_hash() -} - /// The key derivation function. /// /// Given cryptographic key material of any length and a context string of any @@ -885,8 +858,8 @@ pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash { /// [`Hasher::finalize_xof`], and [`OutputReader`]. /// /// This function is always single-threaded. For multithreading support, see -/// the [`derive_key_rayon`](fn.derive_key_rayon.html) function (enabled by the -/// `rayon` Cargo feature). 
+/// [`Hasher::new_derive_key`] and +/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon). /// /// [Argon2]: https://en.wikipedia.org/wiki/Argon2 pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] { @@ -899,25 +872,6 @@ pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] { .0 } -/// Identical to [`derive_key`], but using Rayon-based multithreading -/// internally. -/// -/// Multithreading isn't always helpful for performance, and it's a good idea to -/// benchmark your specific use case. See performance notes on -/// [`Hasher::update_rayon`]. -#[cfg(feature = "rayon")] -pub fn derive_key_rayon(context: &str, key_material: &[u8]) -> [u8; 32] { - // There is no conceivable reason anyone should use a context string long - // enough for multithreading to make a difference. - let context_key = - hash_all_at_once::<join::SerialJoin>(context.as_bytes(), IV, DERIVE_KEY_CONTEXT) - .root_hash(); - let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes()); - hash_all_at_once::<join::RayonJoin>(key_material, &context_key_words, DERIVE_KEY_MATERIAL) - .root_hash() - .0 -} - fn parent_node_output( left_child: &CVBytes, right_child: &CVBytes, @@ -949,10 +903,14 @@ fn parent_node_output( /// guarantees for this feature, and callers who use it should expect breaking /// changes between patch versions. /// -/// **Performance note:** The [`update`] method can't take full advantage of -/// SIMD optimizations if its input buffer is too small or oddly sized. Using a -/// 16 KiB buffer, or any multiple of that, enables all currently supported SIMD -/// instruction sets. +/// When the `rayon` Cargo feature is enabled, the +/// [`update_rayon`](#method.update_rayon) method is available for multithreaded +/// hashing. +/// +/// **Performance note:** The [`update`](#method.update) method can't take full +/// advantage of SIMD optimizations if its input buffer is too small or oddly +/// sized. 
Using a 16 KiB buffer, or any multiple of that, enables all currently +/// supported SIMD instruction sets. /// /// # Examples /// @@ -975,9 +933,6 @@ fn parent_node_output( /// # Ok(()) /// # } /// ``` -/// -/// [`update`]: #method.update -/// [`update_rayon`]: #method.update_rayon #[derive(Clone)] pub struct Hasher { key: CVWords, @@ -1127,19 +1082,22 @@ impl Hasher { /// Identical to [`update`](Hasher::update), but using Rayon-based /// multithreading internally. /// + /// This method is gated by the `rayon` Cargo feature, which is disabled by + /// default but enabled on [docs.rs](https://docs.rs). + /// /// To get any performance benefit from multithreading, the input buffer /// needs to be large. As a rule of thumb on x86_64, `update_rayon` is /// _slower_ than `update` for inputs under 128 KiB. That threshold varies /// quite a lot across different processors, and it's important to benchmark /// your specific use case. /// - /// Memory mapping an entire input file is a good way to take advantage of + /// Memory mapping an entire input file is a simple way to take advantage of /// multithreading without needing to carefully tune your buffer size or /// offload IO. However, on spinning disks where random access is expensive, - /// this can lead to disk thrashing and terrible IO performance. OS page - /// caching can mask this problem, in which case it might only affect files - /// larger than available RAM. Again, benchmarking your specific use case is - /// important. + /// that approach can lead to disk thrashing and terrible IO performance. + /// Note that OS page caching can mask this problem, in which case it might + /// only appear for files larger than available RAM. Again, benchmarking + /// your specific use case is important. 
#[cfg(feature = "rayon")] pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self { self.update_with_join::<join::RayonJoin>(input) diff --git a/src/test.rs b/src/test.rs index 1ebf27f..67e2b8b 100644 --- a/src/test.rs +++ b/src/test.rs @@ -284,12 +284,6 @@ fn test_compare_reference_impl() { // all at once let test_out = crate::hash(input); assert_eq!(test_out, *array_ref!(expected_out, 0, 32)); - // all at once (rayon) - #[cfg(feature = "rayon")] - { - let test_out = crate::hash_rayon(input); - assert_eq!(test_out, *array_ref!(expected_out, 0, 32)); - } // incremental let mut hasher = crate::Hasher::new(); hasher.update(input); @@ -319,12 +313,6 @@ fn test_compare_reference_impl() { // all at once let test_out = crate::keyed_hash(&TEST_KEY, input); assert_eq!(test_out, *array_ref!(expected_out, 0, 32)); - // all at once (rayon) - #[cfg(feature = "rayon")] - { - let test_out = crate::keyed_hash_rayon(&TEST_KEY, input); - assert_eq!(test_out, *array_ref!(expected_out, 0, 32)); - } // incremental let mut hasher = crate::Hasher::new_keyed(&TEST_KEY); hasher.update(input); @@ -355,12 +343,6 @@ fn test_compare_reference_impl() { // all at once let test_out = crate::derive_key(context, input); assert_eq!(test_out, expected_out[..32]); - // all at once (rayon) - #[cfg(feature = "rayon")] - { - let test_out = crate::derive_key_rayon(context, input); - assert_eq!(test_out, expected_out[..32]); - } // incremental let mut hasher = crate::Hasher::new_derive_key(context); hasher.update(input); |
