get rid of the standalone "*_rayon" functions

These clutter the top-level API, and their prominence might lead callers to prefer them as a first resort, which probably isn't a good idea. Restricting multithreading to `Hasher::update_rayon` feels better, similar to what we've done with `Hasher::finalize_xof`. (But I think `update_rayon` is still an improvement over the trait-based interface that it replaced.)
author: Jack O'Connor <[email protected]> 2021-03-21 16:21:20 -0400
committer: Jack O'Connor <[email protected]> 2021-03-21 21:14:13 -0400
commit: 05292a018b25e47c2f094aa335a3bef5f4c0ac1a (patch)
tree: 5ee1103ef6b3dad45792f436f3407f1a21a19444
parent: 07b746b1b4792f8885fe8749b22dd6242467b674 (diff)
5 files changed, 31 insertions, 91 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2c410f0..7ce4b59 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -89,7 +89,7 @@ jobs:
     - run: cargo test --features=no_avx512,no_avx2,no_sse41,no_sse2,pure --release
 
     # Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains.
-    - run: cargo test --benches
+    - run: cargo test --benches --features=rayon
       env:
         RUSTC_BOOTSTRAP: 1
     # Test vectors.
diff --git a/Cargo.toml b/Cargo.toml
index 2cd37cd..316cd09 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,9 +25,9 @@ neon = []
 # entire build, with e.g. RUSTFLAGS="-C target-cpu=native".
 std = ["digest/std"]
 
-# The "rayon" feature (defined below as an optional dependency) enables API
-# functions like `hash_rayon` and `update_rayon`. However, even if this feature
-# is enabled, all other APIs remain single-threaded.
+# The "rayon" feature (defined below as an optional dependency) enables the
+# `Hasher::update_rayon` method, for multithreaded hashing. However, even if
+# this feature is enabled, all other APIs remain single-threaded.
 
 # This crate implements traits from the RustCrypto project, exposed here as the
 # "traits-preview" feature. However, these traits aren't stable, and they're
@@ -75,7 +75,7 @@ no_avx2 = []
 no_avx512 = []
 
 [package.metadata.docs.rs]
-# Document blake3::join::RayonJoin on docs.rs.
+# Document Hasher::update_rayon on docs.rs.
 features = ["rayon"]
 
 [dependencies]
diff --git a/benches/bench.rs b/benches/bench.rs
index 832f0f8..90dbc42 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -421,7 +421,7 @@ fn bench_reference_1024_kib(b: &mut Bencher) {
 #[cfg(feature = "rayon")]
 fn bench_rayon(b: &mut Bencher, len: usize) {
     let mut input = RandomInput::new(b, len);
-    b.iter(|| blake3::hash_rayon(input.get()));
+    b.iter(|| blake3::Hasher::new().update_rayon(input.get()).finalize());
 }
 
 #[bench]
diff --git a/src/lib.rs b/src/lib.rs
index 34a883f..7376636 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -40,9 +40,8 @@
 //! resulting binary will not be portable to other machines.
 //!
 //! The `rayon` feature (disabled by default, but enabled for [docs.rs]) adds
-//! new functions that use [Rayon]-based multithreading internally:
-//! [`Hasher::update_rayon`], [`hash_rayon`], [`keyed_hash_rayon`], and
-//! [`derive_key_rayon`].
+//! the [`Hasher::update_rayon`] method, for multithreaded hashing. However,
+//! even if this feature is enabled, all other APIs remain single-threaded.
 //!
 //! The `neon` feature enables ARM NEON support. Currently there is no runtime
 //! CPU feature detection for NEON, so you must only enable this feature for
@@ -59,9 +58,6 @@
 //! RustCrypto [`signature`] crate.)
 //!
 //! [`Hasher::update_rayon`]: struct.Hasher.html#method.update_rayon
-//! [`hash_rayon`]: fn.hash_rayon.html
-//! [`keyed_hash_rayon`]: fn.keyed_hash_rayon.html
-//! [`derive_key_rayon`]: fn.derive_key_rayon.html
 //! [BLAKE3]: https://blake3.io
 //! [Rayon]: https://github.com/rayon-rs/rayon
 //! [docs.rs]: https://docs.rs/
@@ -711,8 +707,8 @@ fn compress_subtree_wide<J: join::Join>(
     };
     let (left_out, right_out) = cv_array.split_at_mut(degree * OUT_LEN);
 
-    // Recurse! For *_rayon functions, this is where we take advantage of RayonJoin and use
-    // multiple threads.
+    // Recurse! For update_rayon(), this is where we take advantage of RayonJoin and use multiple
+    // threads.
     let (left_n, right_n) = J::join(
         || compress_subtree_wide::<J>(left, key, chunk_counter, flags, platform, left_out),
         || compress_subtree_wide::<J>(right, key, right_chunk_counter, flags, platform, right_out),
@@ -808,22 +804,11 @@ fn hash_all_at_once<J: join::Join>(input: &[u8], key: &CVWords, flags: u8) -> Ou
 /// [`OutputReader`].
 ///
 /// This function is always single-threaded. For multithreading support, see
-/// the [`hash_rayon`](fn.hash_rayon.html) function (enabled by the `rayon`
-/// Cargo feature).
+/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
 pub fn hash(input: &[u8]) -> Hash {
     hash_all_at_once::<join::SerialJoin>(input, IV, 0).root_hash()
 }
 
-/// Identical to [`hash`], but using Rayon-based multithreading internally.
-///
-/// Multithreading isn't always helpful for performance, and it's a good idea to
-/// benchmark your specific use case. See performance notes on
-/// [`Hasher::update_rayon`].
-#[cfg(feature = "rayon")]
-pub fn hash_rayon(input: &[u8]) -> Hash {
-    hash_all_at_once::<join::RayonJoin>(input, IV, 0).root_hash()
-}
-
 /// The keyed hash function.
 ///
 /// This is suitable for use as a message authentication code, for example to
@@ -836,25 +821,13 @@ pub fn hash_rayon(input: &[u8]) -> Hash {
 /// [`Hasher::finalize_xof`], and [`OutputReader`].
 ///
 /// This function is always single-threaded. For multithreading support, see
-/// the [`keyed_hash_rayon`](fn.keyed_hash_rayon.html) function (enabled by the
-/// `rayon` Cargo feature).
+/// [`Hasher::new_keyed`] and
+/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
 pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
     let key_words = platform::words_from_le_bytes_32(key);
     hash_all_at_once::<join::SerialJoin>(input, &key_words, KEYED_HASH).root_hash()
 }
 
-/// Identical to [`keyed_hash`], but using Rayon-based multithreading
-/// internally.
-///
-/// Multithreading isn't always helpful for performance, and it's a good idea to
-/// benchmark your specific use case. See performance notes on
-/// [`Hasher::update_rayon`].
-#[cfg(feature = "rayon")]
-pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
-    let key_words = platform::words_from_le_bytes_32(key);
-    hash_all_at_once::<join::RayonJoin>(input, &key_words, KEYED_HASH).root_hash()
-}
-
 /// The key derivation function.
 ///
 /// Given cryptographic key material of any length and a context string of any
@@ -885,8 +858,8 @@ pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
 /// [`Hasher::finalize_xof`], and [`OutputReader`].
 ///
 /// This function is always single-threaded. For multithreading support, see
-/// the [`derive_key_rayon`](fn.derive_key_rayon.html) function (enabled by the
-/// `rayon` Cargo feature).
+/// [`Hasher::new_derive_key`] and
+/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
 ///
 /// [Argon2]: https://en.wikipedia.org/wiki/Argon2
 pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] {
@@ -899,25 +872,6 @@ pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] {
         .0
 }
 
-/// Identical to [`derive_key`], but using Rayon-based multithreading
-/// internally.
-///
-/// Multithreading isn't always helpful for performance, and it's a good idea to
-/// benchmark your specific use case. See performance notes on
-/// [`Hasher::update_rayon`].
-#[cfg(feature = "rayon")]
-pub fn derive_key_rayon(context: &str, key_material: &[u8]) -> [u8; 32] {
-    // There is no conceivable reason anyone should use a context string long
-    // enough for multithreading to make a difference.
-    let context_key =
-        hash_all_at_once::<join::SerialJoin>(context.as_bytes(), IV, DERIVE_KEY_CONTEXT)
-            .root_hash();
-    let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes());
-    hash_all_at_once::<join::RayonJoin>(key_material, &context_key_words, DERIVE_KEY_MATERIAL)
-        .root_hash()
-        .0
-}
-
 fn parent_node_output(
     left_child: &CVBytes,
     right_child: &CVBytes,
@@ -949,10 +903,14 @@ fn parent_node_output(
 /// guarantees for this feature, and callers who use it should expect breaking
 /// changes between patch versions.
 ///
-/// **Performance note:** The [`update`] method can't take full advantage of
-/// SIMD optimizations if its input buffer is too small or oddly sized. Using a
-/// 16 KiB buffer, or any multiple of that, enables all currently supported SIMD
-/// instruction sets.
+/// When the `rayon` Cargo feature is enabled, the
+/// [`update_rayon`](#method.update_rayon) method is available for multithreaded
+/// hashing.
+///
+/// **Performance note:** The [`update`](#method.update) method can't take full
+/// advantage of SIMD optimizations if its input buffer is too small or oddly
+/// sized. Using a 16 KiB buffer, or any multiple of that, enables all currently
+/// supported SIMD instruction sets.
 ///
 /// # Examples
 ///
@@ -975,9 +933,6 @@ fn parent_node_output(
 /// # Ok(())
 /// # }
 /// ```
-///
-/// [`update`]: #method.update
-/// [`update_rayon`]: #method.update_rayon
 #[derive(Clone)]
 pub struct Hasher {
     key: CVWords,
@@ -1127,19 +1082,22 @@ impl Hasher {
     /// Identical to [`update`](Hasher::update), but using Rayon-based
     /// multithreading internally.
     ///
+    /// This method is gated by the `rayon` Cargo feature, which is disabled by
+    /// default but enabled on [docs.rs](https://docs.rs).
+    ///
     /// To get any performance benefit from multithreading, the input buffer
     /// needs to be large. As a rule of thumb on x86_64, `update_rayon` is
     /// _slower_ than `update` for inputs under 128 KiB. That threshold varies
     /// quite a lot across different processors, and it's important to benchmark
     /// your specific use case.
     ///
-    /// Memory mapping an entire input file is a good way to take advantage of
+    /// Memory mapping an entire input file is a simple way to take advantage of
     /// multithreading without needing to carefully tune your buffer size or
     /// offload IO. However, on spinning disks where random access is expensive,
-    /// this can lead to disk thrashing and terrible IO performance. OS page
-    /// caching can mask this problem, in which case it might only affect files
-    /// larger than available RAM. Again, benchmarking your specific use case is
-    /// important.
+    /// that approach can lead to disk thrashing and terrible IO performance.
+    /// Note that OS page caching can mask this problem, in which case it might
+    /// only appear for files larger than available RAM. Again, benchmarking
+    /// your specific use case is important.
     #[cfg(feature = "rayon")]
     pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self {
         self.update_with_join::<join::RayonJoin>(input)
diff --git a/src/test.rs b/src/test.rs
index 1ebf27f..67e2b8b 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -284,12 +284,6 @@ fn test_compare_reference_impl() {
             // all at once
             let test_out = crate::hash(input);
             assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
-            // all at once (rayon)
-            #[cfg(feature = "rayon")]
-            {
-                let test_out = crate::hash_rayon(input);
-                assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
-            }
             // incremental
             let mut hasher = crate::Hasher::new();
             hasher.update(input);
@@ -319,12 +313,6 @@ fn test_compare_reference_impl() {
             // all at once
             let test_out = crate::keyed_hash(&TEST_KEY, input);
             assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
-            // all at once (rayon)
-            #[cfg(feature = "rayon")]
-            {
-                let test_out = crate::keyed_hash_rayon(&TEST_KEY, input);
-                assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
-            }
             // incremental
             let mut hasher = crate::Hasher::new_keyed(&TEST_KEY);
             hasher.update(input);
@@ -355,12 +343,6 @@ fn test_compare_reference_impl() {
             // all at once
             let test_out = crate::derive_key(context, input);
             assert_eq!(test_out, expected_out[..32]);
-            // all at once (rayon)
-            #[cfg(feature = "rayon")]
-            {
-                let test_out = crate::derive_key_rayon(context, input);
-                assert_eq!(test_out, expected_out[..32]);
-            }
             // incremental
             let mut hasher = crate::Hasher::new_derive_key(context);
             hasher.update(input);
author	Jack O'Connor <[email protected]>	2021-03-21 16:21:20 -0400
committer	Jack O'Connor <[email protected]>	2021-03-21 21:14:13 -0400
commit	05292a018b25e47c2f094aa335a3bef5f4c0ac1a (patch)
tree	5ee1103ef6b3dad45792f436f3407f1a21a19444
parent	07b746b1b4792f8885fe8749b22dd6242467b674 (diff)