about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jack O'Connor <[email protected]> 2021-03-21 16:21:20 -0400
committer Jack O'Connor <[email protected]> 2021-03-21 21:14:13 -0400
commit 05292a018b25e47c2f094aa335a3bef5f4c0ac1a (patch)
tree 5ee1103ef6b3dad45792f436f3407f1a21a19444
parent 07b746b1b4792f8885fe8749b22dd6242467b674 (diff)
get rid of the standalone "*_rayon" functions
These clutter the toplevel API, and their prominence might lead callers to prefer them as a first resort, which probably isn't a good idea. Restricting multithreading to `Hasher::update_rayon` feels better, similar to what we've done with `Hasher::finalize_xof`. (But I think `update_rayon` is still an improvement over the trait-based interface that it replaced.)
-rw-r--r-- .github/workflows/ci.yml 2
-rw-r--r-- Cargo.toml 8
-rw-r--r-- benches/bench.rs 2
-rw-r--r-- src/lib.rs 92
-rw-r--r-- src/test.rs 18
5 files changed, 31 insertions, 91 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2c410f0..7ce4b59 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -89,7 +89,7 @@ jobs:
- run: cargo test --features=no_avx512,no_avx2,no_sse41,no_sse2,pure --release
# Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains.
- - run: cargo test --benches
+ - run: cargo test --benches --features=rayon
env:
RUSTC_BOOTSTRAP: 1
# Test vectors.
diff --git a/Cargo.toml b/Cargo.toml
index 2cd37cd..316cd09 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,9 +25,9 @@ neon = []
# entire build, with e.g. RUSTFLAGS="-C target-cpu=native".
std = ["digest/std"]
-# The "rayon" feature (defined below as an optional dependency) enables API
-# functions like `hash_rayon` and `update_rayon`. However, even if this feature
-# is enabled, all other APIs remain single-threaded.
+# The "rayon" feature (defined below as an optional dependency) enables the
+# `Hasher::update_rayon` method, for multithreaded hashing. However, even if
+# this feature is enabled, all other APIs remain single-threaded.
# This crate implements traits from the RustCrypto project, exposed here as the
# "traits-preview" feature. However, these traits aren't stable, and they're
@@ -75,7 +75,7 @@ no_avx2 = []
no_avx512 = []
[package.metadata.docs.rs]
-# Document blake3::join::RayonJoin on docs.rs.
+# Document Hasher::update_rayon on docs.rs.
features = ["rayon"]
[dependencies]
diff --git a/benches/bench.rs b/benches/bench.rs
index 832f0f8..90dbc42 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -421,7 +421,7 @@ fn bench_reference_1024_kib(b: &mut Bencher) {
#[cfg(feature = "rayon")]
fn bench_rayon(b: &mut Bencher, len: usize) {
let mut input = RandomInput::new(b, len);
- b.iter(|| blake3::hash_rayon(input.get()));
+ b.iter(|| blake3::Hasher::new().update_rayon(input.get()).finalize());
}
#[bench]
diff --git a/src/lib.rs b/src/lib.rs
index 34a883f..7376636 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -40,9 +40,8 @@
//! resulting binary will not be portable to other machines.
//!
//! The `rayon` feature (disabled by default, but enabled for [docs.rs]) adds
-//! new functions that use [Rayon]-based multithreading internally:
-//! [`Hasher::update_rayon`], [`hash_rayon`], [`keyed_hash_rayon`], and
-//! [`derive_key_rayon`].
+//! the [`Hasher::update_rayon`] method, for multithreaded hashing. However,
+//! even if this feature is enabled, all other APIs remain single-threaded.
//!
//! The `neon` feature enables ARM NEON support. Currently there is no runtime
//! CPU feature detection for NEON, so you must only enable this feature for
@@ -59,9 +58,6 @@
//! RustCrypto [`signature`] crate.)
//!
//! [`Hasher::update_rayon`]: struct.Hasher.html#method.update_rayon
-//! [`hash_rayon`]: fn.hash_rayon.html
-//! [`keyed_hash_rayon`]: fn.keyed_hash_rayon.html
-//! [`derive_key_rayon`]: fn.derive_key_rayon.html
//! [BLAKE3]: https://blake3.io
//! [Rayon]: https://github.com/rayon-rs/rayon
//! [docs.rs]: https://docs.rs/
@@ -711,8 +707,8 @@ fn compress_subtree_wide<J: join::Join>(
};
let (left_out, right_out) = cv_array.split_at_mut(degree * OUT_LEN);
- // Recurse! For *_rayon functions, this is where we take advantage of RayonJoin and use
- // multiple threads.
+ // Recurse! For update_rayon(), this is where we take advantage of RayonJoin and use multiple
+ // threads.
let (left_n, right_n) = J::join(
|| compress_subtree_wide::<J>(left, key, chunk_counter, flags, platform, left_out),
|| compress_subtree_wide::<J>(right, key, right_chunk_counter, flags, platform, right_out),
@@ -808,22 +804,11 @@ fn hash_all_at_once<J: join::Join>(input: &[u8], key: &CVWords, flags: u8) -> Ou
/// [`OutputReader`].
///
/// This function is always single-threaded. For multithreading support, see
-/// the [`hash_rayon`](fn.hash_rayon.html) function (enabled by the `rayon`
-/// Cargo feature).
+/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
pub fn hash(input: &[u8]) -> Hash {
hash_all_at_once::<join::SerialJoin>(input, IV, 0).root_hash()
}
-/// Identical to [`hash`], but using Rayon-based multithreading internally.
-///
-/// Multithreading isn't always helpful for performance, and it's a good idea to
-/// benchmark your specific use case. See performance notes on
-/// [`Hasher::update_rayon`].
-#[cfg(feature = "rayon")]
-pub fn hash_rayon(input: &[u8]) -> Hash {
- hash_all_at_once::<join::RayonJoin>(input, IV, 0).root_hash()
-}
-
/// The keyed hash function.
///
/// This is suitable for use as a message authentication code, for example to
@@ -836,25 +821,13 @@ pub fn hash_rayon(input: &[u8]) -> Hash {
/// [`Hasher::finalize_xof`], and [`OutputReader`].
///
/// This function is always single-threaded. For multithreading support, see
-/// the [`keyed_hash_rayon`](fn.keyed_hash_rayon.html) function (enabled by the
-/// `rayon` Cargo feature).
+/// [`Hasher::new_keyed`] and
+/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
let key_words = platform::words_from_le_bytes_32(key);
hash_all_at_once::<join::SerialJoin>(input, &key_words, KEYED_HASH).root_hash()
}
-/// Identical to [`keyed_hash`], but using Rayon-based multithreading
-/// internally.
-///
-/// Multithreading isn't always helpful for performance, and it's a good idea to
-/// benchmark your specific use case. See performance notes on
-/// [`Hasher::update_rayon`].
-#[cfg(feature = "rayon")]
-pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
- let key_words = platform::words_from_le_bytes_32(key);
- hash_all_at_once::<join::RayonJoin>(input, &key_words, KEYED_HASH).root_hash()
-}
-
/// The key derivation function.
///
/// Given cryptographic key material of any length and a context string of any
@@ -885,8 +858,8 @@ pub fn keyed_hash_rayon(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
/// [`Hasher::finalize_xof`], and [`OutputReader`].
///
/// This function is always single-threaded. For multithreading support, see
-/// the [`derive_key_rayon`](fn.derive_key_rayon.html) function (enabled by the
-/// `rayon` Cargo feature).
+/// [`Hasher::new_derive_key`] and
+/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
///
/// [Argon2]: https://en.wikipedia.org/wiki/Argon2
pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] {
@@ -899,25 +872,6 @@ pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] {
.0
}
-/// Identical to [`derive_key`], but using Rayon-based multithreading
-/// internally.
-///
-/// Multithreading isn't always helpful for performance, and it's a good idea to
-/// benchmark your specific use case. See performance notes on
-/// [`Hasher::update_rayon`].
-#[cfg(feature = "rayon")]
-pub fn derive_key_rayon(context: &str, key_material: &[u8]) -> [u8; 32] {
- // There is no conceivable reason anyone should use a context string long
- // enough for multithreading to make a difference.
- let context_key =
- hash_all_at_once::<join::SerialJoin>(context.as_bytes(), IV, DERIVE_KEY_CONTEXT)
- .root_hash();
- let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes());
- hash_all_at_once::<join::RayonJoin>(key_material, &context_key_words, DERIVE_KEY_MATERIAL)
- .root_hash()
- .0
-}
-
fn parent_node_output(
left_child: &CVBytes,
right_child: &CVBytes,
@@ -949,10 +903,14 @@ fn parent_node_output(
/// guarantees for this feature, and callers who use it should expect breaking
/// changes between patch versions.
///
-/// **Performance note:** The [`update`] method can't take full advantage of
-/// SIMD optimizations if its input buffer is too small or oddly sized. Using a
-/// 16 KiB buffer, or any multiple of that, enables all currently supported SIMD
-/// instruction sets.
+/// When the `rayon` Cargo feature is enabled, the
+/// [`update_rayon`](#method.update_rayon) method is available for multithreaded
+/// hashing.
+///
+/// **Performance note:** The [`update`](#method.update) method can't take full
+/// advantage of SIMD optimizations if its input buffer is too small or oddly
+/// sized. Using a 16 KiB buffer, or any multiple of that, enables all currently
+/// supported SIMD instruction sets.
///
/// # Examples
///
@@ -975,9 +933,6 @@ fn parent_node_output(
/// # Ok(())
/// # }
/// ```
-///
-/// [`update`]: #method.update
-/// [`update_rayon`]: #method.update_rayon
#[derive(Clone)]
pub struct Hasher {
key: CVWords,
@@ -1127,19 +1082,22 @@ impl Hasher {
/// Identical to [`update`](Hasher::update), but using Rayon-based
/// multithreading internally.
///
+ /// This method is gated by the `rayon` Cargo feature, which is disabled by
+ /// default but enabled on [docs.rs](https://docs.rs).
+ ///
/// To get any performance benefit from multithreading, the input buffer
/// needs to be large. As a rule of thumb on x86_64, `update_rayon` is
/// _slower_ than `update` for inputs under 128 KiB. That threshold varies
/// quite a lot across different processors, and it's important to benchmark
/// your specific use case.
///
- /// Memory mapping an entire input file is a good way to take advantage of
+ /// Memory mapping an entire input file is a simple way to take advantage of
/// multithreading without needing to carefully tune your buffer size or
/// offload IO. However, on spinning disks where random access is expensive,
- /// this can lead to disk thrashing and terrible IO performance. OS page
- /// caching can mask this problem, in which case it might only affect files
- /// larger than available RAM. Again, benchmarking your specific use case is
- /// important.
+ /// that approach can lead to disk thrashing and terrible IO performance.
+ /// Note that OS page caching can mask this problem, in which case it might
+ /// only appear for files larger than available RAM. Again, benchmarking
+ /// your specific use case is important.
#[cfg(feature = "rayon")]
pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self {
self.update_with_join::<join::RayonJoin>(input)
diff --git a/src/test.rs b/src/test.rs
index 1ebf27f..67e2b8b 100644
--- a/src/test.rs
+++ b/src/test.rs
@@ -284,12 +284,6 @@ fn test_compare_reference_impl() {
// all at once
let test_out = crate::hash(input);
assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
- // all at once (rayon)
- #[cfg(feature = "rayon")]
- {
- let test_out = crate::hash_rayon(input);
- assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
- }
// incremental
let mut hasher = crate::Hasher::new();
hasher.update(input);
@@ -319,12 +313,6 @@ fn test_compare_reference_impl() {
// all at once
let test_out = crate::keyed_hash(&TEST_KEY, input);
assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
- // all at once (rayon)
- #[cfg(feature = "rayon")]
- {
- let test_out = crate::keyed_hash_rayon(&TEST_KEY, input);
- assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
- }
// incremental
let mut hasher = crate::Hasher::new_keyed(&TEST_KEY);
hasher.update(input);
@@ -355,12 +343,6 @@ fn test_compare_reference_impl() {
// all at once
let test_out = crate::derive_key(context, input);
assert_eq!(test_out, expected_out[..32]);
- // all at once (rayon)
- #[cfg(feature = "rayon")]
- {
- let test_out = crate::derive_key_rayon(context, input);
- assert_eq!(test_out, expected_out[..32]);
- }
// incremental
let mut hasher = crate::Hasher::new_derive_key(context);
hasher.update(input);