From e1c2ea27fdd717fd924d7b286a125408d7e817f7 Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Sun, 30 Mar 2025 17:25:54 -0700 Subject: add the `hazmat` module and deprecate the undocumented `guts` module https://github.com/BLAKE3-team/BLAKE3/pull/458 --- b3sum/src/main.rs | 2 +- benches/bench.rs | 2 +- c/blake3.c | 8 +- src/guts.rs | 47 +--- src/hazmat.rs | 704 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 116 +++++--- src/test.rs | 63 +++-- test_vectors/src/lib.rs | 2 +- 8 files changed, 831 insertions(+), 113 deletions(-) create mode 100644 src/hazmat.rs diff --git a/b3sum/src/main.rs b/b3sum/src/main.rs index 4479d4d..69a10c8 100644 --- a/b3sum/src/main.rs +++ b/b3sum/src/main.rs @@ -196,7 +196,7 @@ fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> anyhow::Re // TODO: This computes each output block twice when the --seek argument isn't a multiple of 64. // We'll refactor all of this soon anyway, once SIMD optimizations are available for the XOF. let mut len = args.len(); - let mut block = [0; blake3::guts::BLOCK_LEN]; + let mut block = [0; blake3::BLOCK_LEN]; while len > 0 { output.fill(&mut block); let hex_str = hex::encode(&block[..]); diff --git a/benches/bench.rs b/benches/bench.rs index defdb12..e89f04c 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -4,9 +4,9 @@ extern crate test; use arrayref::array_ref; use arrayvec::ArrayVec; -use blake3::guts::{BLOCK_LEN, CHUNK_LEN}; use blake3::platform::{Platform, MAX_SIMD_DEGREE}; use blake3::OUT_LEN; +use blake3::{BLOCK_LEN, CHUNK_LEN}; use rand::prelude::*; use test::Bencher; diff --git a/c/blake3.c b/c/blake3.c index f21973e..74fc2e7 100644 --- a/c/blake3.c +++ b/c/blake3.c @@ -158,10 +158,10 @@ INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN], // Given some input larger than one chunk, return the number of bytes that // should go in the left subtree. This is the largest power-of-2 number of // chunks that leaves at least 1 byte for the right subtree. -INLINE size_t left_len(size_t content_len) { - // Subtract 1 to reserve at least one byte for the right side. content_len +INLINE size_t left_subtree_len(size_t input_len) { + // Subtract 1 to reserve at least one byte for the right side. input_len // should always be greater than BLAKE3_CHUNK_LEN. - size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + size_t full_chunks = (input_len - 1) / BLAKE3_CHUNK_LEN; return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; } @@ -282,7 +282,7 @@ size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len, // the input into left and right subtrees. (Note that this is only optimal // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree // of 3 or something, we'll need a more complicated strategy.) - size_t left_input_len = left_len(input_len); + size_t left_input_len = left_subtree_len(input_len); size_t right_input_len = input_len - left_input_len; const uint8_t *right_input = &input[left_input_len]; uint64_t right_chunk_counter = diff --git a/src/guts.rs b/src/guts.rs index ecde326..6bbf5a5 100644 --- a/src/guts.rs +++ b/src/guts.rs @@ -1,13 +1,6 @@ -//! This undocumented and unstable module is for use cases like the `bao` crate, -//! which need to traverse the BLAKE3 Merkle tree and work with chunk and parent -//! chaining values directly. There might be breaking changes to this module -//! between patch versions. -//! -//! We could stabilize something like this module in the future. If you have a -//! 
use case for it, please let us know by filing a GitHub issue. +//! Deprecated in favor of [`hazmat`](crate::hazmat) -pub const BLOCK_LEN: usize = 64; -pub const CHUNK_LEN: usize = 1024; +pub use crate::{BLOCK_LEN, CHUNK_LEN}; #[derive(Clone, Debug)] pub struct ChunkState(crate::ChunkState); @@ -26,7 +19,7 @@ impl ChunkState { #[inline] pub fn len(&self) -> usize { - self.0.len() + self.0.count() } #[inline] @@ -65,37 +58,3 @@ pub fn parent_cv( output.chaining_value().into() } } - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_chunk() { - assert_eq!( - crate::hash(b"foo"), - ChunkState::new(0).update(b"foo").finalize(true) - ); - } - - #[test] - fn test_parents() { - let mut hasher = crate::Hasher::new(); - let mut buf = [0; crate::CHUNK_LEN]; - - buf[0] = 'a' as u8; - hasher.update(&buf); - let chunk0_cv = ChunkState::new(0).update(&buf).finalize(false); - - buf[0] = 'b' as u8; - hasher.update(&buf); - let chunk1_cv = ChunkState::new(1).update(&buf).finalize(false); - - hasher.update(b"c"); - let chunk2_cv = ChunkState::new(2).update(b"c").finalize(false); - - let parent = parent_cv(&chunk0_cv, &chunk1_cv, false); - let root = parent_cv(&parent, &chunk2_cv, true); - assert_eq!(hasher.finalize(), root); - } -} diff --git a/src/hazmat.rs b/src/hazmat.rs new file mode 100644 index 0000000..2fd2449 --- /dev/null +++ b/src/hazmat.rs @@ -0,0 +1,704 @@ +//! Low-level tree manipulations and other sharp tools +//! +//! The target audience for this module is projects like [Bao](https://github.com/oconnor663/bao), +//! which work directly with the interior hashes ("chaining values") of BLAKE3 chunks and subtrees. +//! For example, you could use these functions to implement a BitTorrent-like protocol using the +//! BLAKE3 tree structure, or to hash an input that's distributed across different machines. These +//! use cases are advanced, and most applications don't need this module. Also: +//! +//!
+//! +//! **Warning:** This module is *hazardous material*. If you've heard folks say *don't roll your +//! own crypto,* this is the sort of thing they're talking about. These functions have complicated +//! requirements, and any mistakes will give you garbage output and/or break the security +//! properties that BLAKE3 is supposed to have. Read section 2.1 of [the BLAKE3 +//! paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf) to understand the +//! tree structure you need to maintain. Test your code against [`blake3::hash`](../fn.hash.html) +//! and make sure you can get the same outputs for [lots of different +//! inputs](https://github.com/BLAKE3-team/BLAKE3/blob/master/test_vectors/test_vectors.json). +//! +//!
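The cross-checking advice above is worth making concrete. The sketch below is editorial and not part of the patch; it uses the hazmat API introduced later in this file to rebuild the root hash by recursive splitting and compares the result against `blake3::hash` for a few input lengths, so any tree-structure mistake shows up as a mismatch. All names besides the blake3 API itself (`subtree_cv`, `tree_hash`) are illustrative.

```rust
use blake3::hazmat::{
    left_subtree_len, merge_subtrees_non_root, merge_subtrees_root, ChainingValue, HasherExt, Mode,
};
use blake3::{Hasher, CHUNK_LEN};

// Non-root chaining value of the subtree that starts `offset` bytes into the whole input.
fn subtree_cv(input: &[u8], offset: u64) -> ChainingValue {
    if input.len() <= CHUNK_LEN {
        // A single (possibly short) chunk.
        let mut hasher = Hasher::new();
        hasher.set_input_offset(offset);
        hasher.update(input);
        return hasher.finalize_non_root();
    }
    // Split exactly where left_subtree_len says to, then merge the two child chaining values.
    let left_len = left_subtree_len(input.len() as u64) as usize;
    let left = subtree_cv(&input[..left_len], offset);
    let right = subtree_cv(&input[left_len..], offset + left_len as u64);
    merge_subtrees_non_root(&left, &right, Mode::Hash)
}

// Root hash via the hazmat API, for inputs longer than one chunk.
fn tree_hash(input: &[u8]) -> blake3::Hash {
    assert!(input.len() > CHUNK_LEN, "one-chunk inputs are hashed with Hasher::finalize");
    let left_len = left_subtree_len(input.len() as u64) as usize;
    let left = subtree_cv(&input[..left_len], 0);
    let right = subtree_cv(&input[left_len..], left_len as u64);
    merge_subtrees_root(&left, &right, Mode::Hash)
}

fn main() {
    for len in [CHUNK_LEN + 1, 3 * CHUNK_LEN, 4 * CHUNK_LEN + 7, 100_000] {
        let input = vec![0xab; len];
        assert_eq!(tree_hash(&input), blake3::hash(&input));
    }
}
```

Splitting recursively with `left_subtree_len` keeps every subtree within the limits described by `max_subtree_len` below, which is why no extra bounds checking is needed in this sketch.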
+//! +//! On the other hand: +//! +//!
+//! +//! **Encouragement:** Playing with these functions is a great way to learn how BLAKE3 works on the +//! inside. Have fun! +//! +//!
+//! +//! The main entrypoint for this module is the [`HasherExt`] trait, particularly the +//! [`set_input_offset`](HasherExt::set_input_offset) and +//! [`finalize_non_root`](HasherExt::finalize_non_root) methods. These let you compute the chaining +//! values of individual chunks or subtrees. You then combine these chaining values into larger +//! subtrees using [`merge_subtrees_non_root`] and finally (once at the very top) +//! [`merge_subtrees_root`] or [`merge_subtrees_root_xof`]. +//! +//! # Examples +//! +//! Here's an example of computing all the interior hashes in a 3-chunk tree: +//! +//! ```text +//! root +//! / \ +//! parent \ +//! / \ \ +//! chunk0 chunk1 chunk2 +//! ``` +//! +//! ``` +//! # fn main() { +//! use blake3::{Hasher, CHUNK_LEN}; +//! use blake3::hazmat::{merge_subtrees_non_root, merge_subtrees_root, Mode}; +//! use blake3::hazmat::HasherExt; // an extension trait for Hasher +//! +//! let chunk0 = [b'a'; CHUNK_LEN]; +//! let chunk1 = [b'b'; CHUNK_LEN]; +//! let chunk2 = [b'c'; 42]; // The final chunk can be short. +//! +//! // Compute the non-root hashes ("chaining values") of all three chunks. Chunks or subtrees +//! // that don't begin at the start of the input use `set_input_offset` to say where they begin. +//! let chunk0_cv = Hasher::new() +//! // .set_input_offset(0) is the default. +//! .update(&chunk0) +//! .finalize_non_root(); +//! let chunk1_cv = Hasher::new() +//! .set_input_offset(CHUNK_LEN as u64) +//! .update(&chunk1) +//! .finalize_non_root(); +//! let chunk2_cv = Hasher::new() +//! .set_input_offset(2 * CHUNK_LEN as u64) +//! .update(&chunk2) +//! .finalize_non_root(); +//! +//! // Join the first two chunks with a non-root parent node and compute its chaining value. +//! let parent_cv = merge_subtrees_non_root(&chunk0_cv, &chunk1_cv, Mode::Hash); +//! +//! // Join that parent node and the third chunk with a root parent node and compute the hash. +//! let root_hash = merge_subtrees_root(&parent_cv, &chunk2_cv, Mode::Hash); +//! +//! // Double check that we got the right answer. +//! let mut combined_input = Vec::new(); +//! combined_input.extend_from_slice(&chunk0); +//! combined_input.extend_from_slice(&chunk1); +//! combined_input.extend_from_slice(&chunk2); +//! assert_eq!(root_hash, blake3::hash(&combined_input)); +//! # } +//! ``` +//! +//! Hashing many chunks together is important for performance, because it allows the implementation +//! to use SIMD parallelism internally. ([AVX-512](https://en.wikipedia.org/wiki/AVX-512) for +//! example needs 16 chunks to really get going.) We can reproduce `parent_cv` by hashing `chunk0` +//! and `chunk1` at the same time: +//! +//! ``` +//! # fn main() { +//! # use blake3::{Hasher, CHUNK_LEN}; +//! # use blake3::hazmat::{Mode, HasherExt, merge_subtrees_non_root, merge_subtrees_root}; +//! # let chunk0 = [b'a'; CHUNK_LEN]; +//! # let chunk1 = [b'b'; CHUNK_LEN]; +//! # let chunk0_cv = Hasher::new().update(&chunk0).finalize_non_root(); +//! # let chunk1_cv = Hasher::new().set_input_offset(CHUNK_LEN as u64).update(&chunk1).finalize_non_root(); +//! # let parent_cv = merge_subtrees_non_root(&chunk0_cv, &chunk1_cv, Mode::Hash); +//! # let mut combined_input = Vec::new(); +//! # combined_input.extend_from_slice(&chunk0); +//! # combined_input.extend_from_slice(&chunk1); +//! let left_subtree_cv = Hasher::new() +//! // .set_input_offset(0) is the default. +//! .update(&combined_input[..2 * CHUNK_LEN]) +//! .finalize_non_root(); +//! assert_eq!(left_subtree_cv, parent_cv); +//! +//! 
// Using multiple updates gives the same answer, though it's not as efficient.
+//! let mut subtree_hasher = Hasher::new();
+//! // Again, .set_input_offset(0) is the default.
+//! subtree_hasher.update(&chunk0);
+//! subtree_hasher.update(&chunk1);
+//! assert_eq!(left_subtree_cv, subtree_hasher.finalize_non_root());
+//! # }
+//! ```
+//!
+//! However, hashing multiple chunks together **must** respect the overall tree structure. Hashing
+//! `chunk0` and `chunk1` together is valid, but hashing `chunk1` and `chunk2` together is
+//! incorrect and gives a garbage result that will never match a standard BLAKE3 hash. The
+//! implementation includes a few best-effort asserts to catch some of these mistakes, but these
+//! checks aren't guaranteed. For example, this second call to `update` currently panics:
+//!
+//! ```should_panic
+//! # fn main() {
+//! # use blake3::{Hasher, CHUNK_LEN};
+//! # use blake3::hazmat::HasherExt;
+//! # let chunk0 = [b'a'; CHUNK_LEN];
+//! # let chunk1 = [b'b'; CHUNK_LEN];
+//! # let chunk2 = [b'c'; 42];
+//! let oops = Hasher::new()
+//!     .set_input_offset(CHUNK_LEN as u64)
+//!     .update(&chunk1)
+//!     // PANIC: "the subtree starting at 1024 contains at most 1024 bytes"
+//!     .update(&chunk2)
+//!     .finalize_non_root();
+//! # }
+//! ```
+//!
+//! For more on valid tree structures, see the docs for [`left_subtree_len`] and
+//! [`max_subtree_len`], and see section 2.1 of [the BLAKE3
+//! paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf). Note that the
+//! merging functions ([`merge_subtrees_root`] and friends) don't know the shape of the left and
+//! right subtrees you're giving them, and they can't help you catch mistakes. The best way to
+//! catch mistakes with these is to compare your root output to the [`blake3::hash`](crate::hash)
+//! of the same input.
+
+use crate::platform::Platform;
+use crate::{CVWords, Hasher, CHUNK_LEN, IV, KEY_LEN, OUT_LEN};
+
+/// Extension methods for [`Hasher`]. This is the main entrypoint to the `hazmat` module.
+pub trait HasherExt {
+    /// Similar to [`Hasher::new_derive_key`] but using a pre-hashed [`ContextKey`] from
+    /// [`hash_derive_key_context`].
+    ///
+    /// The [`hash_derive_key_context`] function is the _only_ valid source of the [`ContextKey`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use blake3::Hasher;
+    /// use blake3::hazmat::HasherExt;
+    ///
+    /// let context_key = blake3::hazmat::hash_derive_key_context("foo");
+    /// let mut hasher = Hasher::new_from_context_key(&context_key);
+    /// hasher.update(b"bar");
+    /// let derived_key = *hasher.finalize().as_bytes();
+    ///
+    /// assert_eq!(derived_key, blake3::derive_key("foo", b"bar"));
+    /// ```
+    fn new_from_context_key(context_key: &ContextKey) -> Self;
+
+    /// Configure the `Hasher` to process a chunk or subtree starting at `offset` bytes into the
+    /// whole input.
+    ///
+    /// You must call this function before processing any input with [`update`](Hasher::update) or
+    /// similar. This step isn't required for the first chunk, or for a subtree that includes the
+    /// first chunk (i.e. when the `offset` is zero), but it's required for all other chunks and
+    /// subtrees.
+    ///
+    /// The starting input offset of a subtree implies a maximum possible length for that subtree.
+    /// See [`max_subtree_len`] and section 2.1 of [the BLAKE3
+    /// paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf).
Note that only + /// subtrees along the right edge of the whole tree can have a length less than their maximum + /// possible length. + /// + /// See the [module level examples](index.html#examples). + /// + /// # Panics + /// + /// This function panics if the `Hasher` has already accepted any input with + /// [`update`](Hasher::update) or similar. + /// + /// This should always be paired with [`finalize_non_root`](HasherExt::finalize_non_root). It's + /// never correct to use a non-zero input offset with [`finalize`](Hasher::finalize) or + /// [`finalize_xof`](Hasher::finalize_xof). The `offset` must also be a multiple of + /// `CHUNK_LEN`. Violating either of these rules will currently fail an assertion and panic, + /// but this is not guaranteed. + fn set_input_offset(&mut self, offset: u64) -> &mut Self; + + /// Finalize the non-root hash ("chaining value") of the current chunk or subtree. + /// + /// Afterwards you can merge subtree chaining values into parent nodes using + /// [`merge_subtrees_non_root`] and ultimately into the root node with either + /// [`merge_subtrees_root`] (similar to [`Hasher::finalize`]) or [`merge_subtrees_root_xof`] + /// (similar to [`Hasher::finalize_xof`]). + /// + /// See the [module level examples](index.html#examples), particularly the discussion of valid + /// tree structures. + fn finalize_non_root(&self) -> ChainingValue; +} + +impl HasherExt for Hasher { + fn new_from_context_key(context_key: &[u8; KEY_LEN]) -> Hasher { + let context_key_words = crate::platform::words_from_le_bytes_32(context_key); + Hasher::new_internal(&context_key_words, crate::DERIVE_KEY_MATERIAL) + } + + fn set_input_offset(&mut self, offset: u64) -> &mut Hasher { + assert_eq!(self.count(), 0, "hasher has already accepted input"); + assert_eq!( + offset % CHUNK_LEN as u64, + 0, + "offset ({offset}) must be a chunk boundary (divisible by {CHUNK_LEN})", + ); + let counter = offset / CHUNK_LEN as u64; + self.chunk_state.chunk_counter = counter; + self.initial_chunk_counter = counter; + self + } + + fn finalize_non_root(&self) -> ChainingValue { + assert_ne!(self.count(), 0, "empty subtrees are never valid"); + self.final_output().chaining_value() + } +} + +/// The maximum length of a subtree in bytes, given its starting offset in bytes +/// +/// If you try to hash more than this many bytes as one subtree, you'll end up merging parent nodes +/// that shouldn't be merged, and your output will be garbage. [`Hasher::update`] will currently +/// panic in this case, but this is not guaranteed. +/// +/// For input offset zero (the default), there is no maximum length, and this function returns +/// `None`. For all other offsets it returns `Some`. Note that valid offsets must be a multiple of +/// [`CHUNK_LEN`] (1024); it's not possible to start hashing a chunk in the middle. +/// +/// In the example tree below, chunks are numbered by their _0-based index_. The subtree that +/// _starts_ with chunk 3, i.e. `input_offset = 3 * CHUNK_LEN`, includes only that one chunk, so +/// its max length is `Some(CHUNK_LEN)`. The subtree that starts with chunk 6 includes chunk 7 but +/// not chunk 8, so its max length is `Some(2 * CHUNK_LEN)`. The subtree that starts with chunk 12 +/// includes chunks 13, 14, and 15, but if the tree were bigger it would not include chunk 16, so +/// its max length is `Some(4 * CHUNK_LEN)`. 
One way to think about the rule here is that, if you
+/// go beyond the max subtree length from a given starting offset, you start dealing with subtrees
+/// that include chunks _to the left_ of where you started.
+///
+/// ```text
+///                             root
+///                       /               \
+///               .                               .
+///           /       \                       /       \
+///       .               .               .               .
+///     /   \           /   \           /   \           /   \
+///   .       .       .       .       .       .       .       .
+///  / \     / \     / \     / \     / \     / \     / \     / \
+/// 0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15
+/// ```
+///
+/// The general rule turns out to be that for a subtree starting at a 0-based chunk index N greater
+/// than zero, the maximum number of chunks in that subtree is the largest power-of-two that
+/// divides N, which is given by `1 << N.trailing_zeros()`.
+///
+/// This function can be useful for writing tests or debug assertions, but it's actually rare to
+/// use this for real control flow. Callers who split their input recursively using
+/// [`left_subtree_len`] will automatically satisfy the `max_subtree_len` bound and don't
+/// necessarily need to check. It's also common to choose some fixed power-of-two subtree size, say
+/// 64 chunks, and divide your input up into slices of that fixed length (with the final slice
+/// possibly short). This approach also automatically satisfies the `max_subtree_len` bound and
+/// doesn't need to check. Proving that this is true can be an interesting exercise. Note that
+/// chunks 0, 4, 8, and 12 all begin subtrees of at least 4 chunks in the example tree above.
+///
+/// # Panics
+///
+/// This function currently panics if `input_offset` is not a multiple of `CHUNK_LEN`. This is not
+/// guaranteed.
+#[inline(always)]
+pub fn max_subtree_len(input_offset: u64) -> Option<u64> {
+    if input_offset == 0 {
+        return None;
+    }
+    assert_eq!(input_offset % CHUNK_LEN as u64, 0);
+    let counter = input_offset / CHUNK_LEN as u64;
+    let max_chunks = 1 << counter.trailing_zeros();
+    Some(max_chunks * CHUNK_LEN as u64)
+}
+
+#[test]
+fn test_max_subtree_len() {
+    assert_eq!(max_subtree_len(0), None);
+    // (chunk index, max chunks)
+    let cases = [
+        (1, 1),
+        (2, 2),
+        (3, 1),
+        (4, 4),
+        (5, 1),
+        (6, 2),
+        (7, 1),
+        (8, 8),
+    ];
+    for (chunk_index, max_chunks) in cases {
+        let input_offset = chunk_index * CHUNK_LEN as u64;
+        assert_eq!(
+            max_subtree_len(input_offset),
+            Some(max_chunks * CHUNK_LEN as u64),
+        );
+    }
+}
+
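The `max_subtree_len` docs above note that a fixed power-of-two group size always satisfies this bound. As a quick editorial illustration (not part of the patch), assuming a group size of 64 chunks, every group after the first starts at a chunk index divisible by 64, so the bound always allows at least one full group:

```rust
use blake3::hazmat::max_subtree_len;
use blake3::CHUNK_LEN;

fn main() {
    let group_len = 64 * CHUNK_LEN as u64;
    for group_index in 1..1_000u64 {
        // Group k starts at byte offset k * 64 * CHUNK_LEN; 64 divides k * 64, so the
        // bound is at least 64 chunks. (The group at offset 0 has no bound at all.)
        let offset = group_index * group_len;
        assert!(max_subtree_len(offset).unwrap() >= group_len);
    }
}
```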
+/// Given the length in bytes of either a complete input or a subtree input, return the number of
+/// bytes that belong to its left child subtree. The rest belong to its right child subtree.
+///
+/// Concretely, this function returns the largest power-of-two number of bytes that's strictly less
+/// than `input_len`. This leads to a tree where all left subtrees are "complete" and at least as
+/// large as their sibling right subtrees, as specified in section 2.1 of [the BLAKE3
+/// paper](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf). For example, if an
+/// input is exactly two chunks, its left and right subtrees both get one chunk. But if an input is
+/// two chunks plus one more byte, then its left subtree gets two chunks, and its right subtree
+/// only gets one byte.
+///
+/// This function isn't meaningful for one chunk of input, because chunks don't have children. It
+/// currently panics in debug mode if `input_len <= CHUNK_LEN`.
+///
+/// # Example
+///
+/// Hash an input of random length as two subtrees:
+///
+/// ```
+/// # #[cfg(feature = "std")] {
+/// use blake3::hazmat::{left_subtree_len, merge_subtrees_root, HasherExt, Mode};
+/// use blake3::{Hasher, CHUNK_LEN};
+///
+/// // Generate a random-length input. Note that to be split into two subtrees, the input length
+/// // must be greater than CHUNK_LEN.
+/// let input_len = rand::random_range(CHUNK_LEN + 1..1_000_000);
+/// let mut input = vec![0; input_len];
+/// rand::fill(&mut input[..]);
+///
+/// // Compute the left and right subtree hashes and then the root hash. left_subtree_len() tells
+/// // us exactly where to split the input. Any other split would either panic (if we're lucky) or
+/// // lead to an incorrect root hash.
+/// let left_len = left_subtree_len(input_len as u64) as usize;
+/// let left_subtree_cv = Hasher::new()
+///     .update(&input[..left_len])
+///     .finalize_non_root();
+/// let right_subtree_cv = Hasher::new()
+///     .set_input_offset(left_len as u64)
+///     .update(&input[left_len..])
+///     .finalize_non_root();
+/// let root_hash = merge_subtrees_root(&left_subtree_cv, &right_subtree_cv, Mode::Hash);
+///
+/// // Double check the answer.
+/// assert_eq!(root_hash, blake3::hash(&input));
+/// # }
+/// ```
+#[inline(always)]
+pub fn left_subtree_len(input_len: u64) -> u64 {
+    debug_assert!(input_len > CHUNK_LEN as u64);
+    // Note that .next_power_of_two() is greater than *or equal*.
+    ((input_len + 1) / 2).next_power_of_two()
+}
+
+#[test]
+fn test_left_subtree_len() {
+    assert_eq!(left_subtree_len(1025), 1024);
+    for boundary_case in [2, 4, 8, 16, 32, 64] {
+        let input_len = boundary_case * CHUNK_LEN as u64;
+        assert_eq!(left_subtree_len(input_len - 1), input_len / 2);
+        assert_eq!(left_subtree_len(input_len), input_len / 2);
+        assert_eq!(left_subtree_len(input_len + 1), input_len);
+    }
+}
+
+/// The `mode` argument to [`merge_subtrees_root`] and friends
+///
+/// See the [module level examples](index.html#examples).
+#[derive(Copy, Clone, Debug)]
+pub enum Mode<'a> {
+    /// Corresponding to [`hash`](crate::hash)
+    Hash,
+
+    /// Corresponding to [`keyed_hash`](crate::keyed_hash)
+    KeyedHash(&'a [u8; KEY_LEN]),
+
+    /// Corresponding to [`derive_key`](crate::derive_key)
+    ///
+    /// The [`ContextKey`] comes from [`hash_derive_key_context`].
+    DeriveKeyMaterial(&'a ContextKey),
+}
+
+impl<'a> Mode<'a> {
+    fn key_words(&self) -> CVWords {
+        match self {
+            Mode::Hash => *IV,
+            Mode::KeyedHash(key) => crate::platform::words_from_le_bytes_32(key),
+            Mode::DeriveKeyMaterial(cx_key) => crate::platform::words_from_le_bytes_32(cx_key),
+        }
+    }
+
+    fn flags_byte(&self) -> u8 {
+        match self {
+            Mode::Hash => 0,
+            Mode::KeyedHash(_) => crate::KEYED_HASH,
+            Mode::DeriveKeyMaterial(_) => crate::DERIVE_KEY_MATERIAL,
+        }
+    }
+}
+
+/// "Chaining value" is the academic term for a non-root or non-final hash.
+///
+/// Besides just sounding fancy, it turns out there are [security
+/// reasons](https://jacko.io/tree_hashing.html) to be careful about the difference between
+/// (root/final) hashes and (non-root/non-final) chaining values.
+pub type ChainingValue = [u8; OUT_LEN];
+
+fn merge_subtrees_inner(
+    left_child: &ChainingValue,
+    right_child: &ChainingValue,
+    mode: Mode,
+) -> crate::Output {
+    crate::parent_node_output(
+        &left_child,
+        &right_child,
+        &mode.key_words(),
+        mode.flags_byte(),
+        Platform::detect(),
+    )
+}
+
+/// Compute a non-root parent node chaining value from two child chaining values.
+/// +/// See the [module level examples](index.html#examples), particularly the discussion of valid tree +/// structures. The left and right child chaining values can come from either +/// [`Hasher::finalize_non_root`](HasherExt::finalize_non_root) or other calls to +/// `merge_subtrees_non_root`. "Chaining value" is the academic term for a non-root or non-final +/// hash. +pub fn merge_subtrees_non_root( + left_child: &ChainingValue, + right_child: &ChainingValue, + mode: Mode, +) -> ChainingValue { + merge_subtrees_inner(left_child, right_child, mode).chaining_value() +} + +/// Compute a root hash from two child chaining values. +/// +/// See the [module level examples](index.html#examples), particularly the discussion of valid tree +/// structures. The left and right child chaining values can come from either +/// [`Hasher::finalize_non_root`](HasherExt::finalize_non_root) or [`merge_subtrees_non_root`]. +/// "Chaining value" is the academic term for a non-root or non-final hash. +/// +/// Note that inputs of [`CHUNK_LEN`] or less don't produce any parent nodes and can't be hashed +/// using this function. In that case you must get the root hash from [`Hasher::finalize`] (or just +/// [`blake3::hash`](crate::hash)). +pub fn merge_subtrees_root( + left_child: &ChainingValue, + right_child: &ChainingValue, + mode: Mode, +) -> crate::Hash { + merge_subtrees_inner(left_child, right_child, mode).root_hash() +} + +/// Build a root [`OutputReader`](crate::OutputReader) from two child chaining values. +/// +/// See also the [module level examples](index.html#examples), particularly the discussion of valid +/// tree structures. The left and right child chaining values can come from either +/// [`Hasher::finalize_non_root`](HasherExt::finalize_non_root) or [`merge_subtrees_non_root`]. +/// "Chaining value" is the academic term for a non-root or non-final hash. +/// +/// Note that inputs of [`CHUNK_LEN`] or less don't produce any parent nodes and can't be hashed +/// using this function. In that case you must get the `OutputReader` from +/// [`Hasher::finalize_xof`]. +/// +/// # Example +/// +/// ``` +/// use blake3::hazmat::{merge_subtrees_root_xof, HasherExt, Mode}; +/// use blake3::{Hasher, CHUNK_LEN}; +/// +/// // Hash a 2-chunk subtree in steps. Note that only +/// // the final chunk can be shorter than CHUNK_LEN. +/// let chunk0 = &[42; CHUNK_LEN]; +/// let chunk1 = b"hello world"; +/// let chunk0_cv = Hasher::new() +/// .update(chunk0) +/// .finalize_non_root(); +/// let chunk1_cv = Hasher::new() +/// .set_input_offset(CHUNK_LEN as u64) +/// .update(chunk1) +/// .finalize_non_root(); +/// +/// // Obtain a blake3::OutputReader at the root and extract 1000 bytes. +/// let mut output_reader = merge_subtrees_root_xof(&chunk0_cv, &chunk1_cv, Mode::Hash); +/// let mut output_bytes = [0; 1_000]; +/// output_reader.fill(&mut output_bytes); +/// +/// // Double check the answer. +/// let mut hasher = Hasher::new(); +/// hasher.update(chunk0); +/// hasher.update(chunk1); +/// let mut expected = [0; 1_000]; +/// hasher.finalize_xof().fill(&mut expected); +/// assert_eq!(output_bytes, expected); +/// ``` +pub fn merge_subtrees_root_xof( + left_child: &ChainingValue, + right_child: &ChainingValue, + mode: Mode, +) -> crate::OutputReader { + crate::OutputReader::new(merge_subtrees_inner(left_child, right_child, mode)) +} + +/// An alias to distinguish [`hash_derive_key_context`] outputs from other keys. 
+pub type ContextKey = [u8; KEY_LEN];
+
+/// Hash a [`derive_key`](crate::derive_key) context string and return a [`ContextKey`].
+///
+/// The _only_ valid uses for the returned [`ContextKey`] are [`Hasher::new_from_context_key`] and
+/// [`Mode::DeriveKeyMaterial`] (together with the merge subtree functions).
+///
+/// # Example
+///
+/// ```
+/// use blake3::Hasher;
+/// use blake3::hazmat::HasherExt;
+///
+/// let context_key = blake3::hazmat::hash_derive_key_context("foo");
+/// let mut hasher = Hasher::new_from_context_key(&context_key);
+/// hasher.update(b"bar");
+/// let derived_key = *hasher.finalize().as_bytes();
+///
+/// assert_eq!(derived_key, blake3::derive_key("foo", b"bar"));
+/// ```
+pub fn hash_derive_key_context(context: &str) -> ContextKey {
+    crate::hash_all_at_once::<crate::join::SerialJoin>(
+        context.as_bytes(),
+        IV,
+        crate::DERIVE_KEY_CONTEXT,
+    )
+    .root_hash()
+    .0
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    #[should_panic]
+    fn test_empty_subtree_should_panic() {
+        Hasher::new().finalize_non_root();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_unaligned_offset_should_panic() {
+        Hasher::new().set_input_offset(1);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_hasher_already_accepted_input_should_panic() {
+        Hasher::new().update(b"x").set_input_offset(0);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_too_much_input_should_panic() {
+        Hasher::new()
+            .set_input_offset(CHUNK_LEN as u64)
+            .update(&[0; CHUNK_LEN + 1]);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_set_input_offset_cant_finalize() {
+        Hasher::new().set_input_offset(CHUNK_LEN as u64).finalize();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_set_input_offset_cant_finalize_xof() {
+        Hasher::new()
+            .set_input_offset(CHUNK_LEN as u64)
+            .finalize_xof();
+    }
+
+    #[test]
+    fn test_grouped_hash() {
+        const MAX_CHUNKS: usize = (crate::test::TEST_CASES_MAX + 1) / CHUNK_LEN;
+        let mut input_buf = [0; crate::test::TEST_CASES_MAX];
+        crate::test::paint_test_input(&mut input_buf);
+        for subtree_chunks in [1, 2, 4, 8, 16, 32] {
+            #[cfg(feature = "std")]
+            dbg!(subtree_chunks);
+            let subtree_len = subtree_chunks * CHUNK_LEN;
+            for &case in crate::test::TEST_CASES {
+                if case <= subtree_len {
+                    continue;
+                }
+                #[cfg(feature = "std")]
+                dbg!(case);
+                let input = &input_buf[..case];
+                let expected_hash = crate::hash(input);
+
+                // Collect all the group chaining values.
+                let mut chaining_values = arrayvec::ArrayVec::<ChainingValue, MAX_CHUNKS>::new();
+                let mut subtree_offset = 0;
+                while subtree_offset < input.len() {
+                    let take = core::cmp::min(subtree_len, input.len() - subtree_offset);
+                    let subtree_input = &input[subtree_offset..][..take];
+                    let subtree_cv = Hasher::new()
+                        .set_input_offset(subtree_offset as u64)
+                        .update(subtree_input)
+                        .finalize_non_root();
+                    chaining_values.push(subtree_cv);
+                    subtree_offset += take;
+                }
+
+                // Compress all the chaining_values together, layer by layer.
+                assert!(chaining_values.len() >= 2);
+                while chaining_values.len() > 2 {
+                    let n = chaining_values.len();
+                    // Merge each side-by-side pair in place, overwriting the front half of the
+                    // array with the merged results. This moves us "up one level" in the tree.
+                    for i in 0..(n / 2) {
+                        chaining_values[i] = merge_subtrees_non_root(
+                            &chaining_values[2 * i],
+                            &chaining_values[2 * i + 1],
+                            Mode::Hash,
+                        );
+                    }
+                    // If there's an odd CV out, it moves up.
+ if n % 2 == 1 { + chaining_values[n / 2] = chaining_values[n - 1]; + } + chaining_values.truncate(n / 2 + n % 2); + } + assert_eq!(chaining_values.len(), 2); + let root_hash = + merge_subtrees_root(&chaining_values[0], &chaining_values[1], Mode::Hash); + assert_eq!(expected_hash, root_hash); + } + } + } + + #[test] + fn test_keyed_hash_xof() { + let group0 = &[42; 4096]; + let group1 = &[43; 4095]; + let mut input = [0; 8191]; + input[..4096].copy_from_slice(group0); + input[4096..].copy_from_slice(group1); + let key = &[44; 32]; + + let mut expected_output = [0; 100]; + Hasher::new_keyed(&key) + .update(&input) + .finalize_xof() + .fill(&mut expected_output); + + let mut hazmat_output = [0; 100]; + let left = Hasher::new_keyed(key).update(group0).finalize_non_root(); + let right = Hasher::new_keyed(key) + .set_input_offset(group0.len() as u64) + .update(group1) + .finalize_non_root(); + merge_subtrees_root_xof(&left, &right, Mode::KeyedHash(&key)).fill(&mut hazmat_output); + assert_eq!(expected_output, hazmat_output); + } + + #[test] + fn test_derive_key() { + let context = "foo"; + let mut input = [0; 1025]; + crate::test::paint_test_input(&mut input); + let expected = crate::derive_key(context, &input); + + let cx_key = hash_derive_key_context(context); + let left = Hasher::new_from_context_key(&cx_key) + .update(&input[..1024]) + .finalize_non_root(); + let right = Hasher::new_from_context_key(&cx_key) + .set_input_offset(1024) + .update(&input[1024..]) + .finalize_non_root(); + let derived_key = merge_subtrees_root(&left, &right, Mode::DeriveKeyMaterial(&cx_key)).0; + assert_eq!(expected, derived_key); + } +} diff --git a/src/lib.rs b/src/lib.rs index 92bdbdb..027d4ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,13 +92,12 @@ #[cfg(test)] mod test; -// The guts module is for incremental use cases like the `bao` crate that need -// to explicitly compute chunk and parent chaining values. It is semi-stable -// and likely to keep working, but largely undocumented and not intended for -// widespread use. #[doc(hidden)] +#[deprecated(since = "1.8.0", note = "use the hazmat module instead")] pub mod guts; +pub mod hazmat; + /// Undocumented and unstable, for benchmarks only. #[doc(hidden)] pub mod platform; @@ -155,8 +154,20 @@ pub const OUT_LEN: usize = 32; /// The number of bytes in a key, 32. pub const KEY_LEN: usize = 32; +/// The number of bytes in a block, 64. +/// +/// You don't usually need to think about this number. One case where it matters is calling +/// [`OutputReader::fill`] in a loop, where using a `buf` argument that's a multiple of `BLOCK_LEN` +/// avoids repeating work. +pub const BLOCK_LEN: usize = 64; + +/// The number of bytes in a chunk, 1024. +/// +/// You don't usually need to think about this number, but it often comes up in benchmarks, because +/// the maximum degree of parallelism used by the implementation equals the number of chunks. 
+pub const CHUNK_LEN: usize = 1024; + const MAX_DEPTH: usize = 54; // 2^54 * CHUNK_LEN = 2^64 -use guts::{BLOCK_LEN, CHUNK_LEN}; // While iterating the compression function within a chunk, the CV is // represented as words, to avoid doing two extra endianness conversions for @@ -227,7 +238,7 @@ fn counter_high(counter: u64) -> u32 { /// [`Display`]: https://doc.rust-lang.org/std/fmt/trait.Display.html /// [`FromStr`]: https://doc.rust-lang.org/std/str/trait.FromStr.html #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[derive(Clone, Copy, Hash)] +#[derive(Clone, Copy, Hash, Eq)] pub struct Hash([u8; OUT_LEN]); impl Hash { @@ -354,8 +365,6 @@ impl PartialEq<[u8]> for Hash { } } -impl Eq for Hash {} - impl fmt::Display for Hash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // Formatting field as `&str` to reduce code size since the `Debug` @@ -504,7 +513,7 @@ impl ChunkState { } } - fn len(&self) -> usize { + fn count(&self) -> usize { BLOCK_LEN * self.blocks_compressed as usize + self.buf_len as usize } @@ -561,7 +570,7 @@ impl ChunkState { self.fill_buf(&mut input); debug_assert!(input.is_empty()); - debug_assert!(self.len() <= CHUNK_LEN); + debug_assert!(self.count() <= CHUNK_LEN); self } @@ -582,7 +591,7 @@ impl ChunkState { impl fmt::Debug for ChunkState { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("ChunkState") - .field("len", &self.len()) + .field("count", &self.count()) .field("chunk_counter", &self.chunk_counter) .field("flags", &self.flags) .field("platform", &self.platform) @@ -646,22 +655,12 @@ impl IncrementCounter { } } -// The largest power of two less than or equal to `n`, used for left_len() -// immediately below, and also directly in Hasher::update(). +// The largest power of two less than or equal to `n`, used in Hasher::update(). This is similar to +// left_subtree_len(n), but note that left_subtree_len(n) is strictly less than `n`. fn largest_power_of_two_leq(n: usize) -> usize { ((n / 2) + 1).next_power_of_two() } -// Given some input larger than one chunk, return the number of bytes that -// should go in the left subtree. This is the largest power-of-2 number of -// chunks that leaves at least 1 byte for the right subtree. -fn left_len(content_len: usize) -> usize { - debug_assert!(content_len > CHUNK_LEN); - // Subtract 1 to reserve at least one byte for the right side. - let full_chunks = (content_len - 1) / CHUNK_LEN; - largest_power_of_two_leq(full_chunks) * CHUNK_LEN -} - // Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time // on a single thread. Write out the chunk chaining values and return the // number of chunks hashed. These chunks are never the root and never empty; @@ -790,7 +789,7 @@ fn compress_subtree_wide( // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree // of 3 or something, we'll need a more complicated strategy.) debug_assert_eq!(platform.simd_degree().count_ones(), 1, "power of 2"); - let (left, right) = input.split_at(left_len(input.len())); + let (left, right) = input.split_at(hazmat::left_subtree_len(input.len() as u64) as usize); let right_chunk_counter = chunk_counter + (left.len() / CHUNK_LEN) as u64; // Make space for the child outputs. 
Here we use MAX_SIMD_DEGREE_OR_2 to @@ -996,10 +995,8 @@ pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash { /// /// [Argon2]: https://en.wikipedia.org/wiki/Argon2 pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] { - let context_key = - hash_all_at_once::(context.as_bytes(), IV, DERIVE_KEY_CONTEXT) - .root_hash(); - let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes()); + let context_key = hazmat::hash_derive_key_context(context); + let context_key_words = platform::words_from_le_bytes_32(&context_key); hash_all_at_once::(key_material, &context_key_words, DERIVE_KEY_MATERIAL) .root_hash() .0 @@ -1063,6 +1060,7 @@ fn parent_node_output( pub struct Hasher { key: CVWords, chunk_state: ChunkState, + initial_chunk_counter: u64, // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example, // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk // requires a 4th entry, rather than merging everything down to 1, because @@ -1076,6 +1074,7 @@ impl Hasher { Self { key: *key, chunk_state: ChunkState::new(key, 0, flags, Platform::detect()), + initial_chunk_counter: 0, cv_stack: ArrayVec::new(), } } @@ -1100,10 +1099,8 @@ impl Hasher { /// /// [`derive_key`]: fn.derive_key.html pub fn new_derive_key(context: &str) -> Self { - let context_key = - hash_all_at_once::(context.as_bytes(), IV, DERIVE_KEY_CONTEXT) - .root_hash(); - let context_key_words = platform::words_from_le_bytes_32(context_key.as_bytes()); + let context_key = hazmat::hash_derive_key_context(context); + let context_key_words = platform::words_from_le_bytes_32(&context_key); Self::new_internal(&context_key_words, DERIVE_KEY_MATERIAL) } @@ -1133,8 +1130,12 @@ impl Hasher { // the CV on top of the stack. The principle is the same: each CV that // should remain in the stack is represented by a 1-bit in the total number // of chunks (or bytes) so far. - fn merge_cv_stack(&mut self, total_len: u64) { - let post_merge_stack_len = total_len.count_ones() as usize; + fn merge_cv_stack(&mut self, chunk_counter: u64) { + // Account for non-zero cases of Hasher::set_input_offset, where there are no prior + // subtrees in the stack. Note that initial_chunk_counter is always 0 for callers who don't + // use the hazmat module. + let post_merge_stack_len = + (chunk_counter - self.initial_chunk_counter).count_ones() as usize; while self.cv_stack.len() > post_merge_stack_len { let right_child = self.cv_stack.pop().unwrap(); let left_child = self.cv_stack.pop().unwrap(); @@ -1199,10 +1200,21 @@ impl Hasher { } fn update_with_join(&mut self, mut input: &[u8]) -> &mut Self { + let input_offset = self.initial_chunk_counter * CHUNK_LEN as u64; + if let Some(max) = hazmat::max_subtree_len(input_offset) { + let remaining = max - self.count(); + assert!( + input.len() as u64 <= remaining, + "the subtree starting at {} contains at most {} bytes (found {})", + CHUNK_LEN as u64 * self.initial_chunk_counter, + max, + input.len(), + ); + } // If we have some partial chunk bytes in the internal chunk_state, we // need to finish that chunk first. - if self.chunk_state.len() > 0 { - let want = CHUNK_LEN - self.chunk_state.len(); + if self.chunk_state.count() > 0 { + let want = CHUNK_LEN - self.chunk_state.count(); let take = cmp::min(want, input.len()); self.chunk_state.update(&input[..take]); input = &input[take..]; @@ -1210,7 +1222,7 @@ impl Hasher { // We've filled the current chunk, and there's more input // coming, so we know it's not the root and we can finalize it. 
// Then we'll proceed to hashing whole chunks below. - debug_assert_eq!(self.chunk_state.len(), CHUNK_LEN); + debug_assert_eq!(self.chunk_state.count(), CHUNK_LEN); let chunk_cv = self.chunk_state.output().chaining_value(); self.push_cv(&chunk_cv, self.chunk_state.chunk_counter); self.chunk_state = ChunkState::new( @@ -1238,7 +1250,7 @@ impl Hasher { // Because we might need to break up the input to form powers of 2, or // to evenly divide what we already have, this part runs in a loop. while input.len() > CHUNK_LEN { - debug_assert_eq!(self.chunk_state.len(), 0, "no partial chunk data"); + debug_assert_eq!(self.chunk_state.count(), 0, "no partial chunk data"); debug_assert_eq!(CHUNK_LEN.count_ones(), 1, "power of 2 chunk len"); let mut subtree_len = largest_power_of_two_leq(input.len()); let count_so_far = self.chunk_state.chunk_counter * CHUNK_LEN as u64; @@ -1323,7 +1335,7 @@ impl Hasher { // also. Convert it directly into an Output. Otherwise, we need to // merge subtrees below. if self.cv_stack.is_empty() { - debug_assert_eq!(self.chunk_state.chunk_counter, 0); + debug_assert_eq!(self.chunk_state.chunk_counter, self.initial_chunk_counter); return self.chunk_state.output(); } @@ -1341,11 +1353,11 @@ impl Hasher { // the empty chunk is taken care of above. let mut output: Output; let mut num_cvs_remaining = self.cv_stack.len(); - if self.chunk_state.len() > 0 { + if self.chunk_state.count() > 0 { debug_assert_eq!( self.cv_stack.len(), - self.chunk_state.chunk_counter.count_ones() as usize, - "cv stack does not need a merge" + (self.chunk_state.chunk_counter - self.initial_chunk_counter).count_ones() as usize, + "cv stack does not need a merge", ); output = self.chunk_state.output(); } else { @@ -1378,6 +1390,10 @@ impl Hasher { /// This method is idempotent. Calling it twice will give the same result. /// You can also add more input and finalize again. pub fn finalize(&self) -> Hash { + assert_eq!( + self.initial_chunk_counter, 0, + "set_input_offset must be used with finalize_non_root", + ); self.final_output().root_hash() } @@ -1389,12 +1405,22 @@ impl Hasher { /// /// [`OutputReader`]: struct.OutputReader.html pub fn finalize_xof(&self) -> OutputReader { + assert_eq!( + self.initial_chunk_counter, 0, + "set_input_offset must be used with finalize_non_root", + ); OutputReader::new(self.final_output()) } /// Return the total number of bytes hashed so far. + /// + /// [`hazmat::HasherExt::set_input_offset`] does not affect this value. This only counts bytes + /// passed to [`update`](Hasher::update). pub fn count(&self) -> u64 { - self.chunk_state.chunk_counter * CHUNK_LEN as u64 + self.chunk_state.len() as u64 + // Account for non-zero cases of Hasher::set_input_offset. Note that initial_chunk_counter + // is always 0 for callers who don't use the hazmat module. + (self.chunk_state.chunk_counter - self.initial_chunk_counter) * CHUNK_LEN as u64 + + self.chunk_state.count() as u64 } /// As [`update`](Hasher::update), but reading from a @@ -1613,11 +1639,13 @@ impl Zeroize for Hasher { let Self { key, chunk_state, + initial_chunk_counter, cv_stack, } = self; key.zeroize(); chunk_state.zeroize(); + initial_chunk_counter.zeroize(); cv_stack.zeroize(); } } @@ -1684,7 +1712,7 @@ impl OutputReader { /// calling `fill` repeatedly with a short-length or odd-length slice will /// end up performing the same compression multiple times. If you're /// reading output in a loop, prefer a slice length that's a multiple of - /// 64. + /// [`BLOCK_LEN`] (64 bytes). 
/// /// The maximum output size of BLAKE3 is 264-1 bytes. If you try /// to extract more than that, for example by seeking near the end and diff --git a/src/test.rs b/src/test.rs index 506d7aa..2d21856 100644 --- a/src/test.rs +++ b/src/test.rs @@ -38,8 +38,12 @@ pub const TEST_CASES: &[usize] = &[ 7 * CHUNK_LEN + 1, 8 * CHUNK_LEN, 8 * CHUNK_LEN + 1, - 16 * CHUNK_LEN, // AVX512's bandwidth - 31 * CHUNK_LEN, // 16 + 8 + 4 + 2 + 1 + 16 * CHUNK_LEN - 1, + 16 * CHUNK_LEN, // AVX512's bandwidth + 16 * CHUNK_LEN + 1, + 31 * CHUNK_LEN - 1, + 31 * CHUNK_LEN, // 16 + 8 + 4 + 2 + 1 + 31 * CHUNK_LEN + 1, 100 * CHUNK_LEN, // subtrees larger than MAX_SIMD_DEGREE chunks ]; @@ -327,22 +331,6 @@ fn test_largest_power_of_two_leq() { } } -#[test] -fn test_left_len() { - let input_output = &[ - (CHUNK_LEN + 1, CHUNK_LEN), - (2 * CHUNK_LEN - 1, CHUNK_LEN), - (2 * CHUNK_LEN, CHUNK_LEN), - (2 * CHUNK_LEN + 1, 2 * CHUNK_LEN), - (4 * CHUNK_LEN - 1, 2 * CHUNK_LEN), - (4 * CHUNK_LEN, 2 * CHUNK_LEN), - (4 * CHUNK_LEN + 1, 4 * CHUNK_LEN), - ]; - for &(input, output) in input_output { - assert_eq!(crate::left_len(input), output); - } -} - #[test] fn test_compare_reference_impl() { const OUT: usize = 303; // more than 64, not a multiple of 4 @@ -801,6 +789,7 @@ fn test_zeroize() { flags: 42, platform: crate::Platform::Portable, }, + initial_chunk_counter: 42, key: [42; 8], cv_stack: [[42; 32]; { crate::MAX_DEPTH + 1 }].into(), }; @@ -815,6 +804,7 @@ fn test_zeroize() { hasher.chunk_state.platform, crate::Platform::Portable )); + assert_eq!(hasher.initial_chunk_counter, 0); assert_eq!(hasher.key, [0; 8]); assert_eq!(&*hasher.cv_stack, &[[0u8; 32]; 0]); @@ -1020,3 +1010,40 @@ fn test_miri_smoketest() { reader.set_position(999999); reader.fill(&mut [0]); } + +// I had to move these tests out of the deprecated guts module, because leaving them there causes +// an un-silenceable warning: https://github.com/rust-lang/rust/issues/47238 +#[cfg(test)] +#[allow(deprecated)] +mod guts_tests { + use crate::guts::*; + + #[test] + fn test_chunk() { + assert_eq!( + crate::hash(b"foo"), + ChunkState::new(0).update(b"foo").finalize(true) + ); + } + + #[test] + fn test_parents() { + let mut hasher = crate::Hasher::new(); + let mut buf = [0; crate::CHUNK_LEN]; + + buf[0] = 'a' as u8; + hasher.update(&buf); + let chunk0_cv = ChunkState::new(0).update(&buf).finalize(false); + + buf[0] = 'b' as u8; + hasher.update(&buf); + let chunk1_cv = ChunkState::new(1).update(&buf).finalize(false); + + hasher.update(b"c"); + let chunk2_cv = ChunkState::new(2).update(b"c").finalize(false); + + let parent = parent_cv(&chunk0_cv, &chunk1_cv, false); + let root = parent_cv(&parent, &chunk2_cv, true); + assert_eq!(hasher.finalize(), root); + } +} diff --git a/test_vectors/src/lib.rs b/test_vectors/src/lib.rs index 543a31d..8df28cd 100644 --- a/test_vectors/src/lib.rs +++ b/test_vectors/src/lib.rs @@ -1,4 +1,4 @@ -use blake3::guts::{BLOCK_LEN, CHUNK_LEN}; +use blake3::{BLOCK_LEN, CHUNK_LEN}; use serde::{Deserialize, Serialize}; // Reading files at runtime requires special configuration under WASM/WASI, so including this at -- cgit v1.2.3
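One usage note on the new `BLOCK_LEN` export and the `OutputReader::fill` guidance in this patch: reading extended output in buffers whose length is a multiple of `BLOCK_LEN` avoids recompressing any block. A minimal editorial sketch, not part of the patch:

```rust
use blake3::{Hasher, BLOCK_LEN};

fn main() {
    let mut reader = Hasher::new().update(b"example input").finalize_xof();

    // Read 1 MiB of XOF output in 4-block (256-byte) steps. Because the buffer length
    // is a multiple of BLOCK_LEN, each 64-byte output block is produced exactly once.
    let mut buf = [0u8; 4 * BLOCK_LEN];
    let mut remaining: usize = 1 << 20;
    while remaining > 0 {
        reader.fill(&mut buf);
        // ...consume `buf` here...
        remaining -= buf.len();
    }
}
```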