3 files changed, 114 insertions, 0 deletions
diff --git a/test_vectors/Cargo.toml b/test_vectors/Cargo.toml
new file mode 100644
index 0000000..aa4c0e7
--- /dev/null
+++ b/test_vectors/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "test_vectors"
+version = "0.0.0"
+edition = "2018"
+
+[dependencies]
+blake3 = { path = "../" }
+hex = "0.4.0"
+reference_impl = { path = "../reference_impl" }
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
diff --git a/test_vectors/src/bin/generate.rs b/test_vectors/src/bin/generate.rs
new file mode 100644
index 0000000..290d335
--- /dev/null
+++ b/test_vectors/src/bin/generate.rs
@@ -0,0 +1,58 @@
+use serde::Serialize;
+
+// Bytes of extended output requested per mode. A non-multiple of 4 is important,
+// since one possible bug is to fail to emit partial words.
+const OUTPUT_LEN: usize = 2 * blake3::BLOCK_LEN + 3;
+
+#[derive(Serialize)] // serialized by main() as the top-level JSON document
+struct Cases {
+    _comment: &'static str, // free-form description of the vectors for human readers
+    key: &'static str, // TEST_KEY as text, as used by keyed_hash and derive_key
+    cases: Vec<Case>, // one entry per input length in TEST_CASES
+}
+
+#[derive(Serialize)] // one input length together with its three hex-encoded outputs
+struct Case {
+    input_len: usize, // number of input bytes that were hashed
+    hash: String, // hex of the Hasher::new() output
+    keyed_hash: String, // hex of the Hasher::new_keyed(TEST_KEY) output
+    derive_key: String, // hex of the Hasher::new_derive_key(TEST_KEY) output
+}
+
+fn main() {
+    let mut cases = Vec::new(); // collects one Case per entry of TEST_CASES
+    for &input_len in test_vectors::TEST_CASES {
+        let mut input = vec![0; input_len];
+        test_vectors::paint_test_input(&mut input); // fill with the shared test pattern
+
+        let mut hash_out = [0; OUTPUT_LEN]; // receives the output of the unkeyed hash mode
+        blake3::Hasher::new()
+            .update(&input)
+            .finalize_xof(&mut hash_out);
+
+        let mut keyed_hash_out = [0; OUTPUT_LEN]; // receives the output of the keyed_hash mode
+        blake3::Hasher::new_keyed(test_vectors::TEST_KEY)
+            .update(&input)
+            .finalize_xof(&mut keyed_hash_out);
+
+        let mut derive_key_out = [0; OUTPUT_LEN]; // receives the output of the derive_key mode
+        blake3::Hasher::new_derive_key(test_vectors::TEST_KEY)
+            .update(&input)
+            .finalize_xof(&mut derive_key_out);
+
+        cases.push(Case {
+            input_len,
+            hash: hex::encode(&hash_out[..]), // each output is stored as a hex string
+            keyed_hash: hex::encode(&keyed_hash_out[..]),
+            derive_key: hex::encode(&derive_key_out[..]),
+        });
+    }
+
+    let output = serde_json::to_string_pretty(&Cases {
+        _comment: "Each test is an input length and three outputs, one for each of the hash, keyed_hash, and derive_key modes. The input in each case is filled with a 251-byte-long repeating pattern: 0, 1, 2, ..., 249, 250, 0, 1, ... The key used with keyed_hash and derive_key is the 32-byte ASCII string given below. Outputs are encoded as hexadecimal. Each case is an extended output, and implementations should also check that the first 32 bytes match their default-length output.",
+        key: std::str::from_utf8(test_vectors::TEST_KEY).unwrap(), // panics on a non-UTF-8 key
+        cases,
+    }).unwrap();
+
+    println!("{}", &output); // the pretty-printed JSON on stdout is the program's only output
+}
diff --git a/test_vectors/src/lib.rs b/test_vectors/src/lib.rs
new file mode 100644
index 0000000..e38d9f2
--- /dev/null
+++ b/test_vectors/src/lib.rs
@@ -0,0 +1,45 @@
+use blake3::CHUNK_LEN;
+
+pub const TEST_CASES: &[usize] = &[
+    0, // every entry is an input length in bytes; this one is the empty input
+    1,
+    CHUNK_LEN - 1, // one byte short of a full chunk
+    CHUNK_LEN,
+    CHUNK_LEN + 1, // one byte past a full chunk
+    2 * CHUNK_LEN, // from 2 to 8 chunks: each whole multiple, then that multiple plus one byte
+    2 * CHUNK_LEN + 1,
+    3 * CHUNK_LEN,
+    3 * CHUNK_LEN + 1,
+    4 * CHUNK_LEN,
+    4 * CHUNK_LEN + 1,
+    5 * CHUNK_LEN,
+    5 * CHUNK_LEN + 1,
+    6 * CHUNK_LEN,
+    6 * CHUNK_LEN + 1,
+    7 * CHUNK_LEN,
+    7 * CHUNK_LEN + 1,
+    8 * CHUNK_LEN,
+    8 * CHUNK_LEN + 1,
+    16 * CHUNK_LEN, // AVX512's bandwidth
+    31 * CHUNK_LEN, // 16 + 8 + 4 + 2 + 1
+];
+
+pub const TEST_KEY: &[u8; blake3::KEY_LEN] = b"whats the Elvish word for friend";
+
+// Paint the input with a repeating byte pattern. We use a cycle length of 251,
+// because that's the largest prime number less than 256. This makes it
+// unlikely that swapping any two adjacent input blocks or chunks will give the
+// same answer.
+pub fn paint_test_input(buf: &mut [u8]) {
+    let pattern = (0..251u8).cycle(); // yields 0, 1, ..., 250, then starts over
+    buf.iter_mut().zip(pattern).for_each(|(slot, value)| *slot = value);
+    // Every byte of `buf` is overwritten; an empty slice is left untouched.
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_key_is_ascii() {
+        assert!(super::TEST_KEY.is_ascii()); // generate.rs emits the key as ASCII text
+    }
+}