aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJack O'Connor <[email protected]>2020-05-03 21:13:58 -0400
committerJack O'Connor <[email protected]>2020-05-05 00:59:31 -0400
commitdc2a79d2669c92795c7214761a0568d629a100e4 (patch)
treea727910e2e1028f9fdc79c09d2a5d8279fabb7ff
parent4ad79ae457e826e6a74e91aaa0d3f4ec1562dc54 (diff)
add newline and backslash escaping to b3sum output
As proposed in https://github.com/BLAKE3-team/BLAKE3/issues/33#issuecomment-623153164

This brings b3sum behavior close to md5sum. All occurrences of backslash are replaced with "\\", and all occurrences of (Unix) newline are replaced with "\n". In addition, any line containing these escapes has a single "\" prepended to the front.

Filepaths were already being converted to UTF-8 with to_string_lossy(), but this commit adds an extra warning when that conversion is in fact lossy (because the path is not valid Unicode). This new warning is printed to stdout, with the goal of deliberately breaking --check (which is not yet implemented) in this case.
-rw-r--r--.github/workflows/ci.yml1
-rw-r--r--b3sum/src/main.rs58
-rw-r--r--b3sum/tests/test.rs173
3 files changed, 225 insertions, 7 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0eb486a..499b7f5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,7 @@ on:
env:
BLAKE3_CI: "1"
RUSTFLAGS: "-D warnings"
+ RUST_BACKTRACE: "1"
jobs:
cargo_tests:
diff --git a/b3sum/src/main.rs b/b3sum/src/main.rs
index c7ac523..a87a486 100644
--- a/b3sum/src/main.rs
+++ b/b3sum/src/main.rs
@@ -1,7 +1,9 @@
use anyhow::{bail, Context, Result};
use clap::{App, Arg};
+use std::borrow::Cow;
use std::cmp;
use std::convert::TryInto;
+use std::ffi::OsStr;
use std::fs::File;
use std::io;
use std::io::prelude::*;
@@ -219,6 +221,38 @@ fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> {
}
}
+// The printable form of a filepath, plus flags describing how it was derived
+// from the original OS string. Produced by filepath_to_string().
+struct FilepathString {
+ // The path as text, after lossy Unicode conversion, Windows separator
+ // normalization, and backslash/newline escaping.
+ filepath_string: String,
+ // True when to_string_lossy() returned an owned string. main() treats this
+ // as "the path contained invalid Unicode" and prints a warning.
+ was_lossy: bool,
+ // True when at least one backslash or newline escape was applied. main()
+ // uses this to decide whether to prefix the output line with a backslash.
+ has_escapes: bool,
+}
+
+// Converts a filepath into the string that b3sum prints, and reports how the
+// conversion went. The returned FilepathString carries:
+// - filepath_string: the lossily-decoded path, with backslashes normalized to
+//   forward slashes on Windows, and with any remaining backslashes and
+//   newlines escaped as "\\" and "\n" respectively,
+// - was_lossy: true if decoding had to replace invalid Unicode,
+// - has_escapes: true if any backslash or newline escape was applied.
+fn filepath_to_string(filepath_osstr: &OsStr) -> FilepathString {
+    let unicode_cow = filepath_osstr.to_string_lossy();
+    // to_string_lossy() only hands back an owned string when it had to
+    // substitute replacement characters, so the Cow variant tells us whether
+    // the conversion lost information.
+    let was_lossy = matches!(unicode_cow, Cow::Owned(_));
+    // into_owned() reuses the existing buffer in the lossy case instead of
+    // copying it a second time.
+    let mut filepath_string = unicode_cow.into_owned();
+    // If we're on Windows, normalize backslashes to forward slashes. This
+    // avoids a lot of ugly escaping in the common case, and it makes
+    // checkfiles created on Windows more likely to be portable to Unix. It
+    // also allows us to set a blanket "no backslashes allowed in checkfiles on
+    // Windows" rule, rather than allowing a Unix backslash to potentially get
+    // interpreted as a directory separator on Windows.
+    if cfg!(windows) {
+        filepath_string = filepath_string.replace('\\', "/");
+    }
+    let has_escapes = filepath_string.contains(|c: char| c == '\\' || c == '\n');
+    if has_escapes {
+        // Backslashes are doubled first, so that the backslash introduced by
+        // the newline escape is not itself doubled.
+        filepath_string = filepath_string.replace('\\', "\\\\").replace('\n', "\\n");
+    }
+    FilepathString {
+        filepath_string,
+        was_lossy,
+        has_escapes,
+    }
+}
+
fn main() -> Result<()> {
let args = clap_parse_argv();
let len = if let Some(length) = args.value_of(LENGTH_ARG) {
@@ -251,16 +285,30 @@ fn main() -> Result<()> {
if raw_output && files.len() > 1 {
bail!("b3sum: Only one filename can be provided when using --raw");
}
- for filepath in files {
- let filepath_str = filepath.to_string_lossy();
- match hash_file(&base_hasher, filepath, mmap_disabled) {
+ for filepath_osstr in files {
+ let FilepathString {
+ filepath_string,
+ was_lossy,
+ has_escapes,
+ } = filepath_to_string(filepath_osstr);
+ if was_lossy && !raw_output {
+ // The conversion was lossy. Print a warning. In addition
+ // to being a warning, this prevents the output from being
+ // successfully parsed by --check. Thus it goes to stdout
+ // rather than stderr.
+ println!("b3sum: warning: filepath contains invalid Unicode");
+ }
+ match hash_file(&base_hasher, filepath_osstr, mmap_disabled) {
Ok(output) => {
if raw_output {
write_raw_output(output, len)?;
} else {
+ if has_escapes {
+ print!("\\");
+ }
write_hex_output(output, len)?;
if print_names {
- println!(" {}", filepath_str);
+ println!(" {}", filepath_string);
} else {
println!();
}
@@ -268,7 +316,7 @@ fn main() -> Result<()> {
}
Err(e) => {
did_error = true;
- eprintln!("b3sum: {}: {}", filepath_str, e);
+ eprintln!("b3sum: {}: {}", filepath_string, e);
}
}
}
diff --git a/b3sum/tests/test.rs b/b3sum/tests/test.rs
index bb2f4e0..b2c8428 100644
--- a/b3sum/tests/test.rs
+++ b/b3sum/tests/test.rs
@@ -1,4 +1,5 @@
use duct::cmd;
+use std::ffi::OsString;
use std::fs;
use std::io::prelude::*;
use std::path::PathBuf;
@@ -40,9 +41,10 @@ fn test_hash_many() {
let expected = format!(
"{} {}\n{} {}",
foo_hash.to_hex(),
- file1.to_string_lossy(),
+ // account for slash normalization on Windows
+ file1.to_string_lossy().replace("\\", "/"),
bar_hash.to_hex(),
- file2.to_string_lossy(),
+ file2.to_string_lossy().replace("\\", "/"),
);
assert_eq!(expected, output);
@@ -134,3 +136,170 @@ fn test_raw_with_multi_files_is_an_error() {
.run();
assert!(result.is_err());
}
+
+// Checks how b3sum prints filenames that contain newlines, backslashes and
+// carriage returns on Unix. Only compiled on Unix targets.
+#[test]
+#[cfg(unix)]
+fn test_newline_and_backslash_escaping_on_unix() {
+ // Every file below is written empty, so all output lines share this hash.
+ let empty_hash = blake3::hash(b"").to_hex();
+ let dir = tempfile::tempdir().unwrap();
+ fs::create_dir(dir.path().join("subdir")).unwrap();
+ let names = [
+ "abcdef",
+ "abc\ndef",
+ "abc\\def",
+ "abc\rdef",
+ "abc\r\ndef",
+ "subdir/foo",
+ ];
+ // NOTE(review): paths is filled in but never read later in this function.
+ let mut paths = Vec::new();
+ for name in &names {
+ let path = dir.path().join(name);
+ println!("creating file at {:?}", path);
+ fs::write(&path, b"").unwrap();
+ paths.push(path);
+ }
+ // Pass the relative names as arguments and run from inside the temp dir.
+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+ // Expected lines for names containing a newline or backslash start with a
+ // backslash and show the name escaped. "\r" is expected to pass through
+ // unescaped and does not trigger the prefix by itself.
+ let expected = format!(
+ "\
+{0} abcdef\n\
+\\{0} abc\\ndef\n\
+\\{0} abc\\\\def\n\
+{0} abc\rdef\n\
+\\{0} abc\r\\ndef\n\
+{0} subdir/foo",
+ empty_hash,
+ );
+ // Print both strings before comparing them.
+ println!("output");
+ println!("======");
+ println!("{}", output);
+ println!();
+ println!("expected");
+ println!("========");
+ println!("{}", expected);
+ println!();
+ assert_eq!(expected, output);
+}
+
+// Checks that b3sum prints forward slashes as directory separators on
+// Windows, whichever separator was used on the command line. Only compiled
+// on Windows targets.
+#[test]
+#[cfg(windows)]
+fn test_slash_normalization_on_windows() {
+ // Every file below is written empty, so all output lines share this hash.
+ let empty_hash = blake3::hash(b"").to_hex();
+ let dir = tempfile::tempdir().unwrap();
+ fs::create_dir(dir.path().join("subdir")).unwrap();
+ // Note that filenames can't contain newlines or backslashes on Windows, so
+ // we don't test escaping here. We only test forward slash and backslash as
+ // directory separators.
+ let names = ["abcdef", "subdir/foo", "subdir\\bar"];
+ // NOTE(review): paths is filled in but never read later in this function.
+ let mut paths = Vec::new();
+ for name in &names {
+ let path = dir.path().join(name);
+ println!("creating file at {:?}", path);
+ fs::write(&path, b"").unwrap();
+ paths.push(path);
+ }
+ // Pass the relative names as arguments and run from inside the temp dir.
+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+ // "subdir\\bar" is expected to come back as "subdir/bar", with no escape
+ // prefix on any line.
+ let expected = format!(
+ "\
+{0} abcdef\n\
+{0} subdir/foo\n\
+{0} subdir/bar",
+ empty_hash,
+ );
+ // Print both strings before comparing them.
+ println!("output");
+ println!("======");
+ println!("{}", output);
+ println!();
+ println!("expected");
+ println!("========");
+ println!("{}", expected);
+ println!();
+ assert_eq!(expected, output);
+}
+
+// Checks that hashing a file whose name is not valid UTF-8 makes b3sum emit
+// its "invalid Unicode" warning in the captured output, followed by the name
+// with the bad byte shown as a replacement character. Only compiled on Unix
+// targets.
+#[test]
+#[cfg(unix)]
+fn test_invalid_unicode_on_unix() {
+ use std::os::unix::ffi::OsStringExt;
+
+ // Every file below is written empty, so all output lines share this hash.
+ let empty_hash = blake3::hash(b"").to_hex();
+ let dir = tempfile::tempdir().unwrap();
+ // The second name contains the byte 0xff, which never occurs in UTF-8.
+ let names = ["abcdef".into(), OsString::from_vec(b"abc\xffdef".to_vec())];
+ // NOTE(review): paths is filled in but never read later in this function.
+ let mut paths = Vec::new();
+ for name in &names {
+ let path = dir.path().join(name);
+ println!("creating file at {:?}", path);
+ // Note: Some operating systems, macOS in particular, simply don't
+ // allow invalid Unicode in filenames. On those systems, this write
+ // will fail. That's fine, we'll just short-circuit this test in that
+ // case. But assert that at least Linux allows this.
+ let write_result = fs::write(&path, b"");
+ if cfg!(target_os = "linux") {
+ write_result.expect("Linux should allow invalid Unicode");
+ } else if write_result.is_err() {
+ return;
+ }
+ paths.push(path);
+ }
+ // Pass the relative names as arguments and run from inside the temp dir.
+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+ // The warning line in the literal below ends with a raw newline inside the
+ // string, not with an escaped "\n" plus line continuation like the line
+ // before it.
+ let expected = format!(
+ "\
+{0} abcdef\n\
+b3sum: warning: filepath contains invalid Unicode
+{0} abc�def",
+ empty_hash,
+ );
+ // Print both strings before comparing them.
+ println!("output");
+ println!("======");
+ println!("{}", output);
+ println!();
+ println!("expected");
+ println!("========");
+ println!("{}", expected);
+ println!();
+ assert_eq!(expected, output);
+}
+
+// Checks that hashing a file whose name is not valid UTF-16 makes b3sum emit
+// its "invalid Unicode" warning in the captured output, followed by the name
+// with the bad code unit shown as a replacement character. Only compiled on
+// Windows targets.
+#[test]
+#[cfg(windows)]
+fn test_invalid_unicode_on_windows() {
+ use std::os::windows::ffi::OsStringExt;
+
+ // Every file below is written empty, so all output lines share this hash.
+ let empty_hash = blake3::hash(b"").to_hex();
+ let dir = tempfile::tempdir().unwrap();
+ // 0xDC00 is a low surrogate. With no high surrogate before it, the
+ // sequence below is not well-formed UTF-16.
+ let surrogate_char = 0xDC00;
+ let bad_unicode_wchars = [
+ 'a' as u16,
+ 'b' as u16,
+ 'c' as u16,
+ surrogate_char,
+ 'd' as u16,
+ 'e' as u16,
+ 'f' as u16,
+ ];
+ let bad_osstring = OsString::from_wide(&bad_unicode_wchars);
+ let names = ["abcdef".into(), bad_osstring];
+ // NOTE(review): paths is filled in but never read later in this function.
+ let mut paths = Vec::new();
+ for name in &names {
+ let path = dir.path().join(name);
+ println!("creating file at {:?}", path);
+ fs::write(&path, b"").unwrap();
+ paths.push(path);
+ }
+ // Pass the relative names as arguments and run from inside the temp dir.
+ let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+ // The warning line in the literal below ends with a raw newline inside the
+ // string, not with an escaped "\n" plus line continuation like the line
+ // before it.
+ let expected = format!(
+ "\
+{0} abcdef\n\
+b3sum: warning: filepath contains invalid Unicode
+{0} abc�def",
+ empty_hash,
+ );
+ // Print both strings before comparing them.
+ println!("output");
+ println!("======");
+ println!("{}", output);
+ println!();
+ println!("expected");
+ println!("========");
+ println!("{}", expected);
+ println!();
+ assert_eq!(expected, output);
+}