diff options
| author | Jack O'Connor <[email protected]> | 2020-05-03 21:13:58 -0400 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2020-05-05 00:59:31 -0400 |
| commit | dc2a79d2669c92795c7214761a0568d629a100e4 (patch) | |
| tree | a727910e2e1028f9fdc79c09d2a5d8279fabb7ff | |
| parent | 4ad79ae457e826e6a74e91aaa0d3f4ec1562dc54 (diff) | |
add newline and backslash escaping to b3sum output
As proposed in
https://github.com/BLAKE3-team/BLAKE3/issues/33#issuecomment-623153164
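This brings b3sum's output format close to that of md5sum. In each printed
filepath, every backslash is replaced with "\\" and every (Unix) newline
character is replaced with the two-character sequence "\n". In addition,
any output line whose filepath contains these escapes has a single "\"
prepended, before the hash. On Windows, backslashes in the filepath are
first normalized to forward slashes, so directory separators there never
need escaping.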
Filepaths were already being converted to UTF-8 with to_string_lossy(),
but this commit adds an extra warning when that conversion is in fact
lossy (because the path is not valid Unicode). The warning is printed on
its own line before the corresponding hash line, and is skipped when --raw
is used. It is printed to stdout rather than stderr, with the goal of
deliberately breaking --check (which is not yet implemented) in this case.
| -rw-r--r-- | .github/workflows/ci.yml | 1 | ||||
| -rw-r--r-- | b3sum/src/main.rs | 58 | ||||
| -rw-r--r-- | b3sum/tests/test.rs | 173 |
3 files changed, 225 insertions, 7 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0eb486a..499b7f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ on: env: BLAKE3_CI: "1" RUSTFLAGS: "-D warnings" + RUST_BACKTRACE: "1" jobs: cargo_tests: diff --git a/b3sum/src/main.rs b/b3sum/src/main.rs index c7ac523..a87a486 100644 --- a/b3sum/src/main.rs +++ b/b3sum/src/main.rs @@ -1,7 +1,9 @@ use anyhow::{bail, Context, Result}; use clap::{App, Arg}; +use std::borrow::Cow; use std::cmp; use std::convert::TryInto; +use std::ffi::OsStr; use std::fs::File; use std::io; use std::io::prelude::*; @@ -219,6 +221,38 @@ fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> { } } +struct FilepathString { + filepath_string: String, + was_lossy: bool, + has_escapes: bool, +} + +// returns (string, did_escape) +fn filepath_to_string(filepath_osstr: &OsStr) -> FilepathString { + let unicode_cow = filepath_osstr.to_string_lossy(); + let was_lossy = matches!(unicode_cow, Cow::Owned(_)); + let mut filepath_string = unicode_cow.to_string(); + // If we're on Windows, normalize backslashes to forward slashes. This + // avoids a lot of ugly escaping in the common case, and it makes + // checkfiles created on Windows more likely to be portable to Unix. It + // also allows us to set a blanket "no backslashes allowed in checkfiles on + // Windows" rule, rather than allowing a Unix backslash to potentially get + // interpreted as a directory separator on Windows. 
+ if cfg!(windows) { + filepath_string = filepath_string.replace('\\', "/"); + } + let mut has_escapes = false; + if filepath_string.contains('\\') || filepath_string.contains('\n') { + filepath_string = filepath_string.replace('\\', "\\\\").replace('\n', "\\n"); + has_escapes = true; + } + FilepathString { + filepath_string, + was_lossy, + has_escapes, + } +} + fn main() -> Result<()> { let args = clap_parse_argv(); let len = if let Some(length) = args.value_of(LENGTH_ARG) { @@ -251,16 +285,30 @@ fn main() -> Result<()> { if raw_output && files.len() > 1 { bail!("b3sum: Only one filename can be provided when using --raw"); } - for filepath in files { - let filepath_str = filepath.to_string_lossy(); - match hash_file(&base_hasher, filepath, mmap_disabled) { + for filepath_osstr in files { + let FilepathString { + filepath_string, + was_lossy, + has_escapes, + } = filepath_to_string(filepath_osstr); + if was_lossy && !raw_output { + // The conversion was lossy. Print a warning. In addition + // to being a warning, this prevents the output from being + // successfully parsed by --check. Thus it goes to stdout + // rather than stderr. 
+ println!("b3sum: warning: filepath contains invalid Unicode"); + } + match hash_file(&base_hasher, filepath_osstr, mmap_disabled) { Ok(output) => { if raw_output { write_raw_output(output, len)?; } else { + if has_escapes { + print!("\\"); + } write_hex_output(output, len)?; if print_names { - println!(" {}", filepath_str); + println!(" {}", filepath_string); } else { println!(); } @@ -268,7 +316,7 @@ fn main() -> Result<()> { } Err(e) => { did_error = true; - eprintln!("b3sum: {}: {}", filepath_str, e); + eprintln!("b3sum: {}: {}", filepath_string, e); } } } diff --git a/b3sum/tests/test.rs b/b3sum/tests/test.rs index bb2f4e0..b2c8428 100644 --- a/b3sum/tests/test.rs +++ b/b3sum/tests/test.rs @@ -1,4 +1,5 @@ use duct::cmd; +use std::ffi::OsString; use std::fs; use std::io::prelude::*; use std::path::PathBuf; @@ -40,9 +41,10 @@ fn test_hash_many() { let expected = format!( "{} {}\n{} {}", foo_hash.to_hex(), - file1.to_string_lossy(), + // account for slash normalization on Windows + file1.to_string_lossy().replace("\\", "/"), bar_hash.to_hex(), - file2.to_string_lossy(), + file2.to_string_lossy().replace("\\", "/"), ); assert_eq!(expected, output); @@ -134,3 +136,170 @@ fn test_raw_with_multi_files_is_an_error() { .run(); assert!(result.is_err()); } + +#[test] +#[cfg(unix)] +fn test_newline_and_backslash_escaping_on_unix() { + let empty_hash = blake3::hash(b"").to_hex(); + let dir = tempfile::tempdir().unwrap(); + fs::create_dir(dir.path().join("subdir")).unwrap(); + let names = [ + "abcdef", + "abc\ndef", + "abc\\def", + "abc\rdef", + "abc\r\ndef", + "subdir/foo", + ]; + let mut paths = Vec::new(); + for name in &names { + let path = dir.path().join(name); + println!("creating file at {:?}", path); + fs::write(&path, b"").unwrap(); + paths.push(path); + } + let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); + let expected = format!( + "\ +{0} abcdef\n\ +\\{0} abc\\ndef\n\ +\\{0} abc\\\\def\n\ +{0} abc\rdef\n\ +\\{0} abc\r\\ndef\n\ +{0} 
subdir/foo", + empty_hash, + ); + println!("output"); + println!("======"); + println!("{}", output); + println!(); + println!("expected"); + println!("========"); + println!("{}", expected); + println!(); + assert_eq!(expected, output); +} + +#[test] +#[cfg(windows)] +fn test_slash_normalization_on_windows() { + let empty_hash = blake3::hash(b"").to_hex(); + let dir = tempfile::tempdir().unwrap(); + fs::create_dir(dir.path().join("subdir")).unwrap(); + // Note that filenames can't contain newlines or backslashes on Windows, so + // we don't test escaping here. We only test forward slash and backslash as + // directory separators. + let names = ["abcdef", "subdir/foo", "subdir\\bar"]; + let mut paths = Vec::new(); + for name in &names { + let path = dir.path().join(name); + println!("creating file at {:?}", path); + fs::write(&path, b"").unwrap(); + paths.push(path); + } + let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); + let expected = format!( + "\ +{0} abcdef\n\ +{0} subdir/foo\n\ +{0} subdir/bar", + empty_hash, + ); + println!("output"); + println!("======"); + println!("{}", output); + println!(); + println!("expected"); + println!("========"); + println!("{}", expected); + println!(); + assert_eq!(expected, output); +} + +#[test] +#[cfg(unix)] +fn test_invalid_unicode_on_unix() { + use std::os::unix::ffi::OsStringExt; + + let empty_hash = blake3::hash(b"").to_hex(); + let dir = tempfile::tempdir().unwrap(); + let names = ["abcdef".into(), OsString::from_vec(b"abc\xffdef".to_vec())]; + let mut paths = Vec::new(); + for name in &names { + let path = dir.path().join(name); + println!("creating file at {:?}", path); + // Note: Some operating systems, macOS in particular, simply don't + // allow invalid Unicode in filenames. On those systems, this write + // will fail. That's fine, we'll just short-circuit this test in that + // case. But assert that at least Linux allows this. 
+ let write_result = fs::write(&path, b""); + if cfg!(target_os = "linux") { + write_result.expect("Linux should allow invalid Unicode"); + } else if write_result.is_err() { + return; + } + paths.push(path); + } + let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); + let expected = format!( + "\ +{0} abcdef\n\ +b3sum: warning: filepath contains invalid Unicode +{0} abc�def", + empty_hash, + ); + println!("output"); + println!("======"); + println!("{}", output); + println!(); + println!("expected"); + println!("========"); + println!("{}", expected); + println!(); + assert_eq!(expected, output); +} + +#[test] +#[cfg(windows)] +fn test_invalid_unicode_on_windows() { + use std::os::windows::ffi::OsStringExt; + + let empty_hash = blake3::hash(b"").to_hex(); + let dir = tempfile::tempdir().unwrap(); + let surrogate_char = 0xDC00; + let bad_unicode_wchars = [ + 'a' as u16, + 'b' as u16, + 'c' as u16, + surrogate_char, + 'd' as u16, + 'e' as u16, + 'f' as u16, + ]; + let bad_osstring = OsString::from_wide(&bad_unicode_wchars); + let names = ["abcdef".into(), bad_osstring]; + let mut paths = Vec::new(); + for name in &names { + let path = dir.path().join(name); + println!("creating file at {:?}", path); + fs::write(&path, b"").unwrap(); + paths.push(path); + } + let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); + let expected = format!( + "\ +{0} abcdef\n\ +b3sum: warning: filepath contains invalid Unicode +{0} abc�def", + empty_hash, + ); + println!("output"); + println!("======"); + println!("{}", output); + println!(); + println!("expected"); + println!("========"); + println!("{}", expected); + println!(); + assert_eq!(expected, output); +} |
