diff options
| author | Jack O'Connor <[email protected]> | 2023-09-10 15:18:11 -0700 |
|---|---|---|
| committer | Jack O'Connor <[email protected]> | 2023-09-16 17:04:27 -0700 |
| commit | cb32f0bd1450991d86a8ceb3716fd591382cc507 (patch) | |
| tree | f0112db7d1a4452facd92111eb7134a20270d60e /b3sum/src | |
| parent | e0bb91564125407102af81e219399025aa2c24b9 (diff) | |
replace the new file module with inherent methods on Hasher
New methods:
- update_reader
- update_mmap
- update_mmap_rayon
These are more discoverable, more convenient, and safer.
There are two problems I want to avoid by taking a `Path` instead of a
`File`. First, exposing `Mmap` objects to the caller is fundamentally
unsafe, and making `maybe_mmap_file` private avoids that issue. Second,
taking a `File` raises questions about whether memory mapped reads
should behave like regular file reads. (Should they respect the current
seek position? Should they update the seek position?) Taking a `Path`
from the caller and opening the `File` internally avoids these
questions.
Diffstat (limited to 'b3sum/src')
| -rw-r--r-- | b3sum/src/main.rs | 110 |
1 files changed, 33 insertions, 77 deletions
diff --git a/b3sum/src/main.rs b/b3sum/src/main.rs index 165c579..228737f 100644 --- a/b3sum/src/main.rs +++ b/b3sum/src/main.rs @@ -163,73 +163,22 @@ impl Args { } } -enum Input { - Mmap(io::Cursor<memmap2::Mmap>), - File(File), - Stdin, -} - -impl Input { - // Open an input file, using mmap if appropriate. "-" means stdin. Note - // that this convention applies both to command line arguments, and to - // filepaths that appear in a checkfile. - fn open(path: &Path, args: &Args) -> Result<Self> { - if path == Path::new("-") { - if args.keyed() { - bail!("Cannot open `-` in keyed mode"); - } - return Ok(Self::Stdin); - } - let file = File::open(path)?; - if !args.no_mmap() { - if let Some(mmap) = blake3::file::maybe_memmap_file(&file)? { - return Ok(Self::Mmap(io::Cursor::new(mmap))); - } - } - Ok(Self::File(file)) - } - - fn hash(&mut self, args: &Args) -> Result<blake3::OutputReader> { - let mut hasher = args.base_hasher.clone(); - match self { - // The fast path: If we mmapped the file successfully, hash using - // multiple threads. This doesn't work on stdin, or on some files, - // and it can also be disabled with --no-mmap. - Self::Mmap(cursor) => { - hasher.update_rayon(cursor.get_ref()); - } - // The slower paths, for stdin or files we didn't/couldn't mmap. - // This is currently all single-threaded. Doing multi-threaded - // hashing without memory mapping is tricky, since all your worker - // threads have to stop every time you refill the buffer, and that - // ends up being a lot of overhead. To solve that, we need a more - // complicated double-buffering strategy where a background thread - // fills one buffer while the worker threads are hashing the other - // one. We might implement that in the future, but since this is - // the slow path anyway, it's not high priority. - Self::File(file) => { - blake3::copy_wide(file, &mut hasher)?; - } - Self::Stdin => { - let stdin = io::stdin(); - let lock = stdin.lock(); - blake3::copy_wide(lock, &mut hasher)?; - } - } - let mut output_reader = hasher.finalize_xof(); - output_reader.set_position(args.seek()); - Ok(output_reader) - } -} - -impl Read for Input { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - match self { - Self::Mmap(cursor) => cursor.read(buf), - Self::File(file) => file.read(buf), - Self::Stdin => io::stdin().read(buf), +fn hash_path(args: &Args, path: &Path) -> Result<blake3::OutputReader> { + let mut hasher = args.base_hasher.clone(); + if path == Path::new("-") { + if args.keyed() { + bail!("Cannot open `-` in keyed mode"); } + hasher.update_reader(io::stdin().lock())?; + } else if args.no_mmap() { + hasher.update_reader(File::open(path)?)?; + } else { + // The fast path: Try to mmap the file and hash it with multiple threads. + hasher.update_mmap_rayon(path)?; } + let mut output_reader = hasher.finalize_xof(); + output_reader.set_position(args.seek()); + Ok(output_reader) } fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> { @@ -425,8 +374,7 @@ fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> { } fn hash_one_input(path: &Path, args: &Args) -> Result<()> { - let mut input = Input::open(path, args)?; - let output = input.hash(args)?; + let output = hash_path(args, path)?; if args.raw() { write_raw_output(output, args)?; return Ok(()); @@ -470,15 +418,13 @@ fn check_one_line(line: &str, args: &Args) -> bool { } else { file_string }; - let hash_result: Result<blake3::Hash> = Input::open(&file_path, args) - .and_then(|mut input| input.hash(args)) - .map(|mut hash_output| { + let found_hash: blake3::Hash; + match hash_path(args, &file_path) { + Ok(mut output) => { let mut found_hash_bytes = [0; blake3::OUT_LEN]; - hash_output.fill(&mut found_hash_bytes); - found_hash_bytes.into() - }); - let found_hash: blake3::Hash = match hash_result { - Ok(hash) => hash, + output.fill(&mut found_hash_bytes); + found_hash = found_hash_bytes.into(); + } Err(e) => { println!("{}: FAILED ({})", file_string, e); return false; @@ -497,8 +443,18 @@ fn check_one_line(line: &str, args: &Args) -> bool { } fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> Result<()> { - let checkfile_input = Input::open(path, args)?; - let mut bufreader = io::BufReader::new(checkfile_input); + let mut file; + let stdin; + let mut stdin_lock; + let mut bufreader: io::BufReader<&mut dyn Read>; + if path == Path::new("-") { + stdin = io::stdin(); + stdin_lock = stdin.lock(); + bufreader = io::BufReader::new(&mut stdin_lock); + } else { + file = File::open(path)?; + bufreader = io::BufReader::new(&mut file); + } let mut line = String::new(); loop { line.clear(); |
