aboutsummaryrefslogtreecommitdiff
path: root/b3sum/src
diff options
context:
space:
mode:
authorJack O'Connor <[email protected]>2023-09-10 15:18:11 -0700
committerJack O'Connor <[email protected]>2023-09-16 17:04:27 -0700
commitcb32f0bd1450991d86a8ceb3716fd591382cc507 (patch)
treef0112db7d1a4452facd92111eb7134a20270d60e /b3sum/src
parente0bb91564125407102af81e219399025aa2c24b9 (diff)
replace the new file module with inherent methods on Hasher
New methods: - update_reader - update_mmap - update_mmap_rayon These are more discoverable, more convenient, and safer. There are two problems I want to avoid by taking a `Path` instead of a `File`. First, exposing `Mmap` objects to the caller is fundamentally unsafe, and making `maybe_mmap_file` private avoids that issue. Second, taking a `File` raises questions about whether memory mapped reads should behave like regular file reads. (Should they respect the current seek position? Should they update the seek position?) Taking a `Path` from the caller and opening the `File` internally avoids these questions.
Diffstat (limited to 'b3sum/src')
-rw-r--r--b3sum/src/main.rs110
1 files changed, 33 insertions, 77 deletions
diff --git a/b3sum/src/main.rs b/b3sum/src/main.rs
index 165c579..228737f 100644
--- a/b3sum/src/main.rs
+++ b/b3sum/src/main.rs
@@ -163,73 +163,22 @@ impl Args {
}
}
-enum Input {
- Mmap(io::Cursor<memmap2::Mmap>),
- File(File),
- Stdin,
-}
-
-impl Input {
- // Open an input file, using mmap if appropriate. "-" means stdin. Note
- // that this convention applies both to command line arguments, and to
- // filepaths that appear in a checkfile.
- fn open(path: &Path, args: &Args) -> Result<Self> {
- if path == Path::new("-") {
- if args.keyed() {
- bail!("Cannot open `-` in keyed mode");
- }
- return Ok(Self::Stdin);
- }
- let file = File::open(path)?;
- if !args.no_mmap() {
- if let Some(mmap) = blake3::file::maybe_memmap_file(&file)? {
- return Ok(Self::Mmap(io::Cursor::new(mmap)));
- }
- }
- Ok(Self::File(file))
- }
-
- fn hash(&mut self, args: &Args) -> Result<blake3::OutputReader> {
- let mut hasher = args.base_hasher.clone();
- match self {
- // The fast path: If we mmapped the file successfully, hash using
- // multiple threads. This doesn't work on stdin, or on some files,
- // and it can also be disabled with --no-mmap.
- Self::Mmap(cursor) => {
- hasher.update_rayon(cursor.get_ref());
- }
- // The slower paths, for stdin or files we didn't/couldn't mmap.
- // This is currently all single-threaded. Doing multi-threaded
- // hashing without memory mapping is tricky, since all your worker
- // threads have to stop every time you refill the buffer, and that
- // ends up being a lot of overhead. To solve that, we need a more
- // complicated double-buffering strategy where a background thread
- // fills one buffer while the worker threads are hashing the other
- // one. We might implement that in the future, but since this is
- // the slow path anyway, it's not high priority.
- Self::File(file) => {
- blake3::copy_wide(file, &mut hasher)?;
- }
- Self::Stdin => {
- let stdin = io::stdin();
- let lock = stdin.lock();
- blake3::copy_wide(lock, &mut hasher)?;
- }
- }
- let mut output_reader = hasher.finalize_xof();
- output_reader.set_position(args.seek());
- Ok(output_reader)
- }
-}
-
-impl Read for Input {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- match self {
- Self::Mmap(cursor) => cursor.read(buf),
- Self::File(file) => file.read(buf),
- Self::Stdin => io::stdin().read(buf),
+fn hash_path(args: &Args, path: &Path) -> Result<blake3::OutputReader> {
+ let mut hasher = args.base_hasher.clone();
+ if path == Path::new("-") {
+ if args.keyed() {
+ bail!("Cannot open `-` in keyed mode");
}
+ hasher.update_reader(io::stdin().lock())?;
+ } else if args.no_mmap() {
+ hasher.update_reader(File::open(path)?)?;
+ } else {
+ // The fast path: Try to mmap the file and hash it with multiple threads.
+ hasher.update_mmap_rayon(path)?;
}
+ let mut output_reader = hasher.finalize_xof();
+ output_reader.set_position(args.seek());
+ Ok(output_reader)
}
fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
@@ -425,8 +374,7 @@ fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> {
}
fn hash_one_input(path: &Path, args: &Args) -> Result<()> {
- let mut input = Input::open(path, args)?;
- let output = input.hash(args)?;
+ let output = hash_path(args, path)?;
if args.raw() {
write_raw_output(output, args)?;
return Ok(());
@@ -470,15 +418,13 @@ fn check_one_line(line: &str, args: &Args) -> bool {
} else {
file_string
};
- let hash_result: Result<blake3::Hash> = Input::open(&file_path, args)
- .and_then(|mut input| input.hash(args))
- .map(|mut hash_output| {
+ let found_hash: blake3::Hash;
+ match hash_path(args, &file_path) {
+ Ok(mut output) => {
let mut found_hash_bytes = [0; blake3::OUT_LEN];
- hash_output.fill(&mut found_hash_bytes);
- found_hash_bytes.into()
- });
- let found_hash: blake3::Hash = match hash_result {
- Ok(hash) => hash,
+ output.fill(&mut found_hash_bytes);
+ found_hash = found_hash_bytes.into();
+ }
Err(e) => {
println!("{}: FAILED ({})", file_string, e);
return false;
@@ -497,8 +443,18 @@ fn check_one_line(line: &str, args: &Args) -> bool {
}
fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> Result<()> {
- let checkfile_input = Input::open(path, args)?;
- let mut bufreader = io::BufReader::new(checkfile_input);
+ let mut file;
+ let stdin;
+ let mut stdin_lock;
+ let mut bufreader: io::BufReader<&mut dyn Read>;
+ if path == Path::new("-") {
+ stdin = io::stdin();
+ stdin_lock = stdin.lock();
+ bufreader = io::BufReader::new(&mut stdin_lock);
+ } else {
+ file = File::open(path)?;
+ bufreader = io::BufReader::new(&mut file);
+ }
let mut line = String::new();
loop {
line.clear();