Renamed 3rdparty -> thirdparty for legal compliance

author: Stefan Boberg <[email protected]> 2021-11-04 14:18:54 +0100
committer: Stefan Boberg <[email protected]> 2021-11-04 14:19:05 +0100
commit: 472569d4a5f2daaef7e234d93b417ccb650be624 (patch)
tree: 085c33f178855ad5ffe48b01bf99d41516ffa011 /thirdparty/BLAKE3/b3sum
parent: Merge branch 'main' of https://github.com/EpicGames/zen (diff)
download: zen-472569d4a5f2daaef7e234d93b417ccb650be624.tar.xz
zen-472569d4a5f2daaef7e234d93b417ccb650be624.zip
6 files changed, 1649 insertions, 0 deletions
diff --git a/thirdparty/BLAKE3/b3sum/Cargo.toml b/thirdparty/BLAKE3/b3sum/Cargo.toml
new file mode 100644
index 000000000..4678bee2d
--- /dev/null
+++ b/thirdparty/BLAKE3/b3sum/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "b3sum"
+version = "0.3.7"
+authors = ["Jack O'Connor <[email protected]>"]
+description = "a command line implementation of the BLAKE3 hash function"
+repository = "https://github.com/BLAKE3-team/BLAKE3"
+license = "CC0-1.0 OR Apache-2.0"
+readme = "README.md"
+edition = "2018"
+
+[features]
+neon = ["blake3/neon"]
+prefer_intrinsics = ["blake3/prefer_intrinsics"]
+pure = ["blake3/pure"]
+
+[dependencies]
+anyhow = "1.0.25"
+blake3 = { version = "0.3", path = "..", features = ["rayon"] }
+clap = "2.33.1"
+hex = "0.4.0"
+memmap = "0.7.0"
+rayon = "1.2.1"
+wild = "2.0.3"
+
+[dev-dependencies]
+duct = "0.13.3"
+tempfile = "3.1.0"
diff --git a/thirdparty/BLAKE3/b3sum/README.md b/thirdparty/BLAKE3/b3sum/README.md
new file mode 100644
index 000000000..e97830b7c
--- /dev/null
+++ b/thirdparty/BLAKE3/b3sum/README.md
@@ -0,0 +1,86 @@
+# b3sum
+
+A command line utility for calculating
+[BLAKE3](https://github.com/BLAKE3-team/BLAKE3) hashes, similar to
+Coreutils tools like `b2sum` or `md5sum`.
+
+```
+b3sum 0.3.7
+
+USAGE:
+    b3sum [FLAGS] [OPTIONS] [FILE]...
+
+FLAGS:
+    -c, --check       Reads BLAKE3 sums from the [file]s and checks them
+    -h, --help        Prints help information
+        --keyed       Uses the keyed mode. The secret key is read from standard
+                      input, and it must be exactly 32 raw bytes.
+        --no-mmap     Disables memory mapping. Currently this also disables
+                      multithreading.
+        --no-names    Omits filenames in the output
+        --quiet       Skips printing OK for each successfully verified file.
+                      Must be used with --check.
+        --raw         Writes raw output bytes to stdout, rather than hex.
+                      --no-names is implied. In this case, only a single
+                      input is allowed.
+    -V, --version     Prints version information
+
+OPTIONS:
+        --derive-key <CONTEXT>    Uses the key derivation mode, with the given
+                                  context string. Cannot be used with --keyed.
+    -l, --length <LEN>            The number of output bytes, prior to hex
+                                  encoding (default 32)
+        --num-threads <NUM>       The maximum number of threads to use. By
+                                  default, this is the number of logical cores.
+                                  If this flag is omitted, or if its value is 0,
+                                  RAYON_NUM_THREADS is also respected.
+
+ARGS:
+    <FILE>...    Files to hash, or checkfiles to check. When no file is given,
+                 or when - is given, read standard input.
+```
+
+See also [this document about how the `--check` flag
+works](https://github.com/BLAKE3-team/BLAKE3/blob/master/b3sum/what_does_check_do.md).
+
+# Example
+
+Hash the file `foo.txt`:
+
+```bash
+b3sum foo.txt
+```
+
+Time hashing a gigabyte of data, to see how fast it is:
+
+```bash
+# Create a 1 GB file.
+head -c 1000000000 /dev/zero > /tmp/bigfile
+# Hash it with SHA-256.
+time openssl sha256 /tmp/bigfile
+# Hash it with BLAKE3.
+time b3sum /tmp/bigfile
+```
+
+
+# Installation
+
+Prebuilt binaries are available for Linux, Windows, and macOS (requiring
+the [unidentified developer
+workaround](https://support.apple.com/guide/mac-help/open-a-mac-app-from-an-unidentified-developer-mh40616/mac))
+on the [releases page](https://github.com/BLAKE3-team/BLAKE3/releases).
+If you've [installed Rust and
+Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html),
+you can also build `b3sum` yourself with:
+
+```
+cargo install b3sum
+```
+
+On Linux, for example, Cargo will put the compiled binary in
+`~/.cargo/bin`. You might want to add that directory to your `$PATH`, or
+`rustup` might have done it for you when you installed Cargo.
+
+If you want to install directly from this directory, you can run `cargo
+install --path .`. Or you can just build with `cargo build --release`,
+which puts the binary at `./target/release/b3sum`.
diff --git a/thirdparty/BLAKE3/b3sum/src/main.rs b/thirdparty/BLAKE3/b3sum/src/main.rs
new file mode 100644
index 000000000..b01e5de58
--- /dev/null
+++ b/thirdparty/BLAKE3/b3sum/src/main.rs
@@ -0,0 +1,621 @@
+use anyhow::{bail, ensure, Context, Result};
+use clap::{App, Arg};
+use std::cmp;
+use std::convert::TryInto;
+use std::fs::File;
+use std::io;
+use std::io::prelude::*;
+use std::path::{Path, PathBuf};
+
+#[cfg(test)]
+mod unit_tests;
+
+const NAME: &str = "b3sum";
+
+const FILE_ARG: &str = "FILE";
+const DERIVE_KEY_ARG: &str = "derive-key";
+const KEYED_ARG: &str = "keyed";
+const LENGTH_ARG: &str = "length";
+const NO_MMAP_ARG: &str = "no-mmap";
+const NO_NAMES_ARG: &str = "no-names";
+const NUM_THREADS_ARG: &str = "num-threads";
+const RAW_ARG: &str = "raw";
+const CHECK_ARG: &str = "check";
+const QUIET_ARG: &str = "quiet";
+
+struct Args {
+    inner: clap::ArgMatches<'static>,
+    file_args: Vec<PathBuf>,
+    base_hasher: blake3::Hasher,
+}
+
+impl Args {
+    fn parse() -> Result<Self> {
+        let inner = App::new(NAME)
+            .version(env!("CARGO_PKG_VERSION"))
+            .arg(Arg::with_name(FILE_ARG).multiple(true).help(
+                "Files to hash, or checkfiles to check. When no file is given,\n\
+                 or when - is given, read standard input.",
+            ))
+            .arg(
+                Arg::with_name(LENGTH_ARG)
+                    .long(LENGTH_ARG)
+                    .short("l")
+                    .takes_value(true)
+                    .value_name("LEN")
+                    .help(
+                        "The number of output bytes, prior to hex\n\
+                         encoding (default 32)",
+                    ),
+            )
+            .arg(
+                Arg::with_name(NUM_THREADS_ARG)
+                    .long(NUM_THREADS_ARG)
+                    .takes_value(true)
+                    .value_name("NUM")
+                    .help(
+                        "The maximum number of threads to use. By\n\
+                         default, this is the number of logical cores.\n\
+                         If this flag is omitted, or if its value is 0,\n\
+                         RAYON_NUM_THREADS is also respected.",
+                    ),
+            )
+            .arg(
+                Arg::with_name(KEYED_ARG)
+                    .long(KEYED_ARG)
+                    .requires(FILE_ARG)
+                    .help(
+                        "Uses the keyed mode. The secret key is read from standard\n\
+                         input, and it must be exactly 32 raw bytes.",
+                    ),
+            )
+            .arg(
+                Arg::with_name(DERIVE_KEY_ARG)
+                    .long(DERIVE_KEY_ARG)
+                    .conflicts_with(KEYED_ARG)
+                    .takes_value(true)
+                    .value_name("CONTEXT")
+                    .help(
+                        "Uses the key derivation mode, with the given\n\
+                         context string. Cannot be used with --keyed.",
+                    ),
+            )
+            .arg(Arg::with_name(NO_MMAP_ARG).long(NO_MMAP_ARG).help(
+                "Disables memory mapping. Currently this also disables\n\
+                 multithreading.",
+            ))
+            .arg(
+                Arg::with_name(NO_NAMES_ARG)
+                    .long(NO_NAMES_ARG)
+                    .help("Omits filenames in the output"),
+            )
+            .arg(Arg::with_name(RAW_ARG).long(RAW_ARG).help(
+                "Writes raw output bytes to stdout, rather than hex.\n\
+                 --no-names is implied. In this case, only a single\n\
+                 input is allowed.",
+            ))
+            .arg(
+                Arg::with_name(CHECK_ARG)
+                    .long(CHECK_ARG)
+                    .short("c")
+                    .conflicts_with(DERIVE_KEY_ARG)
+                    .conflicts_with(KEYED_ARG)
+                    .conflicts_with(LENGTH_ARG)
+                    .conflicts_with(RAW_ARG)
+                    .conflicts_with(NO_NAMES_ARG)
+                    .help("Reads BLAKE3 sums from the [file]s and checks them"),
+            )
+            .arg(
+                Arg::with_name(QUIET_ARG)
+                    .long(QUIET_ARG)
+                    .requires(CHECK_ARG)
+                    .help(
+                        "Skips printing OK for each successfully verified file.\n\
+                         Must be used with --check.",
+                    ),
+            )
+            // wild::args_os() is equivalent to std::env::args_os() on Unix,
+            // but on Windows it adds support for globbing.
+            .get_matches_from(wild::args_os());
+        let file_args = if let Some(iter) = inner.values_of_os(FILE_ARG) {
+            iter.map(|s| s.into()).collect()
+        } else {
+            vec!["-".into()]
+        };
+        if inner.is_present(RAW_ARG) && file_args.len() > 1 {
+            bail!("Only one filename can be provided when using --raw");
+        }
+        let base_hasher = if inner.is_present(KEYED_ARG) {
+            // In keyed mode, since stdin is used for the key, we can't handle
+            // `-` arguments. Input::open handles that case below.
+            blake3::Hasher::new_keyed(&read_key_from_stdin()?)
+        } else if let Some(context) = inner.value_of(DERIVE_KEY_ARG) {
+            blake3::Hasher::new_derive_key(context)
+        } else {
+            blake3::Hasher::new()
+        };
+        Ok(Self {
+            inner,
+            file_args,
+            base_hasher,
+        })
+    }
+
+    fn num_threads(&self) -> Result<Option<usize>> {
+        if let Some(num_threads_str) = self.inner.value_of(NUM_THREADS_ARG) {
+            Ok(Some(
+                num_threads_str
+                    .parse()
+                    .context("Failed to parse num threads.")?,
+            ))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn check(&self) -> bool {
+        self.inner.is_present(CHECK_ARG)
+    }
+
+    fn raw(&self) -> bool {
+        self.inner.is_present(RAW_ARG)
+    }
+
+    fn no_mmap(&self) -> bool {
+        self.inner.is_present(NO_MMAP_ARG)
+    }
+
+    fn no_names(&self) -> bool {
+        self.inner.is_present(NO_NAMES_ARG)
+    }
+
+    fn len(&self) -> Result<u64> {
+        if let Some(length) = self.inner.value_of(LENGTH_ARG) {
+            length.parse::<u64>().context("Failed to parse length.")
+        } else {
+            Ok(blake3::OUT_LEN as u64)
+        }
+    }
+
+    fn keyed(&self) -> bool {
+        self.inner.is_present(KEYED_ARG)
+    }
+
+    fn quiet(&self) -> bool {
+        self.inner.is_present(QUIET_ARG)
+    }
+}
+
+enum Input {
+    Mmap(io::Cursor<memmap::Mmap>),
+    File(File),
+    Stdin,
+}
+
+impl Input {
+    // Open an input file, using mmap if appropriate. "-" means stdin. Note
+    // that this convention applies both to command line arguments, and to
+    // filepaths that appear in a checkfile.
+    fn open(path: &Path, args: &Args) -> Result<Self> {
+        if path == Path::new("-") {
+            if args.keyed() {
+                bail!("Cannot open `-` in keyed mode");
+            }
+            return Ok(Self::Stdin);
+        }
+        let file = File::open(path)?;
+        if !args.no_mmap() {
+            if let Some(mmap) = maybe_memmap_file(&file)? {
+                return Ok(Self::Mmap(io::Cursor::new(mmap)));
+            }
+        }
+        Ok(Self::File(file))
+    }
+
+    fn hash(&mut self, args: &Args) -> Result<blake3::OutputReader> {
+        let mut hasher = args.base_hasher.clone();
+        match self {
+            // The fast path: If we mmapped the file successfully, hash using
+            // multiple threads. This doesn't work on stdin, or on some files,
+            // and it can also be disabled with --no-mmap.
+            Self::Mmap(cursor) => {
+                hasher.update_with_join::<blake3::join::RayonJoin>(cursor.get_ref());
+            }
+            // The slower paths, for stdin or files we didn't/couldn't mmap.
+            // This is currently all single-threaded. Doing multi-threaded
+            // hashing without memory mapping is tricky, since all your worker
+            // threads have to stop every time you refill the buffer, and that
+            // ends up being a lot of overhead. To solve that, we need a more
+            // complicated double-buffering strategy where a background thread
+            // fills one buffer while the worker threads are hashing the other
+            // one. We might implement that in the future, but since this is
+            // the slow path anyway, it's not high priority.
+            Self::File(file) => {
+                copy_wide(file, &mut hasher)?;
+            }
+            Self::Stdin => {
+                let stdin = io::stdin();
+                let lock = stdin.lock();
+                copy_wide(lock, &mut hasher)?;
+            }
+        }
+        Ok(hasher.finalize_xof())
+    }
+}
+
+impl Read for Input {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match self {
+            Self::Mmap(cursor) => cursor.read(buf),
+            Self::File(file) => file.read(buf),
+            Self::Stdin => io::stdin().read(buf),
+        }
+    }
+}
+
+// A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets
+// that we support, but `std::io::copy` currently uses 8 KiB. Most platforms
+// can support at least 64 KiB, and there's some performance benefit to using
+// bigger reads, so that's what we use here.
+fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u64> {
+    let mut buffer = [0; 65536];
+    let mut total = 0;
+    loop {
+        match reader.read(&mut buffer) {
+            Ok(0) => return Ok(total),
+            Ok(n) => {
+                hasher.update(&buffer[..n]);
+                total += n as u64;
+            }
+            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
+            Err(e) => return Err(e),
+        }
+    }
+}
+
+// Mmap a file, if it looks like a good idea. Return None in cases where we
+// know mmap will fail, or if the file is short enough that mmapping isn't
+// worth it. However, if we do try to mmap and it fails, return the error.
+fn maybe_memmap_file(file: &File) -> Result<Option<memmap::Mmap>> {
+    let metadata = file.metadata()?;
+    let file_size = metadata.len();
+    Ok(if !metadata.is_file() {
+        // Not a real file.
+        None
+    } else if file_size > isize::max_value() as u64 {
+        // Too long to safely map.
+        // https://github.com/danburkert/memmap-rs/issues/69
+        None
+    } else if file_size == 0 {
+        // Mapping an empty file currently fails.
+        // https://github.com/danburkert/memmap-rs/issues/72
+        None
+    } else if file_size < 16 * 1024 {
+        // Mapping small files is not worth it.
+        None
+    } else {
+        // Explicitly set the length of the memory map, so that filesystem
+        // changes can't race to violate the invariants we just checked.
+        let map = unsafe {
+            memmap::MmapOptions::new()
+                .len(file_size as usize)
+                .map(&file)?
+        };
+        Some(map)
+    })
+}
+
+fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
+    // Encoding multiples of the block size is most efficient.
+    let mut len = args.len()?;
+    let mut block = [0; blake3::BLOCK_LEN];
+    while len > 0 {
+        output.fill(&mut block);
+        let hex_str = hex::encode(&block[..]);
+        let take_bytes = cmp::min(len, block.len() as u64);
+        print!("{}", &hex_str[..2 * take_bytes as usize]);
+        len -= take_bytes;
+    }
+    Ok(())
+}
+
+fn write_raw_output(output: blake3::OutputReader, args: &Args) -> Result<()> {
+    let mut output = output.take(args.len()?);
+    let stdout = std::io::stdout();
+    let mut handler = stdout.lock();
+    std::io::copy(&mut output, &mut handler)?;
+
+    Ok(())
+}
+
+fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> {
+    let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1);
+    let n = std::io::stdin()
+        .lock()
+        .take(blake3::KEY_LEN as u64 + 1)
+        .read_to_end(&mut bytes)?;
+    if n < 32 {
+        bail!(
+            "expected {} key bytes from stdin, found {}",
+            blake3::KEY_LEN,
+            n,
+        )
+    } else if n > 32 {
+        bail!("read more than {} key bytes from stdin", blake3::KEY_LEN)
+    } else {
+        Ok(bytes[..blake3::KEY_LEN].try_into().unwrap())
+    }
+}
+
+struct FilepathString {
+    filepath_string: String,
+    is_escaped: bool,
+}
+
+// Returns the printable path string and a flag saying whether it was escaped.
+fn filepath_to_string(filepath: &Path) -> FilepathString {
+    let unicode_cow = filepath.to_string_lossy();
+    let mut filepath_string = unicode_cow.to_string();
+    // If we're on Windows, normalize backslashes to forward slashes. This
+    // avoids a lot of ugly escaping in the common case, and it makes
+    // checkfiles created on Windows more likely to be portable to Unix. It
+    // also allows us to set a blanket "no backslashes allowed in checkfiles on
+    // Windows" rule, rather than allowing a Unix backslash to potentially get
+    // interpreted as a directory separator on Windows.
+    if cfg!(windows) {
+        filepath_string = filepath_string.replace('\\', "/");
+    }
+    let mut is_escaped = false;
+    if filepath_string.contains('\\') || filepath_string.contains('\n') {
+        filepath_string = filepath_string.replace('\\', "\\\\").replace('\n', "\\n");
+        is_escaped = true;
+    }
+    FilepathString {
+        filepath_string,
+        is_escaped,
+    }
+}
+
+fn hex_half_byte(c: char) -> Result<u8> {
+    // The hex characters in the hash must be lowercase for now, though we
+    // could support uppercase too if we wanted to.
+    if '0' <= c && c <= '9' {
+        return Ok(c as u8 - '0' as u8);
+    }
+    if 'a' <= c && c <= 'f' {
+        return Ok(c as u8 - 'a' as u8 + 10);
+    }
+    bail!("Invalid hex");
+}
+
+// The `check` command is a security tool. That means it's much better for a
+// check to fail more often than it should (a false negative), than for a check
+// to ever succeed when it shouldn't (a false positive). By forbidding certain
+// characters in checked filepaths, we avoid a class of false positives where
+// two different filepaths can get confused with each other.
+fn check_for_invalid_characters(utf8_path: &str) -> Result<()> {
+    // Null characters in paths should never happen, but they can result in a
+    // path getting silently truncated on Unix.
+    if utf8_path.contains('\0') {
+        bail!("Null character in path");
+    }
+    // Because we convert invalid UTF-8 sequences in paths to the Unicode
+    // replacement character, multiple different invalid paths can map to the
+    // same UTF-8 string.
+    if utf8_path.contains('�') {
+        bail!("Unicode replacement character in path");
+    }
+    // We normalize all Windows backslashes to forward slashes in our output,
+    // so the only natural way to get a backslash in a checkfile on Windows is
+    // to construct it on Unix and copy it over. (Or of course you could just
+    // doctor it by hand.) To avoid confusing this with a directory separator,
+    // we forbid backslashes entirely on Windows. Note that this check comes
+    // after unescaping has been done.
+    if cfg!(windows) && utf8_path.contains('\\') {
+        bail!("Backslash in path");
+    }
+    Ok(())
+}
+
+fn unescape(mut path: &str) -> Result<String> {
+    let mut unescaped = String::with_capacity(2 * path.len());
+    while let Some(i) = path.find('\\') {
+        ensure!(i < path.len() - 1, "Invalid backslash escape");
+        unescaped.push_str(&path[..i]);
+        match path[i + 1..].chars().next().unwrap() {
+            // Anything other than a recognized escape sequence is an error.
+            'n' => unescaped.push_str("\n"),
+            '\\' => unescaped.push_str("\\"),
+            _ => bail!("Invalid backslash escape"),
+        }
+        path = &path[i + 2..];
+    }
+    unescaped.push_str(path);
+    Ok(unescaped)
+}
+
+#[derive(Debug)]
+struct ParsedCheckLine {
+    file_string: String,
+    is_escaped: bool,
+    file_path: PathBuf,
+    expected_hash: blake3::Hash,
+}
+
+fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> {
+    // Trim off the trailing newline, if any.
+    line = line.trim_end_matches('\n');
+    // If there's a backslash at the front of the line, that means we need to
+    // unescape the path below. This matches the behavior of e.g. md5sum.
+    let first = if let Some(c) = line.chars().next() {
+        c
+    } else {
+        bail!("Empty line");
+    };
+    let mut is_escaped = false;
+    if first == '\\' {
+        is_escaped = true;
+        line = &line[1..];
+    }
+    // The front of the line must be a hash of the usual length, followed by
+    // two spaces. The hex characters in the hash must be lowercase for now,
+    // though we could support uppercase too if we wanted to.
+    let hash_hex_len = 2 * blake3::OUT_LEN;
+    let num_spaces = 2;
+    let prefix_len = hash_hex_len + num_spaces;
+    ensure!(line.len() > prefix_len, "Short line");
+    ensure!(
+        line.chars().take(prefix_len).all(|c| c.is_ascii()),
+        "Non-ASCII prefix"
+    );
+    ensure!(&line[hash_hex_len..][..2] == "  ", "Invalid space");
+    // Decode the hash hex.
+    let mut hash_bytes = [0; blake3::OUT_LEN];
+    let mut hex_chars = line[..hash_hex_len].chars();
+    for byte in &mut hash_bytes {
+        let high_char = hex_chars.next().unwrap();
+        let low_char = hex_chars.next().unwrap();
+        *byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?;
+    }
+    let expected_hash: blake3::Hash = hash_bytes.into();
+    let file_string = line[prefix_len..].to_string();
+    let file_path_string = if is_escaped {
+        // If we detected a backslash at the start of the line earlier, now we
+        // need to unescape backslashes and newlines.
+        unescape(&file_string)?
+    } else {
+        file_string.clone().into()
+    };
+    check_for_invalid_characters(&file_path_string)?;
+    Ok(ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path: file_path_string.into(),
+        expected_hash,
+    })
+}
+
+fn hash_one_input(path: &Path, args: &Args) -> Result<()> {
+    let mut input = Input::open(path, args)?;
+    let output = input.hash(args)?;
+    if args.raw() {
+        write_raw_output(output, args)?;
+        return Ok(());
+    }
+    if args.no_names() {
+        write_hex_output(output, args)?;
+        println!();
+        return Ok(());
+    }
+    let FilepathString {
+        filepath_string,
+        is_escaped,
+    } = filepath_to_string(path);
+    if is_escaped {
+        print!("\\");
+    }
+    write_hex_output(output, args)?;
+    println!("  {}", filepath_string);
+    Ok(())
+}
+
+// Returns true for success. Having a boolean return value here, instead of
+// passing down the some_file_failed reference, makes it less likely that we
+// might forget to set it in some error condition.
+fn check_one_line(line: &str, args: &Args) -> bool {
+    let parse_result = parse_check_line(&line);
+    let ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = match parse_result {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            eprintln!("{}: {}", NAME, e);
+            return false;
+        }
+    };
+    let file_string = if is_escaped {
+        "\\".to_string() + &file_string
+    } else {
+        file_string
+    };
+    let hash_result: Result<blake3::Hash> = Input::open(&file_path, args)
+        .and_then(|mut input| input.hash(args))
+        .map(|mut hash_output| {
+            let mut found_hash_bytes = [0; blake3::OUT_LEN];
+            hash_output.fill(&mut found_hash_bytes);
+            found_hash_bytes.into()
+        });
+    let found_hash: blake3::Hash = match hash_result {
+        Ok(hash) => hash,
+        Err(e) => {
+            println!("{}: FAILED ({})", file_string, e);
+            return false;
+        }
+    };
+    // This is a constant-time comparison.
+    if expected_hash == found_hash {
+        if !args.quiet() {
+            println!("{}: OK", file_string);
+        }
+        true
+    } else {
+        println!("{}: FAILED", file_string);
+        false
+    }
+}
+
+fn check_one_checkfile(path: &Path, args: &Args, some_file_failed: &mut bool) -> Result<()> {
+    let checkfile_input = Input::open(path, args)?;
+    let mut bufreader = io::BufReader::new(checkfile_input);
+    let mut line = String::new();
+    loop {
+        line.clear();
+        let n = bufreader.read_line(&mut line)?;
+        if n == 0 {
+            return Ok(());
+        }
+        // check_one_line() prints errors and turns them into a success=false
+        // return, so it doesn't return a Result.
+        let success = check_one_line(&line, args);
+        if !success {
+            *some_file_failed = true;
+        }
+    }
+}
+
+fn main() -> Result<()> {
+    let args = Args::parse()?;
+    let mut thread_pool_builder = rayon::ThreadPoolBuilder::new();
+    if let Some(num_threads) = args.num_threads()? {
+        thread_pool_builder = thread_pool_builder.num_threads(num_threads);
+    }
+    let thread_pool = thread_pool_builder.build()?;
+    thread_pool.install(|| {
+        let mut some_file_failed = false;
+        // Note that file_args automatically includes `-` if nothing is given.
+        for path in &args.file_args {
+            if args.check() {
+                // A hash mismatch or a failure to read a hashed file will be
+                // printed in the checkfile loop, and will not propagate here.
+                // This is similar to the explicit error handling we do in the
+                // hashing case immediately below. In these cases,
+                // some_file_failed will be set to true.
+                check_one_checkfile(path, &args, &mut some_file_failed)?;
+            } else {
+                // Errors encountered in hashing are tolerated and printed to
+                // stderr. This allows e.g. `b3sum *` to print errors for
+                // non-files and keep going. However, if we encounter any
+                // errors we'll still return non-zero at the end.
+                let result = hash_one_input(path, &args);
+                if let Err(e) = result {
+                    some_file_failed = true;
+                    eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e);
+                }
+            }
+        }
+        std::process::exit(if some_file_failed { 1 } else { 0 });
+    })
+}
diff --git a/thirdparty/BLAKE3/b3sum/src/unit_tests.rs b/thirdparty/BLAKE3/b3sum/src/unit_tests.rs
new file mode 100644
index 000000000..1fa1a17dc
--- /dev/null
+++ b/thirdparty/BLAKE3/b3sum/src/unit_tests.rs
@@ -0,0 +1,189 @@
+use std::path::Path;
+
+#[test]
+fn test_parse_check_line() {
+    // =========================
+    // ===== Success Cases =====
+    // =========================
+
+    // the basic case
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "0909090909090909090909090909090909090909090909090909090909090909  foo",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x09; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "foo");
+    assert_eq!(file_path, Path::new("foo"));
+
+    // regular whitespace
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa  fo \to\n\n\n",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0xfa; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "fo \to");
+    assert_eq!(file_path, Path::new("fo \to"));
+
+    // path is one space
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "4242424242424242424242424242424242424242424242424242424242424242   ",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x42; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, " ");
+    assert_eq!(file_path, Path::new(" "));
+
+    // *Unescaped* backslashes. Note that this line does *not* start with a
+    // backslash, so something like "\" + "n" is interpreted as *two*
+    // characters. We forbid all backslashes on Windows, so this test is
+    // Unix-only.
+    if cfg!(not(windows)) {
+        let crate::ParsedCheckLine {
+            file_string,
+            is_escaped,
+            file_path,
+            expected_hash,
+        } = crate::parse_check_line(
+            "4343434343434343434343434343434343434343434343434343434343434343  fo\\a\\no",
+        )
+        .unwrap();
+        assert_eq!(expected_hash, blake3::Hash::from([0x43; 32]));
+        assert!(!is_escaped);
+        assert_eq!(file_string, "fo\\a\\no");
+        assert_eq!(file_path, Path::new("fo\\a\\no"));
+    }
+
+    // escaped newline
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "\\4444444444444444444444444444444444444444444444444444444444444444  fo\\n\\no",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x44; 32]));
+    assert!(is_escaped);
+    assert_eq!(file_string, "fo\\n\\no");
+    assert_eq!(file_path, Path::new("fo\n\no"));
+
+    // Escaped newline and backslash. Again because backslash is not allowed on
+    // Windows, this test is Unix-only.
+    if cfg!(not(windows)) {
+        let crate::ParsedCheckLine {
+            file_string,
+            is_escaped,
+            file_path,
+            expected_hash,
+        } = crate::parse_check_line(
+            "\\4545454545454545454545454545454545454545454545454545454545454545  fo\\n\\\\o",
+        )
+        .unwrap();
+        assert_eq!(expected_hash, blake3::Hash::from([0x45; 32]));
+        assert!(is_escaped);
+        assert_eq!(file_string, "fo\\n\\\\o");
+        assert_eq!(file_path, Path::new("fo\n\\o"));
+    }
+
+    // non-ASCII path
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "4646464646464646464646464646464646464646464646464646464646464646  否认",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x46; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "否认");
+    assert_eq!(file_path, Path::new("否认"));
+
+    // =========================
+    // ===== Failure Cases =====
+    // =========================
+
+    // too short
+    crate::parse_check_line("").unwrap_err();
+    crate::parse_check_line("0").unwrap_err();
+    crate::parse_check_line("00").unwrap_err();
+    crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000")
+        .unwrap_err();
+    crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000  ")
+        .unwrap_err();
+
+    // not enough spaces
+    crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 foo")
+        .unwrap_err();
+
+    // capital letter hex
+    crate::parse_check_line(
+        "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA  foo",
+    )
+    .unwrap_err();
+
+    // non-hex hex
+    crate::parse_check_line(
+        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx  foo",
+    )
+    .unwrap_err();
+
+    // non-ASCII hex
+    crate::parse_check_line("你好, 我叫杰克. 认识你很高兴. 要不要吃个香蕉?  foo").unwrap_err();
+
+    // invalid escape sequence
+    crate::parse_check_line(
+        "\\0000000000000000000000000000000000000000000000000000000000000000  fo\\o",
+    )
+    .unwrap_err();
+
+    // truncated escape sequence
+    crate::parse_check_line(
+        "\\0000000000000000000000000000000000000000000000000000000000000000  foo\\",
+    )
+    .unwrap_err();
+
+    // null char
+    crate::parse_check_line(
+        "0000000000000000000000000000000000000000000000000000000000000000  fo\0o",
+    )
+    .unwrap_err();
+
+    // Unicode replacement char
+    crate::parse_check_line(
+        "0000000000000000000000000000000000000000000000000000000000000000  fo�o",
+    )
+    .unwrap_err();
+
+    // On Windows only, backslashes are not allowed, escaped or otherwise.
+    if cfg!(windows) {
+        crate::parse_check_line(
+            "0000000000000000000000000000000000000000000000000000000000000000  fo\\o",
+        )
+        .unwrap_err();
+        crate::parse_check_line(
+            "\\0000000000000000000000000000000000000000000000000000000000000000  fo\\\\o",
+        )
+        .unwrap_err();
+    }
+}
diff --git a/thirdparty/BLAKE3/b3sum/tests/cli_tests.rs b/thirdparty/BLAKE3/b3sum/tests/cli_tests.rs
new file mode 100644
index 000000000..51fbbba98
--- /dev/null
+++ b/thirdparty/BLAKE3/b3sum/tests/cli_tests.rs
@@ -0,0 +1,552 @@
+use duct::cmd;
+use std::ffi::OsString;
+use std::fs;
+use std::io::prelude::*;
+use std::path::PathBuf;
+
+/// Returns the path of the `b3sum` binary that these tests drive.
+///
+/// The path is taken from the `CARGO_BIN_EXE_b3sum` environment variable,
+/// which `env!` reads at compile time.
+pub fn b3sum_exe() -> PathBuf {
+    PathBuf::from(env!("CARGO_BIN_EXE_b3sum"))
+}
+
+/// Hashing stdin with no arguments prints the hex digest, two spaces, and
+/// `-` in place of a file name.
+#[test]
+fn test_hash_one() {
+    let digest = blake3::hash(b"foo");
+    let want = format!("{}  -", digest.to_hex());
+    let got = cmd!(b3sum_exe()).stdin_bytes("foo").read().unwrap();
+    assert_eq!(&*want, got);
+}
+
+/// With `--raw`, stdout carries the digest bytes themselves rather than hex.
+#[test]
+fn test_hash_one_raw() {
+    let raw_run = cmd!(b3sum_exe(), "--raw")
+        .stdin_bytes("foo")
+        .stdout_capture()
+        .run()
+        .unwrap();
+    let want = blake3::hash(b"foo").as_bytes().to_owned();
+    assert_eq!(want, raw_run.stdout.as_slice());
+}
+
+/// Hashing two files prints one line per file, with and without names.
+#[test]
+fn test_hash_many() {
+    let tmp = tempfile::tempdir().unwrap();
+    let foo_path = tmp.path().join("file1");
+    fs::write(&foo_path, b"foo").unwrap();
+    let bar_path = tmp.path().join("file2");
+    fs::write(&bar_path, b"bar").unwrap();
+
+    let got_named = cmd!(b3sum_exe(), &foo_path, &bar_path).read().unwrap();
+    let foo_hex = blake3::hash(b"foo").to_hex();
+    let bar_hex = blake3::hash(b"bar").to_hex();
+    // Backslashes are swapped for forward slashes to account for slash
+    // normalization on Windows.
+    let foo_name = foo_path.to_string_lossy().replace("\\", "/");
+    let bar_name = bar_path.to_string_lossy().replace("\\", "/");
+    let want_named = format!("{}  {}\n{}  {}", foo_hex, foo_name, bar_hex, bar_name);
+    assert_eq!(want_named, got_named);
+
+    // `--no-names` keeps only the digests, still one per line.
+    let got_bare = cmd!(b3sum_exe(), "--no-names", &foo_path, &bar_path)
+        .read()
+        .unwrap();
+    let want_bare = format!("{}\n{}", foo_hex, bar_hex);
+    assert_eq!(want_bare, got_bare);
+}
+
+/// A missing file among the arguments makes the run fail, but the files that
+/// do exist are still hashed to stdout and the error is reported on stderr.
+#[test]
+fn test_missing_files() {
+    let dir = tempfile::tempdir().unwrap();
+    fs::write(dir.path().join("file1"), b"foo").unwrap();
+    fs::write(dir.path().join("file2"), b"bar").unwrap();
+
+    // `unchecked()` keeps duct from turning the non-zero exit status into an
+    // `Err`, so the captured output can still be inspected.
+    let output = cmd!(b3sum_exe(), "file1", "missing_file", "file2")
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .unchecked()
+        .run()
+        .unwrap();
+    assert!(!output.status.success());
+
+    let foo_hash = blake3::hash(b"foo");
+    let bar_hash = blake3::hash(b"bar");
+    let expected_stdout = format!(
+        "{}  file1\n{}  file2\n",
+        foo_hash.to_hex(),
+        bar_hash.to_hex(),
+    );
+    assert_eq!(expected_stdout.as_bytes(), &output.stdout[..]);
+
+    // Reproduce the platform's own "file not found" error text by opening the
+    // same missing path, rather than hard-coding the message.
+    let missing_error = fs::File::open(dir.path().join("missing_file")).unwrap_err();
+    let expected_stderr = format!("b3sum: missing_file: {}\n", missing_error);
+    assert_eq!(expected_stderr.as_bytes(), &output.stderr[..]);
+}
+
+/// `--length=100` prints 100 bytes of extended output, hex-encoded.
+#[test]
+fn test_hash_length() {
+    let mut xof_bytes = [0; 100];
+    let mut hasher = blake3::Hasher::new();
+    hasher.update(b"foo");
+    hasher.finalize_xof().fill(&mut xof_bytes);
+    let want = format!("{}  -", hex::encode(&xof_bytes[..]));
+
+    let got = cmd!(b3sum_exe(), "--length=100")
+        .stdin_bytes("foo")
+        .read()
+        .unwrap();
+    assert_eq!(&*want, &*got);
+}
+
+/// `--keyed` reads the key from stdin and hashes the named file with it.
+#[test]
+fn test_keyed() {
+    let key_bytes = [42; blake3::KEY_LEN];
+    let input = tempfile::NamedTempFile::new().unwrap();
+    input.as_file().write_all(b"foo").unwrap();
+    input.as_file().flush().unwrap();
+
+    let got = cmd!(b3sum_exe(), "--keyed", "--no-names", input.path())
+        .stdin_bytes(&key_bytes[..])
+        .read()
+        .unwrap();
+    let want = blake3::keyed_hash(&key_bytes, b"foo").to_hex();
+    assert_eq!(&*want, &*got);
+}
+
+/// `--derive-key <context>` matches `blake3::derive_key` on the file's bytes.
+#[test]
+fn test_derive_key() {
+    let context = "BLAKE3 2019-12-28 10:28:41 example context";
+    let material = tempfile::NamedTempFile::new().unwrap();
+    material.as_file().write_all(b"key material").unwrap();
+    material.as_file().flush().unwrap();
+
+    let got = cmd!(b3sum_exe(), "--derive-key", context, "--no-names", material.path())
+        .read()
+        .unwrap();
+
+    let mut derived = [0; blake3::OUT_LEN];
+    blake3::derive_key(context, b"key material", &mut derived);
+    let want = hex::encode(&derived);
+    assert_eq!(&*want, &*got);
+}
+
+/// `--no-mmap` produces the same digest as the library for a small file.
+#[test]
+fn test_no_mmap() {
+    let input = tempfile::NamedTempFile::new().unwrap();
+    input.as_file().write_all(b"foo").unwrap();
+    input.as_file().flush().unwrap();
+
+    let got = cmd!(b3sum_exe(), "--no-mmap", "--no-names", input.path())
+        .read()
+        .unwrap();
+    let want = blake3::hash(b"foo").to_hex();
+    assert_eq!(&*want, &*got);
+}
+
+/// `--length` given without a value must make the command fail.
+#[test]
+fn test_length_without_value_is_an_error() {
+    // stderr is captured so the error text does not leak into test output.
+    let outcome = cmd!(b3sum_exe(), "--length")
+        .stdin_bytes("foo")
+        .stderr_capture()
+        .run();
+    assert!(outcome.is_err());
+}
+
+/// `--raw` accepts a single input file but rejects more than one.
+#[test]
+fn test_raw_with_multi_files_is_an_error() {
+    let first = tempfile::NamedTempFile::new().unwrap();
+    let second = tempfile::NamedTempFile::new().unwrap();
+
+    // A single file is accepted in raw mode.
+    let single = cmd!(b3sum_exe(), "--raw", first.path()).stdout_capture().run();
+    assert!(single.is_ok());
+
+    // Passing both files together must be rejected.
+    let both = cmd!(b3sum_exe(), "--raw", first.path(), second.path())
+        .stderr_capture()
+        .run();
+    assert!(both.is_err());
+}
+
+/// Unix only: newlines and backslashes in file names are escaped in the
+/// output, and any line containing an escape gets a leading backslash.
+/// Carriage returns are printed as-is.
+#[test]
+#[cfg(unix)]
+fn test_newline_and_backslash_escaping_on_unix() {
+    let empty_hash = blake3::hash(b"").to_hex();
+    let dir = tempfile::tempdir().unwrap();
+    fs::create_dir(dir.path().join("subdir")).unwrap();
+    let names = [
+        "abcdef",
+        "abc\ndef",
+        "abc\\def",
+        "abc\rdef",
+        "abc\r\ndef",
+        "subdir/foo",
+    ];
+    // Create an empty file for every name; all of them hash to `empty_hash`.
+    for name in &names {
+        let path = dir.path().join(name);
+        println!("creating file at {:?}", path);
+        fs::write(&path, b"").unwrap();
+    }
+    // The names are passed relative to the temp dir, so they appear in the
+    // output exactly as given (modulo escaping).
+    let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+    let expected = format!(
+        "\
+{0}  abcdef
+\\{0}  abc\\ndef
+\\{0}  abc\\\\def
+{0}  abc\rdef
+\\{0}  abc\r\\ndef
+{0}  subdir/foo",
+        empty_hash,
+    );
+    // Print both sides so a failure is readable despite the control characters.
+    println!("output");
+    println!("======");
+    println!("{}", output);
+    println!();
+    println!("expected");
+    println!("========");
+    println!("{}", expected);
+    println!();
+    assert_eq!(expected, output);
+}
+
+/// Windows only: both `/` and `\` are accepted as directory separators on
+/// the command line, and the output always uses `/`.
+#[test]
+#[cfg(windows)]
+fn test_slash_normalization_on_windows() {
+    let empty_hash = blake3::hash(b"").to_hex();
+    let dir = tempfile::tempdir().unwrap();
+    fs::create_dir(dir.path().join("subdir")).unwrap();
+    // Note that filenames can't contain newlines or backslashes on Windows, so
+    // we don't test escaping here. We only test forward slash and backslash as
+    // directory separators.
+    let names = ["abcdef", "subdir/foo", "subdir\\bar"];
+    // Create an empty file for every name; all of them hash to `empty_hash`.
+    for name in &names {
+        let path = dir.path().join(name);
+        println!("creating file at {:?}", path);
+        fs::write(&path, b"").unwrap();
+    }
+    let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+    let expected = format!(
+        "\
+{0}  abcdef
+{0}  subdir/foo
+{0}  subdir/bar",
+        empty_hash,
+    );
+    // Print both sides so a failure is easy to compare by eye.
+    println!("output");
+    println!("======");
+    println!("{}", output);
+    println!();
+    println!("expected");
+    println!("========");
+    println!("{}", expected);
+    println!();
+    assert_eq!(expected, output);
+}
+
+/// Unix only: a file name containing a byte that is not valid UTF-8 is
+/// printed with a Unicode replacement character in its place.
+#[test]
+#[cfg(unix)]
+fn test_invalid_unicode_on_unix() {
+    use std::os::unix::ffi::OsStringExt;
+
+    let empty_hash = blake3::hash(b"").to_hex();
+    let dir = tempfile::tempdir().unwrap();
+    let names = ["abcdef".into(), OsString::from_vec(b"abc\xffdef".to_vec())];
+    for name in &names {
+        let path = dir.path().join(name);
+        println!("creating file at {:?}", path);
+        // Note: Some operating systems, macOS in particular, simply don't
+        // allow invalid Unicode in filenames. On those systems, this write
+        // will fail. That's fine, we'll just short-circuit this test in that
+        // case. But assert that at least Linux allows this.
+        let write_result = fs::write(&path, b"");
+        if cfg!(target_os = "linux") {
+            write_result.expect("Linux should allow invalid Unicode");
+        } else if write_result.is_err() {
+            return;
+        }
+    }
+    let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+    let expected = format!(
+        "\
+{0}  abcdef
+{0}  abc�def",
+        empty_hash,
+    );
+    // Print both sides so a failure is easy to compare by eye.
+    println!("output");
+    println!("======");
+    println!("{}", output);
+    println!();
+    println!("expected");
+    println!("========");
+    println!("{}", expected);
+    println!();
+    assert_eq!(expected, output);
+}
+
+/// Windows only: a file name containing a lone surrogate (not valid UTF-16)
+/// is printed with a Unicode replacement character in its place.
+#[test]
+#[cfg(windows)]
+fn test_invalid_unicode_on_windows() {
+    use std::os::windows::ffi::OsStringExt;
+
+    let empty_hash = blake3::hash(b"").to_hex();
+    let dir = tempfile::tempdir().unwrap();
+    // 0xDC00 on its own is an unpaired surrogate, so the name "abc?def"
+    // built below cannot be converted to Unicode losslessly.
+    let surrogate_char = 0xDC00;
+    let bad_unicode_wchars = [
+        'a' as u16,
+        'b' as u16,
+        'c' as u16,
+        surrogate_char,
+        'd' as u16,
+        'e' as u16,
+        'f' as u16,
+    ];
+    let bad_osstring = OsString::from_wide(&bad_unicode_wchars);
+    let names = ["abcdef".into(), bad_osstring];
+    // Create an empty file for every name; all of them hash to `empty_hash`.
+    for name in &names {
+        let path = dir.path().join(name);
+        println!("creating file at {:?}", path);
+        fs::write(&path, b"").unwrap();
+    }
+    let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap();
+    let expected = format!(
+        "\
+{0}  abcdef
+{0}  abc�def",
+        empty_hash,
+    );
+    // Print both sides so a failure is easy to compare by eye.
+    println!("output");
+    println!("======");
+    println!("{}", output);
+    println!();
+    println!("expected");
+    println!("========");
+    println!("{}", expected);
+    println!();
+    assert_eq!(expected, output);
+}
+
+#[test]
+fn test_check() {
+    // End-to-end `--check` walkthrough. First make a directory full of files, and make sure
+    // the plain b3sum output in that directory is what we expect (with nothing on stderr).
+    let a_hash = blake3::hash(b"a").to_hex();
+    let b_hash = blake3::hash(b"b").to_hex();
+    let cd_hash = blake3::hash(b"cd").to_hex();
+    let dir = tempfile::tempdir().unwrap();
+    fs::write(dir.path().join("a"), b"a").unwrap();
+    fs::write(dir.path().join("b"), b"b").unwrap();
+    fs::create_dir(dir.path().join("c")).unwrap();
+    fs::write(dir.path().join("c/d"), b"cd").unwrap();
+    let output = cmd!(b3sum_exe(), "a", "b", "c/d")
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .run()
+        .unwrap();
+    let stdout = std::str::from_utf8(&output.stdout).unwrap();
+    let stderr = std::str::from_utf8(&output.stderr).unwrap();
+    let expected_checkfile = format!(
+        "{}  a\n\
+         {}  b\n\
+         {}  c/d\n",
+        a_hash, b_hash, cd_hash,
+    );
+    assert_eq!(expected_checkfile, stdout);
+    assert_eq!("", stderr);
+
+    // Now use the output we just validated as a checkfile, passed to stdin; every entry is OK.
+    let output = cmd!(b3sum_exe(), "--check")
+        .stdin_bytes(expected_checkfile.as_bytes())
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .run()
+        .unwrap();
+    let stdout = std::str::from_utf8(&output.stdout).unwrap();
+    let stderr = std::str::from_utf8(&output.stderr).unwrap();
+    let expected_check_output = "\
+         a: OK\n\
+         b: OK\n\
+         c/d: OK\n";
+    assert_eq!(expected_check_output, stdout);
+    assert_eq!("", stderr);
+
+    // Now pass the same checkfile twice on the command line; the OK lines are printed twice.
+    let checkfile_path = dir.path().join("checkfile");
+    fs::write(&checkfile_path, &expected_checkfile).unwrap();
+    let output = cmd!(b3sum_exe(), "--check", &checkfile_path, &checkfile_path)
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .run()
+        .unwrap();
+    let stdout = std::str::from_utf8(&output.stdout).unwrap();
+    let stderr = std::str::from_utf8(&output.stderr).unwrap();
+    let mut double_check_output = String::new();
+    double_check_output.push_str(&expected_check_output);
+    double_check_output.push_str(&expected_check_output);
+    assert_eq!(double_check_output, stdout);
+    assert_eq!("", stderr);
+
+    // Corrupt one of the files and check again: the run fails and "b" is reported as FAILED.
+    fs::write(dir.path().join("b"), b"CORRUPTION").unwrap();
+    let output = cmd!(b3sum_exe(), "--check", &checkfile_path)
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .unchecked()
+        .run()
+        .unwrap();
+    let stdout = std::str::from_utf8(&output.stdout).unwrap();
+    let stderr = std::str::from_utf8(&output.stderr).unwrap();
+    let expected_check_failure = "\
+        a: OK\n\
+        b: FAILED\n\
+        c/d: OK\n";
+    assert!(!output.status.success());
+    assert_eq!(expected_check_failure, stdout);
+    assert_eq!("", stderr);
+
+    // Delete one of the files and check again: the FAILED line now carries the open error text.
+    fs::remove_file(dir.path().join("b")).unwrap();
+    let open_file_error = fs::File::open(dir.path().join("b")).unwrap_err();
+    let output = cmd!(b3sum_exe(), "--check", &checkfile_path)
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .unchecked()
+        .run()
+        .unwrap();
+    let stdout = std::str::from_utf8(&output.stdout).unwrap();
+    let stderr = std::str::from_utf8(&output.stderr).unwrap();
+    let expected_check_failure = format!(
+        "a: OK\n\
+         b: FAILED ({})\n\
+         c/d: OK\n",
+        open_file_error,
+    );
+    assert!(!output.status.success());
+    assert_eq!(expected_check_failure, stdout);
+    assert_eq!("", stderr);
+
+    // Confirm that --quiet suppresses the OKs but not the FAILEDs (the run still fails).
+    let output = cmd!(b3sum_exe(), "--check", "--quiet", &checkfile_path)
+        .dir(dir.path())
+        .stdout_capture()
+        .stderr_capture()
+        .unchecked()
+        .run()
+        .unwrap();
+    let stdout = std::str::from_utf8(&output.stdout).unwrap();
+    let stderr = std::str::from_utf8(&output.stderr).unwrap();
+    let expected_check_failure = format!("b: FAILED ({})\n", open_file_error);
+    assert!(!output.status.success());
+    assert_eq!(expected_check_failure, stdout);
+    assert_eq!("", stderr);
+}
+
+/// `--check` rejects checkfile lines whose path contains a forbidden
+/// character or escape, printing a specific message on stderr and nothing
+/// on stdout.
+#[test]
+fn test_check_invalid_characters() {
+    // Feeds `checkfile` to `b3sum --check` on stdin and returns
+    // (exit status was success, stdout text, stderr text).
+    let run_check = |checkfile: &str| {
+        let output = cmd!(b3sum_exe(), "--check")
+            .stdin_bytes(checkfile)
+            .stdout_capture()
+            .stderr_capture()
+            .unchecked()
+            .run()
+            .unwrap();
+        let stdout = std::str::from_utf8(&output.stdout).unwrap().to_owned();
+        let stderr = std::str::from_utf8(&output.stderr).unwrap().to_owned();
+        (output.status.success(), stdout, stderr)
+    };
+
+    // A null character in the path fails.
+    let (succeeded, stdout, stderr) =
+        run_check("0000000000000000000000000000000000000000000000000000000000000000  \0");
+    assert!(!succeeded);
+    assert_eq!("", stdout);
+    assert_eq!("b3sum: Null character in path\n", stderr);
+
+    // A Unicode replacement character in the path fails.
+    let (succeeded, stdout, stderr) =
+        run_check("0000000000000000000000000000000000000000000000000000000000000000  �");
+    assert!(!succeeded);
+    assert_eq!("", stdout);
+    assert_eq!("b3sum: Unicode replacement character in path\n", stderr);
+
+    // An invalid escape sequence in the path fails.
+    let (succeeded, stdout, stderr) =
+        run_check("\\0000000000000000000000000000000000000000000000000000000000000000  \\a");
+    assert!(!succeeded);
+    assert_eq!("", stdout);
+    assert_eq!("b3sum: Invalid backslash escape\n", stderr);
+
+    // Windows also forbids literal backslashes. That case is checked if and
+    // only if we're on Windows.
+    if cfg!(windows) {
+        let (succeeded, stdout, stderr) =
+            run_check("0000000000000000000000000000000000000000000000000000000000000000  \\");
+        assert!(!succeeded);
+        assert_eq!("", stdout);
+        assert_eq!("b3sum: Backslash in path\n", stderr);
+    }
+}
+
+/// Runs `b3sum *` through the platform shell and checks that both files in
+/// the directory are hashed.
+#[test]
+fn test_globbing() {
+    // On Unix, globbing is provided by the shell. On Windows, globbing is
+    // provided by us, using the `wild` crate.
+    let tmp = tempfile::tempdir().unwrap();
+    fs::write(tmp.path().join("file1"), b"foo").unwrap();
+    fs::write(tmp.path().join("file2"), b"bar").unwrap();
+
+    // NOTE: The expected text assumes the glob expands alphabetically, i.e.
+    //       to "file1 file2" and not "file2 file1". That appears to hold for
+    //       Unix shells and, in practice, for the `wild` crate on Windows.
+    //       Should it ever stop holding on some platform, this test will need
+    //       to be relaxed (most likely by accepting either order). That case
+    //       is deliberately not handled up front, so that a failure gets
+    //       noticed and reported.
+    let want = format!(
+        "{}  file1\n{}  file2",
+        blake3::hash(b"foo").to_hex(),
+        blake3::hash(b"bar").to_hex()
+    );
+
+    let shell_line = format!("{} *", b3sum_exe().to_str().unwrap());
+    let (shell, run_flag) = match cfg!(windows) {
+        true => ("cmd.exe", "/C"),
+        false => ("/bin/sh", "-c"),
+    };
+    let got = cmd!(shell, run_flag, shell_line)
+        .dir(tmp.path())
+        .read()
+        .unwrap();
+    assert_eq!(want, got);
+}
diff --git a/thirdparty/BLAKE3/b3sum/what_does_check_do.md b/thirdparty/BLAKE3/b3sum/what_does_check_do.md
new file mode 100644
index 000000000..3a44a0010
--- /dev/null
+++ b/thirdparty/BLAKE3/b3sum/what_does_check_do.md
@@ -0,0 +1,174 @@
+# How does `b3sum --check` behave exactly?<br>or: Are filepaths...text?
+
+Most of the time, `b3sum --check` is a drop-in replacement for `md5sum --check`
+and other Coreutils hashing tools. It consumes a checkfile (the output of a
+regular `b3sum` command), re-hashes all the files listed there, and returns
+success if all of those hashes are still correct. What makes this more
+complicated than it might seem is that representing filepaths as text means we
+need to consider many possible edge cases of unrepresentable filepaths. This
+document describes all of these edge cases in detail.
+
+## The simple case
+
+Here's the result of running `b3sum a b c/d` in a directory that contains
+those three files:
+
+```bash
+$ echo hi > a
+$ echo lo > b
+$ mkdir c
+$ echo stuff > c/d
+$ b3sum a b c/d
+0b8b60248fad7ac6dfac221b7e01a8b91c772421a15b387dd1fb2d6a94aee438  a
+6ae4a57bbba24f79c461d30bcb4db973b9427d9207877e34d2d74528daa84115  b
+2d477356c962e54784f1c5dc5297718d92087006f6ee96b08aeaf7f3cd252377  c/d
+```
+
+If we pipe that output into `b3sum --check`, it will exit with status zero
+(success) and print:
+
+```bash
+$ b3sum a b c/d | b3sum --check
+a: OK
+b: OK
+c/d: OK
+```
+
+If we delete `b` and change the contents of `c/d`, and then use the same
+checkfile as above, `b3sum --check` will exit with a non-zero status (failure)
+and print:
+
+```bash
+$ b3sum a b c/d > checkfile
+$ rm b
+$ echo more stuff >> c/d
+$ b3sum --check checkfile
+a: OK
+b: FAILED (No such file or directory (os error 2))
+c/d: FAILED
+```
+
+In these typical cases, `b3sum` and `md5sum` have identical output for success
+and very similar output for failure.
+
+## Escaping newlines and backslashes
+
+Since the checkfile format (the regular output format of `b3sum`) is
+newline-separated text, we need to worry about what happens when a filepath
+contains a newline, or worse. Suppose we create a file named `x[newline]x`
+(3 characters). One way to create such a file is with a Python one-liner like
+this:
+
+```python
+>>> open("x\nx", "w")
+```
+
+Here's what happens when we hash that file with `b3sum`:
+
+```bash
+$ b3sum x*
+\af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262  x\nx
+```
+
+Notice two things. First, `b3sum` puts a single `\` character at the front of
+the line. This indicates that the filepath contains escape sequences that
+`b3sum --check` will need to unescape. Then, `b3sum` replaces the newline
+character in the filepath with the two-character escape sequence `\n`.
+Similarly, if the filepath contained a backslash, `b3sum` would escape it as
+`\\` in the output. So far, all of this behavior is still identical to
+`md5sum`.
+
+## Invalid Unicode
+
+This is where `b3sum` and `md5sum` diverge. Apart from the newline and backslash
+escapes described above, `md5sum` copies all other filepath bytes verbatim to
+its output. That means its output encoding is "ASCII plus whatever bytes we got
+from the command line". This creates two problems:
+
+1. Printing something that isn't UTF-8 is kind of gross.
+2. Windows support.
+
+What's the deal with Windows? To start with, there's a fundamental difference
+in how Unix and Windows represent filepaths. Unix filepaths are "usually UTF-8"
+and Windows filepaths are "usually UTF-16". That means that a file named `abc`
+is typically represented as the bytes `[97, 98, 99]` on Unix and as the bytes
+`[97, 0, 98, 0, 99, 0]` on Windows. The `md5sum` approach won't work if we plan
+on creating a checkfile on Unix and checking it on Windows, or vice versa.
+
+A more portable approach is to convert platform-specific bytes into some
+consistent Unicode encoding. (In practice this is going to be UTF-8, but in
+theory it could be anything.) Then when `--check` needs to open a file, we
+convert the Unicode representation back into platform-specific bytes. This
+makes important common cases like `abc`, and in fact even `abc[newline]def`,
+work as expected. Great!
+
+But...what did we mean above when we said *usually* UTF-8 and *usually* UTF-16?
+It turns out that not every possible sequence of bytes is valid UTF-8, and not
+every possible sequence of 16-bit wide chars is valid UTF-16. For example, the
+byte 0xFF (255) can never appear in any UTF-8 string. If we ask Python to
+decode it, it yells at us:
+
+```python
+>>> b"\xFF".decode("UTF-8")
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
+```
+
+However, tragically, we *can* create a file with that byte in its name (on
+Linux at least, though not usually on macOS):
+
+```python
+>>> open(b"y\xFFy", "w")
+```
+
+So some filepaths aren't representable in Unicode at all. Our plan to "convert
+platform-specific bytes into some consistent Unicode encoding" isn't going to
+work for everything. What does `b3sum` do with the file above?
+
+```bash
+$ b3sum y*
+af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262  y�y
+```
+
+That � in there is a "Unicode replacement character". When we run into
+filepaths that we can't represent in Unicode, we replace the unrepresentable
+parts with these characters. On the checking side, to avoid any possible
+confusion between two different invalid filepaths, we automatically fail if we
+see a replacement character. Together with a few more details covered in the
+next section, this gives us an important set of properties:
+
+1. Any file can be hashed locally.
+2. Any file with a valid Unicode name not containing the � character can be
+   checked.
+3. Checking ambiguous or unrepresentable filepaths always fails.
+4. Checkfiles are always valid UTF-8.
+5. Checkfiles are portable between Unix and Windows.
+
+## Formal Rules
+
+1. When hashing, filepaths are represented in a platform-specific encoding,
+   which can accommodate any filepath on the current platform. In Rust, this is
+   `OsStr`/`OsString`.
+2. In output, filepaths are first converted to UTF-8. Any non-Unicode segments
+   are replaced with Unicode replacement characters (U+FFFD). In Rust, this is
+   `OsStr::to_string_lossy`.
+3. Then, if a filepath contains any backslashes (U+005C) or newlines (U+000A),
+   these characters are escaped as `\\` and `\n` respectively.
+4. Finally, any output line containing an escape sequence is prefixed with a
+   single backslash.
+5. When checking, each line is parsed as UTF-8, separated by a newline
+   (U+000A). Invalid UTF-8 is an error.
+6. Then, if a line begins with a backslash, the filepath component is
+   unescaped. Any escape sequence other than `\\` or `\n` is an error. If a
+   line does not begin with a backslash, unescaping is not performed, and any
+   backslashes in the filepath component are interpreted literally. (`b3sum`
+   output never contains unescaped backslashes, but they can occur in
+   checkfiles assembled by hand.)
+7. Finally, if a filepath contains a Unicode replacement character (U+FFFD) or
+   a null character (U+0000), it is an error.
+
+   **Additionally, on Windows only:**
+
+8. In output, all backslashes (U+005C) are replaced with forward slashes
+   (U+002F).
+9. When checking, after unescaping, if a filepath contains a backslash, it is
+   an error.
author	Stefan Boberg <[email protected]>	2021-11-04 14:18:54 +0100
committer	Stefan Boberg <[email protected]>	2021-11-04 14:19:05 +0100
commit	472569d4a5f2daaef7e234d93b417ccb650be624 (patch)
tree	085c33f178855ad5ffe48b01bf99d41516ffa011 /thirdparty/BLAKE3/b3sum
parent	Merge branch 'main' of https://github.com/EpicGames/zen (diff)
download	zen-472569d4a5f2daaef7e234d93b417ccb650be624.tar.xz zen-472569d4a5f2daaef7e234d93b417ccb650be624.zip