switch to xmake for package management (#611)

This change removes our dependency on vcpkg for package management, in favour of bringing some code in-tree in the `thirdparty` folder as well as using the xmake built-in package management feature. For the latter, all the package definitions are maintained in the zen repo itself, in the `repo` folder. It should now also be easier to build the project as it will no longer depend on having the right version of vcpkg installed, which has been a common problem for new people coming into the codebase. Now you should only need xmake to build. * Bumps xmake requirement on github runners to 2.9.9 to resolve an issue where xmake on Windows invokes cmake with `v144` toolchain which does not exist * BLAKE3 is now in-tree at `thirdparty/blake3` * cpr is now in-tree at `thirdparty/cpr` * cxxopts is now in-tree at `thirdparty/cxxopts` * fmt is now in-tree at `thirdparty/fmt` * robin-map is now in-tree at `thirdparty/robin-map` * ryml is now in-tree at `thirdparty/ryml` * sol2 is now in-tree at `thirdparty/sol2` * spdlog is now in-tree at `thirdparty/spdlog` * utfcpp is now in-tree at `thirdparty/utfcpp` * xmake package repo definitions are in `repo` * implemented support for sanitizers. ASAN is supported on Windows, TSAN, UBSAN, MSAN etc are supported on Linux/MacOS though I have not yet tested it extensively on MacOS * the zencore encryption implementation also now supports using mbedTLS which is used on MacOS, though for now we still use openssl on Linux * crashpad * bumps libcurl to 8.11.0 (from 8.8.0) which should address a rare build upload bug
author: Stefan Boberg <[email protected]> 2025-11-07 14:49:13 +0100
committer: GitHub Enterprise <[email protected]> 2025-11-07 14:49:13 +0100
commit: 24e43a913f29ac3b314354e8ce5175f135bcc64f (patch)
tree: ca442937ceeb63461012b33a4576e9835099f106 /thirdparty/blake3/b3sum/src
parent: get oplog attachments (#622) (diff)
download: zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.tar.xz
zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.zip
2 files changed, 799 insertions, 0 deletions
diff --git a/thirdparty/blake3/b3sum/src/main.rs b/thirdparty/blake3/b3sum/src/main.rs
new file mode 100644
index 000000000..69a10c837
--- /dev/null
+++ b/thirdparty/blake3/b3sum/src/main.rs
@@ -0,0 +1,564 @@
+use anyhow::{bail, ensure};
+use clap::Parser;
+use std::cmp;
+use std::fs::File;
+use std::io;
+use std::io::prelude::*;
+use std::path::{Path, PathBuf};
+
+#[cfg(test)]
+mod unit_tests;
+
+// Program name, used as the prefix of the messages this tool prints to stderr.
+const NAME: &str = "b3sum";
+
+// Argument ids referenced by name from the `conflicts_with(...)` and
+// `requires(...)` attributes on the `Inner` struct. Each string is spelled
+// exactly like the `Inner` field it refers to.
+const DERIVE_KEY_ARG: &str = "derive_key";
+const KEYED_ARG: &str = "keyed";
+const LENGTH_ARG: &str = "length";
+const NO_NAMES_ARG: &str = "no_names";
+const RAW_ARG: &str = "raw";
+const TAG_ARG: &str = "tag";
+const CHECK_ARG: &str = "check";
+
+// The command-line interface as declared to clap.
+//
+// NOTE: with clap's derive API the `///` doc comments on the fields below are
+// used as the `--help` text, so rewording them changes user-visible output.
+// Maintainer-only notes should be written as plain `//` comments instead.
+#[derive(Parser)]
+#[command(version, max_term_width(100))]
+struct Inner {
+    /// Files to hash, or checkfiles to check
+    ///
+    /// When no file is given, or when - is given, read standard input.
+    file: Vec<PathBuf>,
+
+    /// Use the keyed mode, reading the 32-byte key from stdin
+    #[arg(long, requires("file"))]
+    keyed: bool,
+
+    /// Use the key derivation mode, with the given context string
+    ///
+    /// Cannot be used with --keyed.
+    #[arg(long, value_name("CONTEXT"), conflicts_with(KEYED_ARG))]
+    derive_key: Option<String>,
+
+    /// The number of output bytes, before hex encoding
+    #[arg(
+        short,
+        long,
+        default_value_t = blake3::OUT_LEN as u64,
+        value_name("LEN")
+    )]
+    length: u64,
+
+    /// The starting output byte offset, before hex encoding
+    #[arg(long, default_value_t = 0, value_name("SEEK"))]
+    seek: u64,
+
+    /// The maximum number of threads to use
+    ///
+    /// By default, this is the number of logical cores. If this flag is
+    /// omitted, or if its value is 0, RAYON_NUM_THREADS is also respected.
+    #[arg(long, value_name("NUM"))]
+    num_threads: Option<usize>,
+
+    /// Disable memory mapping
+    ///
+    /// Currently this also disables multithreading.
+    #[arg(long)]
+    no_mmap: bool,
+
+    /// Omit filenames in the output
+    #[arg(long)]
+    no_names: bool,
+
+    /// Write raw output bytes to stdout, rather than hex
+    ///
+    /// --no-names is implied. In this case, only a single input is allowed.
+    #[arg(long)]
+    raw: bool,
+
+    /// Output BSD-style checksums: BLAKE3 ([FILE]) = [HASH]
+    #[arg(long)]
+    tag: bool,
+
+    /// Read BLAKE3 sums from the [FILE]s and check them
+    // --check always compares the default-length hash computed with the
+    // unkeyed hasher, so every flag that would alter the hash or the output
+    // format is declared as conflicting.
+    #[arg(
+        short,
+        long,
+        conflicts_with(DERIVE_KEY_ARG),
+        conflicts_with(KEYED_ARG),
+        conflicts_with(LENGTH_ARG),
+        conflicts_with(RAW_ARG),
+        conflicts_with(TAG_ARG),
+        conflicts_with(NO_NAMES_ARG)
+    )]
+    check: bool,
+
+    /// Skip printing OK for each checked file
+    ///
+    /// Must be used with --check.
+    #[arg(long, requires(CHECK_ARG))]
+    quiet: bool,
+}
+
+// The parsed command line together with the state Args::parse() derives from it.
+struct Args {
+    // The options exactly as parsed by clap.
+    inner: Inner,
+    // The inputs to process: the positional arguments, or a single `-`
+    // (stdin) when none were given.
+    file_args: Vec<PathBuf>,
+    // Hasher already configured for the selected mode (keyed, derive-key or
+    // plain). It is cloned for every input that gets hashed.
+    base_hasher: blake3::Hasher,
+}
+
+impl Args {
+    // Parses the process arguments and builds the hasher shared by all inputs.
+    //
+    // Returns an error if --raw is combined with more than one input, or if
+    // --keyed was given and the key could not be read from stdin.
+    fn parse() -> anyhow::Result<Self> {
+        // On Unix wild::args_os() behaves like std::env::args_os(); on
+        // Windows it additionally expands globs.
+        let inner = Inner::parse_from(wild::args_os());
+        // No positional arguments means "read stdin", spelled `-`.
+        let file_args = if inner.file.is_empty() {
+            vec![PathBuf::from("-")]
+        } else {
+            inner.file.clone()
+        };
+        ensure!(
+            !inner.raw || file_args.len() <= 1,
+            "Only one filename can be provided when using --raw"
+        );
+        let base_hasher = if inner.keyed {
+            // The key is consumed from stdin right here, which is why `-`
+            // inputs cannot be supported in keyed mode; hash_path() rejects
+            // them.
+            let key = read_key_from_stdin()?;
+            blake3::Hasher::new_keyed(&key)
+        } else if let Some(context) = inner.derive_key.as_deref() {
+            blake3::Hasher::new_derive_key(context)
+        } else {
+            blake3::Hasher::new()
+        };
+        Ok(Self {
+            inner,
+            file_args,
+            base_hasher,
+        })
+    }
+
+    // The remaining methods are thin read-only accessors over `inner`.
+
+    // Value of --num-threads, if given.
+    fn num_threads(&self) -> Option<usize> {
+        self.inner.num_threads
+    }
+
+    // Whether --check was given.
+    fn check(&self) -> bool {
+        self.inner.check
+    }
+
+    // Whether --raw was given.
+    fn raw(&self) -> bool {
+        self.inner.raw
+    }
+
+    // Whether --tag was given.
+    fn tag(&self) -> bool {
+        self.inner.tag
+    }
+
+    // Whether --no-mmap was given.
+    fn no_mmap(&self) -> bool {
+        self.inner.no_mmap
+    }
+
+    // Whether --no-names was given.
+    fn no_names(&self) -> bool {
+        self.inner.no_names
+    }
+
+    // Requested number of output bytes (--length).
+    fn len(&self) -> u64 {
+        self.inner.length
+    }
+
+    // Requested starting offset into the output stream (--seek).
+    fn seek(&self) -> u64 {
+        self.inner.seek
+    }
+
+    // Whether --keyed was given.
+    fn keyed(&self) -> bool {
+        self.inner.keyed
+    }
+
+    // Whether --quiet was given.
+    fn quiet(&self) -> bool {
+        self.inner.quiet
+    }
+}
+
+// Hashes the input at `path` (`-` means stdin) with a copy of
+// `args.base_hasher` and returns an output reader positioned at the --seek
+// offset.
+fn hash_path(args: &Args, path: &Path) -> anyhow::Result<blake3::OutputReader> {
+    let reading_stdin = path == Path::new("-");
+    if reading_stdin && args.keyed() {
+        // In keyed mode stdin has already been used up for the key.
+        bail!("Cannot open `-` in keyed mode");
+    }
+    let mut hasher = args.base_hasher.clone();
+    if reading_stdin {
+        hasher.update_reader(io::stdin().lock())?;
+    } else if args.no_mmap() {
+        hasher.update_reader(File::open(path)?)?;
+    } else {
+        // The fast path: try to mmap the file and hash it with multiple
+        // threads.
+        hasher.update_mmap_rayon(path)?;
+    }
+    let mut reader = hasher.finalize_xof();
+    reader.set_position(args.seek());
+    Ok(reader)
+}
+
+// Prints `args.len()` bytes of `output` to stdout as lowercase hex, without a
+// trailing newline.
+fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> {
+    // Output is produced one 64-byte block at a time, which is the most
+    // efficient granularity.
+    // TODO: This computes each output block twice when the --seek argument isn't a multiple of 64.
+    // We'll refactor all of this soon anyway, once SIMD optimizations are available for the XOF.
+    let mut remaining = args.len();
+    let mut block = [0u8; blake3::BLOCK_LEN];
+    while remaining > 0 {
+        output.fill(&mut block);
+        // The final block may only be partially wanted.
+        let wanted = cmp::min(remaining, block.len() as u64);
+        print!("{}", hex::encode(&block[..wanted as usize]));
+        remaining -= wanted;
+    }
+    Ok(())
+}
+
+// Copies `args.len()` raw bytes of `output` to stdout, with no encoding and
+// no trailing newline.
+fn write_raw_output(output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> {
+    let mut limited = output.take(args.len());
+    let stdout = io::stdout();
+    let mut sink = stdout.lock();
+    io::copy(&mut limited, &mut sink)?;
+    Ok(())
+}
+
+// Reads the key for --keyed mode from stdin. Exactly `blake3::KEY_LEN` bytes
+// must be available; both shorter and longer input is an error.
+fn read_key_from_stdin() -> anyhow::Result<[u8; blake3::KEY_LEN]> {
+    // Ask for one byte more than needed so that over-long input is detected
+    // without reading all of stdin.
+    let limit = blake3::KEY_LEN + 1;
+    let mut bytes = Vec::with_capacity(limit);
+    let n = std::io::stdin()
+        .lock()
+        .take(limit as u64)
+        .read_to_end(&mut bytes)?;
+    match n.cmp(&blake3::KEY_LEN) {
+        cmp::Ordering::Less => bail!(
+            "expected {} key bytes from stdin, found {}",
+            blake3::KEY_LEN,
+            n,
+        ),
+        cmp::Ordering::Greater => {
+            bail!("read more than {} key bytes from stdin", blake3::KEY_LEN)
+        }
+        // The slice is exactly KEY_LEN bytes long here, so the conversion to
+        // a fixed-size array cannot fail.
+        cmp::Ordering::Equal => Ok(bytes[..blake3::KEY_LEN].try_into().unwrap()),
+    }
+}
+
+// The result of filepath_to_string(): a printable path plus a flag saying
+// whether it had to be escaped.
+struct FilepathString {
+    // The path as it should be printed, with escaping already applied when
+    // `is_escaped` is true.
+    filepath_string: String,
+    // True if backslash escapes were substituted into `filepath_string`.
+    is_escaped: bool,
+}
+
+// Converts `filepath` into the string that is printed next to its hash, and
+// reports whether backslash escaping had to be applied.
+fn filepath_to_string(filepath: &Path) -> FilepathString {
+    let lossy = filepath.to_string_lossy().into_owned();
+    // If we're on Windows, normalize backslashes to forward slashes. This
+    // avoids a lot of ugly escaping in the common case, and it makes
+    // checkfiles created on Windows more likely to be portable to Unix. It
+    // also allows us to set a blanket "no backslashes allowed in checkfiles on
+    // Windows" rule, rather than allowing a Unix backslash to potentially get
+    // interpreted as a directory separator on Windows.
+    let normalized = if cfg!(windows) {
+        lossy.replace('\\', "/")
+    } else {
+        lossy
+    };
+    // Backslashes, newlines and carriage returns are the only characters that
+    // get escaped. The backslash must be replaced first so that the
+    // backslashes introduced by the other two escapes are not doubled.
+    let is_escaped = normalized.contains(['\\', '\n', '\r']);
+    let filepath_string = if is_escaped {
+        normalized
+            .replace('\\', "\\\\")
+            .replace('\n', "\\n")
+            .replace('\r', "\\r")
+    } else {
+        normalized
+    };
+    FilepathString {
+        filepath_string,
+        is_escaped,
+    }
+}
+
+// Decodes one hexadecimal digit into its numeric value (0 through 15).
+fn hex_half_byte(c: char) -> anyhow::Result<u8> {
+    // The hex characters in the hash must be lowercase for now, though we
+    // could support uppercase too if we wanted to.
+    match c {
+        '0'..='9' => Ok(c as u8 - b'0'),
+        'a'..='f' => Ok(c as u8 - b'a' + 10),
+        _ => bail!("Invalid hex"),
+    }
+}
+
+// The `check` command is a security tool, so rejecting a checkfile entry that
+// would have been fine is far preferable to accepting one that should have
+// been rejected. Forbidding certain characters in checked filepaths rules out
+// a class of bad acceptances in which two different filepaths could be
+// confused with each other.
+fn check_for_invalid_characters(utf8_path: &str) -> anyhow::Result<()> {
+    // Null characters in paths should never happen, but they can result in a
+    // path getting silently truncated on Unix.
+    ensure!(!utf8_path.contains('\0'), "Null character in path");
+    // Because we convert invalid UTF-8 sequences in paths to the Unicode
+    // replacement character, multiple different invalid paths can map to the
+    // same UTF-8 string.
+    ensure!(
+        !utf8_path.contains(char::REPLACEMENT_CHARACTER),
+        "Unicode replacement character in path"
+    );
+    // We normalize all Windows backslashes to forward slashes in our output,
+    // so the only natural way to get a backslash in a checkfile on Windows is
+    // to construct it on Unix and copy it over. (Or of course you could just
+    // doctor it by hand.) To avoid confusing this with a directory separator,
+    // we forbid backslashes entirely on Windows. Note that this check comes
+    // after unescaping has been done.
+    ensure!(
+        !(cfg!(windows) && utf8_path.contains('\\')),
+        "Backslash in path"
+    );
+    Ok(())
+}
+
+// Reverses the escaping applied by filepath_to_string(): `\n`, `\r` and `\\`
+// become a newline, a carriage return and a single backslash. A backslash
+// followed by anything else, or by nothing at all, is an error.
+fn unescape(mut path: &str) -> anyhow::Result<String> {
+    // Unescaping never makes the string longer.
+    let mut unescaped = String::with_capacity(path.len());
+    while let Some((literal, after_backslash)) = path.split_once('\\') {
+        unescaped.push_str(literal);
+        let mut rest = after_backslash.chars();
+        let decoded = match rest.next() {
+            Some('n') => '\n',
+            Some('r') => '\r',
+            Some('\\') => '\\',
+            // Anything other than a recognized escape sequence is an error,
+            // and so is a backslash at the very end of the string.
+            _ => bail!("Invalid backslash escape"),
+        };
+        unescaped.push(decoded);
+        path = rest.as_str();
+    }
+    unescaped.push_str(path);
+    Ok(unescaped)
+}
+
+// One successfully parsed line of a checkfile, as produced by
+// parse_check_line().
+#[derive(Debug)]
+struct ParsedCheckLine {
+    // The file name exactly as it appears in the checkfile, i.e. still in
+    // escaped form when `is_escaped` is true. Used when reporting results.
+    file_string: String,
+    // True if the line started with a backslash, meaning the file name in it
+    // is escaped.
+    is_escaped: bool,
+    // The unescaped, validated path that is actually opened and hashed.
+    file_path: PathBuf,
+    // The hash decoded from the hex field of the line.
+    expected_hash: blake3::Hash,
+}
+
+// Splits a line of the form "<hash>  <file>" into (hash, file). Returns None
+// if the two-space separator does not occur.
+fn split_untagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> {
+    // The file name may itself contain "  ", so the leftmost occurrence is the
+    // separator.
+    let separator = "  ";
+    let at = line_after_slash.find(separator)?;
+    Some((
+        &line_after_slash[..at],
+        &line_after_slash[at + separator.len()..],
+    ))
+}
+
+// Splits a line of the form "BLAKE3 (<file>) = <hash>" into (file, hash).
+// Returns None if the line does not have that shape.
+fn split_tagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> {
+    // The file name may itself contain ") = ", so after removing the fixed
+    // prefix the *rightmost* occurrence is taken as the separator.
+    line_after_slash
+        .strip_prefix("BLAKE3 (")?
+        .rsplit_once(") = ")
+}
+
+// Parses one line of a checkfile.
+//
+// Two layouts are accepted: "<hash>  <file>" and, as written by --tag,
+// "BLAKE3 (<file>) = <hash>". A leading backslash on the line marks the file
+// name as escaped. Trailing '\r' and '\n' characters are ignored.
+//
+// Returns an error for an empty line, an unrecognized layout, a hash field
+// that is not exactly 2 * blake3::OUT_LEN lowercase hex digits, an invalid
+// escape sequence, an empty file name, or a file name containing characters
+// rejected by check_for_invalid_characters().
+fn parse_check_line(mut line: &str) -> anyhow::Result<ParsedCheckLine> {
+    // Trim off the trailing newlines, if any.
+    line = line.trim_end_matches(['\r', '\n']);
+    ensure!(!line.is_empty(), "Empty line");
+    // If there's a backslash at the front of the line, that means we need to
+    // unescape the path below. This matches the behavior of e.g. md5sum.
+    let (is_escaped, line_after_slash) = match line.strip_prefix('\\') {
+        Some(rest) => (true, rest),
+        None => (false, line),
+    };
+
+    // Split the line. The tagged layout is tried first: a valid untagged line
+    // starts with hex digits and therefore never with "BLAKE3 (", so this is
+    // unambiguous, whereas trying the untagged layout first would mis-split
+    // every tagged line whose file name contains two consecutive spaces.
+    let (hash_hex, file_str) =
+        if let Some((file, hash)) = split_tagged_check_line(line_after_slash) {
+            (hash, file)
+        } else if let Some((hash, file)) = split_untagged_check_line(line_after_slash) {
+            (hash, file)
+        } else {
+            bail!("Invalid check line format");
+        };
+
+    // Decode the hex hash. The length check is in bytes, so decoding also
+    // works on bytes: walking `chars()` instead would run out of characters
+    // (and panic) if the field contained a multi-byte character. Non-ASCII
+    // bytes map to non-hex characters and are rejected by hex_half_byte().
+    ensure!(hash_hex.len() == 2 * blake3::OUT_LEN, "Invalid hash length");
+    let mut hash_bytes = [0; blake3::OUT_LEN];
+    for (byte, pair) in hash_bytes
+        .iter_mut()
+        .zip(hash_hex.as_bytes().chunks_exact(2))
+    {
+        *byte = 16 * hex_half_byte(pair[0] as char)? + hex_half_byte(pair[1] as char)?;
+    }
+    let expected_hash: blake3::Hash = hash_bytes.into();
+
+    // Unescape and validate the filepath.
+    let file_path_string = if is_escaped {
+        unescape(file_str)?
+    } else {
+        file_str.to_string()
+    };
+    ensure!(!file_path_string.is_empty(), "empty file path");
+    check_for_invalid_characters(&file_path_string)?;
+
+    Ok(ParsedCheckLine {
+        // Keep the name as written in the checkfile for reporting.
+        file_string: file_str.to_string(),
+        is_escaped,
+        file_path: file_path_string.into(),
+        expected_hash,
+    })
+}
+
+// Hashes a single input and prints the result to stdout in the format
+// selected by --raw, --no-names and --tag.
+fn hash_one_input(path: &Path, args: &Args) -> anyhow::Result<()> {
+    let output = hash_path(args, path)?;
+    if args.raw() {
+        // Raw bytes only: no file name and no trailing newline.
+        return write_raw_output(output, args);
+    }
+    if args.no_names() {
+        write_hex_output(output, args)?;
+        println!();
+        return Ok(());
+    }
+    let name = filepath_to_string(path);
+    if name.is_escaped {
+        // A leading backslash tells --check that the file name is escaped.
+        print!("\\");
+    }
+    if args.tag() {
+        print!("BLAKE3 ({}) = ", name.filepath_string);
+        write_hex_output(output, args)?;
+        println!();
+    } else {
+        write_hex_output(output, args)?;
+        println!("  {}", name.filepath_string);
+    }
+    Ok(())
+}
+
+// Checks one checkfile line and returns true on success. Reporting the
+// outcome through the return value, rather than through the caller's failure
+// counter, makes it harder to forget to record a failure on some error path.
+fn check_one_line(line: &str, args: &Args) -> bool {
+    let parsed = match parse_check_line(line) {
+        Ok(parsed) => parsed,
+        Err(e) => {
+            eprintln!("{}: {}", NAME, e);
+            return false;
+        }
+    };
+    // Results are reported using the name as it was written in the checkfile,
+    // including the leading backslash of an escaped line.
+    let display_name = if parsed.is_escaped {
+        "\\".to_string() + &parsed.file_string
+    } else {
+        parsed.file_string
+    };
+    let mut output = match hash_path(args, &parsed.file_path) {
+        Ok(output) => output,
+        Err(e) => {
+            println!("{}: FAILED ({})", display_name, e);
+            return false;
+        }
+    };
+    let mut found_hash_bytes = [0; blake3::OUT_LEN];
+    output.fill(&mut found_hash_bytes);
+    let found_hash: blake3::Hash = found_hash_bytes.into();
+    // This is a constant-time comparison.
+    let matched = parsed.expected_hash == found_hash;
+    if !matched {
+        println!("{}: FAILED", display_name);
+    } else if !args.quiet() {
+        println!("{}: OK", display_name);
+    }
+    matched
+}
+
+// Checks every line of the checkfile at `path` (`-` means stdin), adding one
+// to `files_failed` for each line that fails. An error is returned only if
+// the checkfile itself cannot be opened or read.
+fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> anyhow::Result<()> {
+    let source: Box<dyn Read> = if path == Path::new("-") {
+        Box::new(io::stdin().lock())
+    } else {
+        Box::new(File::open(path)?)
+    };
+    let mut bufreader = io::BufReader::new(source);
+    // A single buffer is reused for all lines.
+    let mut line = String::new();
+    while bufreader.read_line(&mut line)? != 0 {
+        // check_one_line() prints errors and turns them into a `false`
+        // return, so there is no Result to propagate here.
+        if !check_one_line(&line, args) {
+            // We use `files_failed > 0` to indicate a mismatch, so it's important for correctness
+            // that it's impossible for this counter to overflow.
+            *files_failed = files_failed.saturating_add(1);
+        }
+        line.clear();
+    }
+    Ok(())
+}
+
+// Entry point: parses the arguments, then hashes or checks every input inside
+// a thread pool sized by --num-threads. The process exits with status 1 if
+// any input failed and 0 otherwise.
+fn main() -> anyhow::Result<()> {
+    let args = Args::parse()?;
+    let mut builder = rayon_core::ThreadPoolBuilder::new();
+    if let Some(n) = args.num_threads() {
+        builder = builder.num_threads(n);
+    }
+    let pool = builder.build()?;
+    pool.install(|| {
+        let mut files_failed = 0u64;
+        // Note that file_args automatically includes `-` if nothing is given.
+        for path in &args.file_args {
+            if args.check() {
+                check_one_checkfile(path, &args, &mut files_failed)?;
+            } else if let Err(e) = hash_one_input(path, &args) {
+                // Errors encountered in hashing are tolerated and printed to
+                // stderr. This allows e.g. `b3sum *` to print errors for
+                // non-files and keep going. However, if we encounter any
+                // errors we'll still return non-zero at the end.
+                files_failed = files_failed.saturating_add(1);
+                eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e);
+            }
+        }
+        if args.check() && files_failed > 0 {
+            let plural = if files_failed == 1 { "" } else { "s" };
+            eprintln!(
+                "{}: WARNING: {} computed checksum{} did NOT match",
+                NAME, files_failed, plural,
+            );
+        }
+        // The closure only returns early through `?` above; otherwise the
+        // process exits right here with the final status.
+        std::process::exit(i32::from(files_failed > 0));
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use clap::CommandFactory;
+
+    // Builds the clap command generated for `Inner` and runs clap's own debug
+    // assertions over it. NOTE(review): presumably this is what catches a
+    // misconfigured argument definition, such as a `*_ARG` id that no longer
+    // matches a field name - confirm against the clap documentation.
+    #[test]
+    fn test_args() {
+        crate::Inner::command().debug_assert();
+    }
+}
diff --git a/thirdparty/blake3/b3sum/src/unit_tests.rs b/thirdparty/blake3/b3sum/src/unit_tests.rs
new file mode 100644
index 000000000..75f672b4c
--- /dev/null
+++ b/thirdparty/blake3/b3sum/src/unit_tests.rs
@@ -0,0 +1,235 @@
+use std::path::Path;
+
+// Exercises crate::parse_check_line() in two parts. The success cases check
+// all four fields of the returned ParsedCheckLine for plain, whitespace-only,
+// escaped, non-ASCII and separator-containing file names in both the untagged
+// and the tagged layout. The failure cases only require that an error is
+// returned. Cases involving backslashes are split by platform, because
+// backslashes in paths are rejected on Windows.
+#[test]
+fn test_parse_check_line() {
+    // =========================
+    // ===== Success Cases =====
+    // =========================
+
+    // the basic case
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "0909090909090909090909090909090909090909090909090909090909090909  foo",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x09; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "foo");
+    assert_eq!(file_path, Path::new("foo"));
+
+    // regular whitespace
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa   \t\r\n\n\r \t\r\n\n\r",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0xfa; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, " \t\r\n\n\r \t");
+    assert_eq!(file_path, Path::new(" \t\r\n\n\r \t"));
+
+    // path is one space
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "4242424242424242424242424242424242424242424242424242424242424242   ",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x42; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, " ");
+    assert_eq!(file_path, Path::new(" "));
+
+    // *Unescaped* backslashes. Note that this line does *not* start with a
+    // backslash, so something like "\" + "n" is interpreted as *two*
+    // characters. We forbid all backslashes on Windows, so this test is
+    // Unix-only.
+    if cfg!(not(windows)) {
+        let crate::ParsedCheckLine {
+            file_string,
+            is_escaped,
+            file_path,
+            expected_hash,
+        } = crate::parse_check_line(
+            "4343434343434343434343434343434343434343434343434343434343434343  fo\\a\\no",
+        )
+        .unwrap();
+        assert_eq!(expected_hash, blake3::Hash::from([0x43; 32]));
+        assert!(!is_escaped);
+        assert_eq!(file_string, "fo\\a\\no");
+        assert_eq!(file_path, Path::new("fo\\a\\no"));
+    }
+
+    // escaped newlines
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "\\4444444444444444444444444444444444444444444444444444444444444444  fo\\r\\n\\n\\ro",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x44; 32]));
+    assert!(is_escaped);
+    assert_eq!(file_string, "fo\\r\\n\\n\\ro");
+    assert_eq!(file_path, Path::new("fo\r\n\n\ro"));
+
+    // Escaped newline and backslash. Again because backslash is not allowed on
+    // Windows, this test is Unix-only.
+    if cfg!(not(windows)) {
+        let crate::ParsedCheckLine {
+            file_string,
+            is_escaped,
+            file_path,
+            expected_hash,
+        } = crate::parse_check_line(
+            "\\4545454545454545454545454545454545454545454545454545454545454545  fo\\n\\\\o",
+        )
+        .unwrap();
+        assert_eq!(expected_hash, blake3::Hash::from([0x45; 32]));
+        assert!(is_escaped);
+        assert_eq!(file_string, "fo\\n\\\\o");
+        assert_eq!(file_path, Path::new("fo\n\\o"));
+    }
+
+    // non-ASCII path
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "4646464646464646464646464646464646464646464646464646464646464646  否认",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x46; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "否认");
+    assert_eq!(file_path, Path::new("否认"));
+
+    // untagged separator "  " in the file name
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "4747474747474747474747474747474747474747474747474747474747474747  foo  bar",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x47; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "foo  bar");
+    assert_eq!(file_path, Path::new("foo  bar"));
+
+    // tagged separator ") = " in the file name
+    let crate::ParsedCheckLine {
+        file_string,
+        is_escaped,
+        file_path,
+        expected_hash,
+    } = crate::parse_check_line(
+        "BLAKE3 (foo) = bar) = 4848484848484848484848484848484848484848484848484848484848484848",
+    )
+    .unwrap();
+    assert_eq!(expected_hash, blake3::Hash::from([0x48; 32]));
+    assert!(!is_escaped);
+    assert_eq!(file_string, "foo) = bar");
+    assert_eq!(file_path, Path::new("foo) = bar"));
+
+    // =========================
+    // ===== Failure Cases =====
+    // =========================
+
+    // too short
+    crate::parse_check_line("").unwrap_err();
+    crate::parse_check_line("0").unwrap_err();
+    crate::parse_check_line("00").unwrap_err();
+    crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000")
+        .unwrap_err();
+    crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000  ")
+        .unwrap_err();
+
+    // not enough spaces
+    crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 foo")
+        .unwrap_err();
+
+    // capital letter hex
+    crate::parse_check_line(
+        "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA  foo",
+    )
+    .unwrap_err();
+
+    // non-hex hex
+    crate::parse_check_line(
+        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx  foo",
+    )
+    .unwrap_err();
+
+    // non-ASCII hex
+    crate::parse_check_line("你好, 我叫杰克. 认识你很高兴. 要不要吃个香蕉?  foo").unwrap_err();
+
+    // invalid escape sequence
+    crate::parse_check_line(
+        "\\0000000000000000000000000000000000000000000000000000000000000000  fo\\o",
+    )
+    .unwrap_err();
+
+    // truncated escape sequence
+    crate::parse_check_line(
+        "\\0000000000000000000000000000000000000000000000000000000000000000  foo\\",
+    )
+    .unwrap_err();
+
+    // null char
+    crate::parse_check_line(
+        "0000000000000000000000000000000000000000000000000000000000000000  fo\0o",
+    )
+    .unwrap_err();
+
+    // Unicode replacement char
+    crate::parse_check_line(
+        "0000000000000000000000000000000000000000000000000000000000000000  fo�o",
+    )
+    .unwrap_err();
+
+    // On Windows only, backslashes are not allowed, escaped or otherwise.
+    if cfg!(windows) {
+        crate::parse_check_line(
+            "0000000000000000000000000000000000000000000000000000000000000000  fo\\o",
+        )
+        .unwrap_err();
+        crate::parse_check_line(
+            "\\0000000000000000000000000000000000000000000000000000000000000000  fo\\\\o",
+        )
+        .unwrap_err();
+    }
+}
+
+// Checks crate::filepath_to_string() on a name that needs no escaping and on
+// one that contains a backslash, a carriage return and a newline.
+#[test]
+fn test_filepath_to_string() {
+    let plain = crate::filepath_to_string(Path::new("foo"));
+    assert_eq!(plain.filepath_string, "foo");
+    assert!(!plain.is_escaped);
+
+    let escaped = crate::filepath_to_string(Path::new("f\\ \t\r\noo"));
+    let expected = if cfg!(windows) {
+        // We normalize backslashes to forward slashes on Windows.
+        "f/ \t\\r\\noo"
+    } else {
+        "f\\\\ \t\\r\\noo"
+    };
+    assert_eq!(escaped.filepath_string, expected);
+    assert!(escaped.is_escaped);
+}
author	Stefan Boberg <[email protected]>	2025-11-07 14:49:13 +0100
committer	GitHub Enterprise <[email protected]>	2025-11-07 14:49:13 +0100
commit	24e43a913f29ac3b314354e8ce5175f135bcc64f (patch)
tree	ca442937ceeb63461012b33a4576e9835099f106 /thirdparty/blake3/b3sum/src
parent	get oplog attachments (#622) (diff)
download	zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.tar.xz zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.zip