diff options
| author | Stefan Boberg <[email protected]> | 2025-11-07 14:49:13 +0100 |
|---|---|---|
| committer | GitHub Enterprise <[email protected]> | 2025-11-07 14:49:13 +0100 |
| commit | 24e43a913f29ac3b314354e8ce5175f135bcc64f (patch) | |
| tree | ca442937ceeb63461012b33a4576e9835099f106 /thirdparty/blake3/b3sum/src | |
| parent | get oplog attachments (#622) (diff) | |
| download | zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.tar.xz zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.zip | |
switch to xmake for package management (#611)
This change removes our dependency on vcpkg for package management, in favour of bringing some code in-tree in the `thirdparty` folder as well as using xmake's built-in package management feature. For the latter, all the package definitions are maintained in the zen repo itself, in the `repo` folder.
It should now also be easier to build the project as it will no longer depend on having the right version of vcpkg installed, which has been a common problem for new people coming in to the codebase. Now you should only need xmake to build.
* Bumps xmake requirement on github runners to 2.9.9 to resolve an issue where xmake on Windows invokes cmake with `v144` toolchain which does not exist
* BLAKE3 is now in-tree at `thirdparty/blake3`
* cpr is now in-tree at `thirdparty/cpr`
* cxxopts is now in-tree at `thirdparty/cxxopts`
* fmt is now in-tree at `thirdparty/fmt`
* robin-map is now in-tree at `thirdparty/robin-map`
* ryml is now in-tree at `thirdparty/ryml`
* sol2 is now in-tree at `thirdparty/sol2`
* spdlog is now in-tree at `thirdparty/spdlog`
* utfcpp is now in-tree at `thirdparty/utfcpp`
* xmake package repo definitions are in `repo`
* implemented support for sanitizers: ASAN is supported on Windows; TSAN, UBSAN, MSAN, etc. are supported on Linux/MacOS, though I have not yet tested this extensively on MacOS
* the zencore encryption implementation also now supports using mbedTLS which is used on MacOS, though for now we still use openssl on Linux
* crashpad
* bumps libcurl to 8.11.0 (from 8.8.0) which should address a rare build upload bug
Diffstat (limited to 'thirdparty/blake3/b3sum/src')
| -rw-r--r-- | thirdparty/blake3/b3sum/src/main.rs | 564 | ||||
| -rw-r--r-- | thirdparty/blake3/b3sum/src/unit_tests.rs | 235 |
2 files changed, 799 insertions, 0 deletions
diff --git a/thirdparty/blake3/b3sum/src/main.rs b/thirdparty/blake3/b3sum/src/main.rs new file mode 100644 index 000000000..69a10c837 --- /dev/null +++ b/thirdparty/blake3/b3sum/src/main.rs @@ -0,0 +1,564 @@ +use anyhow::{bail, ensure}; +use clap::Parser; +use std::cmp; +use std::fs::File; +use std::io; +use std::io::prelude::*; +use std::path::{Path, PathBuf}; + +#[cfg(test)] +mod unit_tests; + +const NAME: &str = "b3sum"; + +const DERIVE_KEY_ARG: &str = "derive_key"; +const KEYED_ARG: &str = "keyed"; +const LENGTH_ARG: &str = "length"; +const NO_NAMES_ARG: &str = "no_names"; +const RAW_ARG: &str = "raw"; +const TAG_ARG: &str = "tag"; +const CHECK_ARG: &str = "check"; + +#[derive(Parser)] +#[command(version, max_term_width(100))] +struct Inner { + /// Files to hash, or checkfiles to check + /// + /// When no file is given, or when - is given, read standard input. + file: Vec<PathBuf>, + + /// Use the keyed mode, reading the 32-byte key from stdin + #[arg(long, requires("file"))] + keyed: bool, + + /// Use the key derivation mode, with the given context string + /// + /// Cannot be used with --keyed. + #[arg(long, value_name("CONTEXT"), conflicts_with(KEYED_ARG))] + derive_key: Option<String>, + + /// The number of output bytes, before hex encoding + #[arg( + short, + long, + default_value_t = blake3::OUT_LEN as u64, + value_name("LEN") + )] + length: u64, + + /// The starting output byte offset, before hex encoding + #[arg(long, default_value_t = 0, value_name("SEEK"))] + seek: u64, + + /// The maximum number of threads to use + /// + /// By default, this is the number of logical cores. If this flag is + /// omitted, or if its value is 0, RAYON_NUM_THREADS is also respected. + #[arg(long, value_name("NUM"))] + num_threads: Option<usize>, + + /// Disable memory mapping + /// + /// Currently this also disables multithreading. 
+ #[arg(long)] + no_mmap: bool, + + /// Omit filenames in the output + #[arg(long)] + no_names: bool, + + /// Write raw output bytes to stdout, rather than hex + /// + /// --no-names is implied. In this case, only a single input is allowed. + #[arg(long)] + raw: bool, + + /// Output BSD-style checksums: BLAKE3 ([FILE]) = [HASH] + #[arg(long)] + tag: bool, + + /// Read BLAKE3 sums from the [FILE]s and check them + #[arg( + short, + long, + conflicts_with(DERIVE_KEY_ARG), + conflicts_with(KEYED_ARG), + conflicts_with(LENGTH_ARG), + conflicts_with(RAW_ARG), + conflicts_with(TAG_ARG), + conflicts_with(NO_NAMES_ARG) + )] + check: bool, + + /// Skip printing OK for each checked file + /// + /// Must be used with --check. + #[arg(long, requires(CHECK_ARG))] + quiet: bool, +} + +struct Args { + inner: Inner, + file_args: Vec<PathBuf>, + base_hasher: blake3::Hasher, +} + +impl Args { + fn parse() -> anyhow::Result<Self> { + // wild::args_os() is equivalent to std::env::args_os() on Unix, + // but on Windows it adds support for globbing. + let inner = Inner::parse_from(wild::args_os()); + let file_args = if !inner.file.is_empty() { + inner.file.clone() + } else { + vec!["-".into()] + }; + if inner.raw && file_args.len() > 1 { + bail!("Only one filename can be provided when using --raw"); + } + let base_hasher = if inner.keyed { + // In keyed mode, since stdin is used for the key, we can't handle + // `-` arguments. Input::open handles that case below. + blake3::Hasher::new_keyed(&read_key_from_stdin()?) 
+ } else if let Some(ref context) = inner.derive_key { + blake3::Hasher::new_derive_key(context) + } else { + blake3::Hasher::new() + }; + Ok(Self { + inner, + file_args, + base_hasher, + }) + } + + fn num_threads(&self) -> Option<usize> { + self.inner.num_threads + } + + fn check(&self) -> bool { + self.inner.check + } + + fn raw(&self) -> bool { + self.inner.raw + } + + fn tag(&self) -> bool { + self.inner.tag + } + + fn no_mmap(&self) -> bool { + self.inner.no_mmap + } + + fn no_names(&self) -> bool { + self.inner.no_names + } + + fn len(&self) -> u64 { + self.inner.length + } + + fn seek(&self) -> u64 { + self.inner.seek + } + + fn keyed(&self) -> bool { + self.inner.keyed + } + + fn quiet(&self) -> bool { + self.inner.quiet + } +} + +fn hash_path(args: &Args, path: &Path) -> anyhow::Result<blake3::OutputReader> { + let mut hasher = args.base_hasher.clone(); + if path == Path::new("-") { + if args.keyed() { + bail!("Cannot open `-` in keyed mode"); + } + hasher.update_reader(io::stdin().lock())?; + } else if args.no_mmap() { + hasher.update_reader(File::open(path)?)?; + } else { + // The fast path: Try to mmap the file and hash it with multiple threads. + hasher.update_mmap_rayon(path)?; + } + let mut output_reader = hasher.finalize_xof(); + output_reader.set_position(args.seek()); + Ok(output_reader) +} + +fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> { + // Encoding multiples of the 64 bytes is most efficient. + // TODO: This computes each output block twice when the --seek argument isn't a multiple of 64. + // We'll refactor all of this soon anyway, once SIMD optimizations are available for the XOF. 
+ let mut len = args.len(); + let mut block = [0; blake3::BLOCK_LEN]; + while len > 0 { + output.fill(&mut block); + let hex_str = hex::encode(&block[..]); + let take_bytes = cmp::min(len, block.len() as u64); + print!("{}", &hex_str[..2 * take_bytes as usize]); + len -= take_bytes; + } + Ok(()) +} + +fn write_raw_output(output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> { + let mut output = output.take(args.len()); + let stdout = std::io::stdout(); + let mut handler = stdout.lock(); + std::io::copy(&mut output, &mut handler)?; + + Ok(()) +} + +fn read_key_from_stdin() -> anyhow::Result<[u8; blake3::KEY_LEN]> { + let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1); + let n = std::io::stdin() + .lock() + .take(blake3::KEY_LEN as u64 + 1) + .read_to_end(&mut bytes)?; + if n < blake3::KEY_LEN { + bail!( + "expected {} key bytes from stdin, found {}", + blake3::KEY_LEN, + n, + ) + } else if n > blake3::KEY_LEN { + bail!("read more than {} key bytes from stdin", blake3::KEY_LEN) + } else { + Ok(bytes[..blake3::KEY_LEN].try_into().unwrap()) + } +} + +struct FilepathString { + filepath_string: String, + is_escaped: bool, +} + +// returns (string, did_escape) +fn filepath_to_string(filepath: &Path) -> FilepathString { + let unicode_cow = filepath.to_string_lossy(); + let mut filepath_string = unicode_cow.to_string(); + // If we're on Windows, normalize backslashes to forward slashes. This + // avoids a lot of ugly escaping in the common case, and it makes + // checkfiles created on Windows more likely to be portable to Unix. It + // also allows us to set a blanket "no backslashes allowed in checkfiles on + // Windows" rule, rather than allowing a Unix backslash to potentially get + // interpreted as a directory separator on Windows. 
+ if cfg!(windows) { + filepath_string = filepath_string.replace('\\', "/"); + } + let mut is_escaped = false; + if filepath_string.contains(['\\', '\n', '\r']) { + filepath_string = filepath_string + .replace('\\', "\\\\") + .replace('\n', "\\n") + .replace('\r', "\\r"); + is_escaped = true; + } + FilepathString { + filepath_string, + is_escaped, + } +} + +fn hex_half_byte(c: char) -> anyhow::Result<u8> { + // The hex characters in the hash must be lowercase for now, though we + // could support uppercase too if we wanted to. + if '0' <= c && c <= '9' { + return Ok(c as u8 - '0' as u8); + } + if 'a' <= c && c <= 'f' { + return Ok(c as u8 - 'a' as u8 + 10); + } + bail!("Invalid hex"); +} + +// The `check` command is a security tool. That means it's much better for a +// check to fail more often than it should (a false negative), than for a check +// to ever succeed when it shouldn't (a false positive). By forbidding certain +// characters in checked filepaths, we avoid a class of false positives where +// two different filepaths can get confused with each other. +fn check_for_invalid_characters(utf8_path: &str) -> anyhow::Result<()> { + // Null characters in paths should never happen, but they can result in a + // path getting silently truncated on Unix. + if utf8_path.contains('\0') { + bail!("Null character in path"); + } + // Because we convert invalid UTF-8 sequences in paths to the Unicode + // replacement character, multiple different invalid paths can map to the + // same UTF-8 string. + if utf8_path.contains('�') { + bail!("Unicode replacement character in path"); + } + // We normalize all Windows backslashes to forward slashes in our output, + // so the only natural way to get a backslash in a checkfile on Windows is + // to construct it on Unix and copy it over. (Or of course you could just + // doctor it by hand.) To avoid confusing this with a directory separator, + // we forbid backslashes entirely on Windows. 
Note that this check comes + // after unescaping has been done. + if cfg!(windows) && utf8_path.contains('\\') { + bail!("Backslash in path"); + } + Ok(()) +} + +fn unescape(mut path: &str) -> anyhow::Result<String> { + let mut unescaped = String::with_capacity(2 * path.len()); + while let Some(i) = path.find('\\') { + ensure!(i < path.len() - 1, "Invalid backslash escape"); + unescaped.push_str(&path[..i]); + match path[i + 1..].chars().next().unwrap() { + // Anything other than a recognized escape sequence is an error. + 'n' => unescaped.push_str("\n"), + 'r' => unescaped.push_str("\r"), + '\\' => unescaped.push_str("\\"), + _ => bail!("Invalid backslash escape"), + } + path = &path[i + 2..]; + } + unescaped.push_str(path); + Ok(unescaped) +} + +#[derive(Debug)] +struct ParsedCheckLine { + file_string: String, + is_escaped: bool, + file_path: PathBuf, + expected_hash: blake3::Hash, +} + +fn split_untagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> { + // Of the form "<hash> <file>". The file might contain " ", so we need to split from the + // left. + line_after_slash.split_once(" ") +} + +fn split_tagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> { + // Of the form "BLAKE3 (<file>) = <hash>". The file might contain ") = ", so we need to split + // from the *right*. + let prefix = "BLAKE3 ("; + if !line_after_slash.starts_with(prefix) { + return None; + } + line_after_slash[prefix.len()..].rsplit_once(") = ") +} + +fn parse_check_line(mut line: &str) -> anyhow::Result<ParsedCheckLine> { + // Trim off the trailing newlines, if any. + line = line.trim_end_matches(['\r', '\n']); + // If there's a backslash at the front of the line, that means we need to + // unescape the path below. This matches the behavior of e.g. md5sum. 
+ let Some(first) = line.chars().next() else { + bail!("Empty line"); + }; + let line_after_slash; + let is_escaped; + if first == '\\' { + is_escaped = true; + line_after_slash = &line[1..]; + } else { + is_escaped = false; + line_after_slash = line; + } + + // Split the line. It might be "<hash> <file>" or "BLAKE3 (<file>) = <hash>". The latter comes + // from the --tag flag. + let hash_hex; + let file_str; + if let Some((left, right)) = split_untagged_check_line(line_after_slash) { + hash_hex = left; + file_str = right; + } else if let Some((left, right)) = split_tagged_check_line(line_after_slash) { + file_str = left; + hash_hex = right; + } else { + bail!("Invalid check line format"); + } + + // Decode the hex hash. + ensure!(hash_hex.len() == 2 * blake3::OUT_LEN, "Invalid hash length"); + let mut hex_chars = hash_hex.chars(); + let mut hash_bytes = [0; blake3::OUT_LEN]; + for byte in &mut hash_bytes { + let high_char = hex_chars.next().unwrap(); + let low_char = hex_chars.next().unwrap(); + *byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?; + } + let expected_hash: blake3::Hash = hash_bytes.into(); + + // Unescape and validate the filepath. + let file_path_string = if is_escaped { + unescape(file_str)? 
+ } else { + file_str.to_string() + }; + ensure!(!file_path_string.is_empty(), "empty file path"); + check_for_invalid_characters(&file_path_string)?; + + Ok(ParsedCheckLine { + file_string: file_str.to_string(), + is_escaped, + file_path: file_path_string.into(), + expected_hash, + }) +} + +fn hash_one_input(path: &Path, args: &Args) -> anyhow::Result<()> { + let output = hash_path(args, path)?; + if args.raw() { + write_raw_output(output, args)?; + return Ok(()); + } + if args.no_names() { + write_hex_output(output, args)?; + println!(); + return Ok(()); + } + let FilepathString { + filepath_string, + is_escaped, + } = filepath_to_string(path); + if is_escaped { + print!("\\"); + } + if args.tag() { + print!("BLAKE3 ({}) = ", filepath_string); + write_hex_output(output, args)?; + println!(); + return Ok(()); + } + write_hex_output(output, args)?; + println!(" {}", filepath_string); + Ok(()) +} + +// Returns true for success. Having a boolean return value here, instead of +// passing down the files_failed reference, makes it less likely that we might +// forget to set it in some error condition. +fn check_one_line(line: &str, args: &Args) -> bool { + let parse_result = parse_check_line(&line); + let ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = match parse_result { + Ok(parsed) => parsed, + Err(e) => { + eprintln!("{}: {}", NAME, e); + return false; + } + }; + let file_string = if is_escaped { + "\\".to_string() + &file_string + } else { + file_string + }; + let found_hash: blake3::Hash; + match hash_path(args, &file_path) { + Ok(mut output) => { + let mut found_hash_bytes = [0; blake3::OUT_LEN]; + output.fill(&mut found_hash_bytes); + found_hash = found_hash_bytes.into(); + } + Err(e) => { + println!("{}: FAILED ({})", file_string, e); + return false; + } + }; + // This is a constant-time comparison. 
+ if expected_hash == found_hash { + if !args.quiet() { + println!("{}: OK", file_string); + } + true + } else { + println!("{}: FAILED", file_string); + false + } +} + +fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> anyhow::Result<()> { + let mut file; + let stdin; + let mut stdin_lock; + let mut bufreader: io::BufReader<&mut dyn Read>; + if path == Path::new("-") { + stdin = io::stdin(); + stdin_lock = stdin.lock(); + bufreader = io::BufReader::new(&mut stdin_lock); + } else { + file = File::open(path)?; + bufreader = io::BufReader::new(&mut file); + } + let mut line = String::new(); + loop { + line.clear(); + let n = bufreader.read_line(&mut line)?; + if n == 0 { + return Ok(()); + } + // check_one_line() prints errors and turns them into a success=false + // return, so it doesn't return a Result. + let success = check_one_line(&line, args); + if !success { + // We use `files_failed > 0` to indicate a mismatch, so it's important for correctness + // that it's impossible for this counter to overflow. + *files_failed = files_failed.saturating_add(1); + } + } +} + +fn main() -> anyhow::Result<()> { + let args = Args::parse()?; + let mut thread_pool_builder = rayon_core::ThreadPoolBuilder::new(); + if let Some(num_threads) = args.num_threads() { + thread_pool_builder = thread_pool_builder.num_threads(num_threads); + } + let thread_pool = thread_pool_builder.build()?; + thread_pool.install(|| { + let mut files_failed = 0u64; + // Note that file_args automatically includes `-` if nothing is given. + for path in &args.file_args { + if args.check() { + check_one_checkfile(path, &args, &mut files_failed)?; + } else { + // Errors encountered in hashing are tolerated and printed to + // stderr. This allows e.g. `b3sum *` to print errors for + // non-files and keep going. However, if we encounter any + // errors we'll still return non-zero at the end. 
+ let result = hash_one_input(path, &args); + if let Err(e) = result { + files_failed = files_failed.saturating_add(1); + eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e); + } + } + } + if args.check() && files_failed > 0 { + eprintln!( + "{}: WARNING: {} computed checksum{} did NOT match", + NAME, + files_failed, + if files_failed == 1 { "" } else { "s" }, + ); + } + std::process::exit(if files_failed > 0 { 1 } else { 0 }); + }) +} + +#[cfg(test)] +mod test { + use clap::CommandFactory; + + #[test] + fn test_args() { + crate::Inner::command().debug_assert(); + } +} diff --git a/thirdparty/blake3/b3sum/src/unit_tests.rs b/thirdparty/blake3/b3sum/src/unit_tests.rs new file mode 100644 index 000000000..75f672b4c --- /dev/null +++ b/thirdparty/blake3/b3sum/src/unit_tests.rs @@ -0,0 +1,235 @@ +use std::path::Path; + +#[test] +fn test_parse_check_line() { + // ========================= + // ===== Success Cases ===== + // ========================= + + // the basic case + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "0909090909090909090909090909090909090909090909090909090909090909 foo", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x09; 32])); + assert!(!is_escaped); + assert_eq!(file_string, "foo"); + assert_eq!(file_path, Path::new("foo")); + + // regular whitespace + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa \t\r\n\n\r \t\r\n\n\r", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0xfa; 32])); + assert!(!is_escaped); + assert_eq!(file_string, " \t\r\n\n\r \t"); + assert_eq!(file_path, Path::new(" \t\r\n\n\r \t")); + + // path is one space + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + 
"4242424242424242424242424242424242424242424242424242424242424242 ", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x42; 32])); + assert!(!is_escaped); + assert_eq!(file_string, " "); + assert_eq!(file_path, Path::new(" ")); + + // *Unescaped* backslashes. Note that this line does *not* start with a + // backslash, so something like "\" + "n" is interpreted as *two* + // characters. We forbid all backslashes on Windows, so this test is + // Unix-only. + if cfg!(not(windows)) { + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "4343434343434343434343434343434343434343434343434343434343434343 fo\\a\\no", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x43; 32])); + assert!(!is_escaped); + assert_eq!(file_string, "fo\\a\\no"); + assert_eq!(file_path, Path::new("fo\\a\\no")); + } + + // escaped newlines + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "\\4444444444444444444444444444444444444444444444444444444444444444 fo\\r\\n\\n\\ro", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x44; 32])); + assert!(is_escaped); + assert_eq!(file_string, "fo\\r\\n\\n\\ro"); + assert_eq!(file_path, Path::new("fo\r\n\n\ro")); + + // Escaped newline and backslash. Again because backslash is not allowed on + // Windows, this test is Unix-only. 
+ if cfg!(not(windows)) { + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "\\4545454545454545454545454545454545454545454545454545454545454545 fo\\n\\\\o", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x45; 32])); + assert!(is_escaped); + assert_eq!(file_string, "fo\\n\\\\o"); + assert_eq!(file_path, Path::new("fo\n\\o")); + } + + // non-ASCII path + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "4646464646464646464646464646464646464646464646464646464646464646 否认", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x46; 32])); + assert!(!is_escaped); + assert_eq!(file_string, "否认"); + assert_eq!(file_path, Path::new("否认")); + + // untagged separator " " in the file name + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "4747474747474747474747474747474747474747474747474747474747474747 foo bar", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x47; 32])); + assert!(!is_escaped); + assert_eq!(file_string, "foo bar"); + assert_eq!(file_path, Path::new("foo bar")); + + // tagged separator ") = " in the file name + let crate::ParsedCheckLine { + file_string, + is_escaped, + file_path, + expected_hash, + } = crate::parse_check_line( + "BLAKE3 (foo) = bar) = 4848484848484848484848484848484848484848484848484848484848484848", + ) + .unwrap(); + assert_eq!(expected_hash, blake3::Hash::from([0x48; 32])); + assert!(!is_escaped); + assert_eq!(file_string, "foo) = bar"); + assert_eq!(file_path, Path::new("foo) = bar")); + + // ========================= + // ===== Failure Cases ===== + // ========================= + + // too short + crate::parse_check_line("").unwrap_err(); + crate::parse_check_line("0").unwrap_err(); + crate::parse_check_line("00").unwrap_err(); + 
crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000") + .unwrap_err(); + crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 ") + .unwrap_err(); + + // not enough spaces + crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 foo") + .unwrap_err(); + + // capital letter hex + crate::parse_check_line( + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA foo", + ) + .unwrap_err(); + + // non-hex hex + crate::parse_check_line( + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx foo", + ) + .unwrap_err(); + + // non-ASCII hex + crate::parse_check_line("你好, 我叫杰克. 认识你很高兴. 要不要吃个香蕉? foo").unwrap_err(); + + // invalid escape sequence + crate::parse_check_line( + "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\o", + ) + .unwrap_err(); + + // truncated escape sequence + crate::parse_check_line( + "\\0000000000000000000000000000000000000000000000000000000000000000 foo\\", + ) + .unwrap_err(); + + // null char + crate::parse_check_line( + "0000000000000000000000000000000000000000000000000000000000000000 fo\0o", + ) + .unwrap_err(); + + // Unicode replacement char + crate::parse_check_line( + "0000000000000000000000000000000000000000000000000000000000000000 fo�o", + ) + .unwrap_err(); + + // On Windows only, backslashes are not allowed, escaped or otherwise. 
+ if cfg!(windows) { + crate::parse_check_line( + "0000000000000000000000000000000000000000000000000000000000000000 fo\\o", + ) + .unwrap_err(); + crate::parse_check_line( + "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\\\o", + ) + .unwrap_err(); + } +} + +#[test] +fn test_filepath_to_string() { + let output = crate::filepath_to_string(Path::new("foo")); + assert_eq!(output.filepath_string, "foo"); + assert!(!output.is_escaped); + + let output = crate::filepath_to_string(Path::new("f\\ \t\r\noo")); + if cfg!(windows) { + // We normalize backslashes to forward slashes on Windows. + assert_eq!(output.filepath_string, "f/ \t\\r\\noo"); + } else { + assert_eq!(output.filepath_string, "f\\\\ \t\\r\\noo"); + } + assert!(output.is_escaped); +} |