aboutsummaryrefslogtreecommitdiff
path: root/thirdparty/blake3/reference_impl/reference_impl.rs
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2025-11-07 14:49:13 +0100
committerGitHub Enterprise <[email protected]>2025-11-07 14:49:13 +0100
commit24e43a913f29ac3b314354e8ce5175f135bcc64f (patch)
treeca442937ceeb63461012b33a4576e9835099f106 /thirdparty/blake3/reference_impl/reference_impl.rs
parentget oplog attachments (#622) (diff)
downloadzen-24e43a913f29ac3b314354e8ce5175f135bcc64f.tar.xz
zen-24e43a913f29ac3b314354e8ce5175f135bcc64f.zip
switch to xmake for package management (#611)
This change removes our dependency on vcpkg for package management, in favour of bringing some code in-tree in the `thirdparty` folder as well as using the xmake build-in package management feature. For the latter, all the package definitions are maintained in the zen repo itself, in the `repo` folder. It should now also be easier to build the project as it will no longer depend on having the right version of vcpkg installed, which has been a common problem for new people coming in to the codebase. Now you should only need xmake to build. * Bumps xmake requirement on github runners to 2.9.9 to resolve an issue where xmake on Windows invokes cmake with `v144` toolchain which does not exist * BLAKE3 is now in-tree at `thirdparty/blake3` * cpr is now in-tree at `thirdparty/cpr` * cxxopts is now in-tree at `thirdparty/cxxopts` * fmt is now in-tree at `thirdparty/fmt` * robin-map is now in-tree at `thirdparty/robin-map` * ryml is now in-tree at `thirdparty/ryml` * sol2 is now in-tree at `thirdparty/sol2` * spdlog is now in-tree at `thirdparty/spdlog` * utfcpp is now in-tree at `thirdparty/utfcpp` * xmake package repo definitions is in `repo` * implemented support for sanitizers. ASAN is supported on windows, TSAN, UBSAN, MSAN etc are supported on Linux/MacOS though I have not yet tested it extensively on MacOS * the zencore encryption implementation also now supports using mbedTLS which is used on MacOS, though for now we still use openssl on Linux * crashpad * bumps libcurl to 8.11.0 (from 8.8.0) which should address a rare build upload bug
Diffstat (limited to 'thirdparty/blake3/reference_impl/reference_impl.rs')
-rw-r--r--thirdparty/blake3/reference_impl/reference_impl.rs374
1 files changed, 374 insertions, 0 deletions
diff --git a/thirdparty/blake3/reference_impl/reference_impl.rs b/thirdparty/blake3/reference_impl/reference_impl.rs
new file mode 100644
index 000000000..bc6138350
--- /dev/null
+++ b/thirdparty/blake3/reference_impl/reference_impl.rs
@@ -0,0 +1,374 @@
+//! This is the reference implementation of BLAKE3. It is used for testing and
+//! as a readable example of the algorithms involved. Section 5.1 of [the BLAKE3
+//! spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf)
+//! discusses this implementation. You can render docs for this implementation
+//! by running `cargo doc --open` in this directory.
+//!
+//! # Example
+//!
+//! ```
+//! let mut hasher = reference_impl::Hasher::new();
+//! hasher.update(b"abc");
+//! hasher.update(b"def");
+//! let mut hash = [0; 32];
+//! hasher.finalize(&mut hash);
+//! let mut extended_hash = [0; 500];
+//! hasher.finalize(&mut extended_hash);
+//! assert_eq!(hash, extended_hash[..32]);
+//! ```
+
+use core::cmp::min;
+
+const OUT_LEN: usize = 32;
+const KEY_LEN: usize = 32;
+const BLOCK_LEN: usize = 64;
+const CHUNK_LEN: usize = 1024;
+
+const CHUNK_START: u32 = 1 << 0;
+const CHUNK_END: u32 = 1 << 1;
+const PARENT: u32 = 1 << 2;
+const ROOT: u32 = 1 << 3;
+const KEYED_HASH: u32 = 1 << 4;
+const DERIVE_KEY_CONTEXT: u32 = 1 << 5;
+const DERIVE_KEY_MATERIAL: u32 = 1 << 6;
+
+const IV: [u32; 8] = [
+ 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
+];
+
+const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8];
+
+// The mixing function, G, which mixes either a column or a diagonal.
+fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32) {
+ state[a] = state[a].wrapping_add(state[b]).wrapping_add(mx);
+ state[d] = (state[d] ^ state[a]).rotate_right(16);
+ state[c] = state[c].wrapping_add(state[d]);
+ state[b] = (state[b] ^ state[c]).rotate_right(12);
+ state[a] = state[a].wrapping_add(state[b]).wrapping_add(my);
+ state[d] = (state[d] ^ state[a]).rotate_right(8);
+ state[c] = state[c].wrapping_add(state[d]);
+ state[b] = (state[b] ^ state[c]).rotate_right(7);
+}
+
+fn round(state: &mut [u32; 16], m: &[u32; 16]) {
+ // Mix the columns.
+ g(state, 0, 4, 8, 12, m[0], m[1]);
+ g(state, 1, 5, 9, 13, m[2], m[3]);
+ g(state, 2, 6, 10, 14, m[4], m[5]);
+ g(state, 3, 7, 11, 15, m[6], m[7]);
+ // Mix the diagonals.
+ g(state, 0, 5, 10, 15, m[8], m[9]);
+ g(state, 1, 6, 11, 12, m[10], m[11]);
+ g(state, 2, 7, 8, 13, m[12], m[13]);
+ g(state, 3, 4, 9, 14, m[14], m[15]);
+}
+
+fn permute(m: &mut [u32; 16]) {
+ let mut permuted = [0; 16];
+ for i in 0..16 {
+ permuted[i] = m[MSG_PERMUTATION[i]];
+ }
+ *m = permuted;
+}
+
+fn compress(
+ chaining_value: &[u32; 8],
+ block_words: &[u32; 16],
+ counter: u64,
+ block_len: u32,
+ flags: u32,
+) -> [u32; 16] {
+ let counter_low = counter as u32;
+ let counter_high = (counter >> 32) as u32;
+ #[rustfmt::skip]
+ let mut state = [
+ chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3],
+ chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7],
+ IV[0], IV[1], IV[2], IV[3],
+ counter_low, counter_high, block_len, flags,
+ ];
+ let mut block = *block_words;
+
+ round(&mut state, &block); // round 1
+ permute(&mut block);
+ round(&mut state, &block); // round 2
+ permute(&mut block);
+ round(&mut state, &block); // round 3
+ permute(&mut block);
+ round(&mut state, &block); // round 4
+ permute(&mut block);
+ round(&mut state, &block); // round 5
+ permute(&mut block);
+ round(&mut state, &block); // round 6
+ permute(&mut block);
+ round(&mut state, &block); // round 7
+
+ for i in 0..8 {
+ state[i] ^= state[i + 8];
+ state[i + 8] ^= chaining_value[i];
+ }
+ state
+}
+
+fn first_8_words(compression_output: [u32; 16]) -> [u32; 8] {
+ compression_output[0..8].try_into().unwrap()
+}
+
+fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) {
+ debug_assert_eq!(bytes.len(), 4 * words.len());
+ for (four_bytes, word) in bytes.chunks_exact(4).zip(words) {
+ *word = u32::from_le_bytes(four_bytes.try_into().unwrap());
+ }
+}
+
+// Each chunk or parent node can produce either an 8-word chaining value or, by
+// setting the ROOT flag, any number of final output bytes. The Output struct
+// captures the state just prior to choosing between those two possibilities.
+struct Output {
+ input_chaining_value: [u32; 8],
+ block_words: [u32; 16],
+ counter: u64,
+ block_len: u32,
+ flags: u32,
+}
+
+impl Output {
+ fn chaining_value(&self) -> [u32; 8] {
+ first_8_words(compress(
+ &self.input_chaining_value,
+ &self.block_words,
+ self.counter,
+ self.block_len,
+ self.flags,
+ ))
+ }
+
+ fn root_output_bytes(&self, out_slice: &mut [u8]) {
+ let mut output_block_counter = 0;
+ for out_block in out_slice.chunks_mut(2 * OUT_LEN) {
+ let words = compress(
+ &self.input_chaining_value,
+ &self.block_words,
+ output_block_counter,
+ self.block_len,
+ self.flags | ROOT,
+ );
+ // The output length might not be a multiple of 4.
+ for (word, out_word) in words.iter().zip(out_block.chunks_mut(4)) {
+ out_word.copy_from_slice(&word.to_le_bytes()[..out_word.len()]);
+ }
+ output_block_counter += 1;
+ }
+ }
+}
+
+struct ChunkState {
+ chaining_value: [u32; 8],
+ chunk_counter: u64,
+ block: [u8; BLOCK_LEN],
+ block_len: u8,
+ blocks_compressed: u8,
+ flags: u32,
+}
+
+impl ChunkState {
+ fn new(key_words: [u32; 8], chunk_counter: u64, flags: u32) -> Self {
+ Self {
+ chaining_value: key_words,
+ chunk_counter,
+ block: [0; BLOCK_LEN],
+ block_len: 0,
+ blocks_compressed: 0,
+ flags,
+ }
+ }
+
+ fn len(&self) -> usize {
+ BLOCK_LEN * self.blocks_compressed as usize + self.block_len as usize
+ }
+
+ fn start_flag(&self) -> u32 {
+ if self.blocks_compressed == 0 {
+ CHUNK_START
+ } else {
+ 0
+ }
+ }
+
+ fn update(&mut self, mut input: &[u8]) {
+ while !input.is_empty() {
+ // If the block buffer is full, compress it and clear it. More
+ // input is coming, so this compression is not CHUNK_END.
+ if self.block_len as usize == BLOCK_LEN {
+ let mut block_words = [0; 16];
+ words_from_little_endian_bytes(&self.block, &mut block_words);
+ self.chaining_value = first_8_words(compress(
+ &self.chaining_value,
+ &block_words,
+ self.chunk_counter,
+ BLOCK_LEN as u32,
+ self.flags | self.start_flag(),
+ ));
+ self.blocks_compressed += 1;
+ self.block = [0; BLOCK_LEN];
+ self.block_len = 0;
+ }
+
+ // Copy input bytes into the block buffer.
+ let want = BLOCK_LEN - self.block_len as usize;
+ let take = min(want, input.len());
+ self.block[self.block_len as usize..][..take].copy_from_slice(&input[..take]);
+ self.block_len += take as u8;
+ input = &input[take..];
+ }
+ }
+
+ fn output(&self) -> Output {
+ let mut block_words = [0; 16];
+ words_from_little_endian_bytes(&self.block, &mut block_words);
+ Output {
+ input_chaining_value: self.chaining_value,
+ block_words,
+ counter: self.chunk_counter,
+ block_len: self.block_len as u32,
+ flags: self.flags | self.start_flag() | CHUNK_END,
+ }
+ }
+}
+
+fn parent_output(
+ left_child_cv: [u32; 8],
+ right_child_cv: [u32; 8],
+ key_words: [u32; 8],
+ flags: u32,
+) -> Output {
+ let mut block_words = [0; 16];
+ block_words[..8].copy_from_slice(&left_child_cv);
+ block_words[8..].copy_from_slice(&right_child_cv);
+ Output {
+ input_chaining_value: key_words,
+ block_words,
+ counter: 0, // Always 0 for parent nodes.
+ block_len: BLOCK_LEN as u32, // Always BLOCK_LEN (64) for parent nodes.
+ flags: PARENT | flags,
+ }
+}
+
+fn parent_cv(
+ left_child_cv: [u32; 8],
+ right_child_cv: [u32; 8],
+ key_words: [u32; 8],
+ flags: u32,
+) -> [u32; 8] {
+ parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value()
+}
+
+/// An incremental hasher that can accept any number of writes.
+pub struct Hasher {
+ chunk_state: ChunkState,
+ key_words: [u32; 8],
+ cv_stack: [[u32; 8]; 54], // Space for 54 subtree chaining values:
+ cv_stack_len: u8, // 2^54 * CHUNK_LEN = 2^64
+ flags: u32,
+}
+
+impl Hasher {
+ fn new_internal(key_words: [u32; 8], flags: u32) -> Self {
+ Self {
+ chunk_state: ChunkState::new(key_words, 0, flags),
+ key_words,
+ cv_stack: [[0; 8]; 54],
+ cv_stack_len: 0,
+ flags,
+ }
+ }
+
+ /// Construct a new `Hasher` for the regular hash function.
+ pub fn new() -> Self {
+ Self::new_internal(IV, 0)
+ }
+
+ /// Construct a new `Hasher` for the keyed hash function.
+ pub fn new_keyed(key: &[u8; KEY_LEN]) -> Self {
+ let mut key_words = [0; 8];
+ words_from_little_endian_bytes(key, &mut key_words);
+ Self::new_internal(key_words, KEYED_HASH)
+ }
+
+ /// Construct a new `Hasher` for the key derivation function. The context
+ /// string should be hardcoded, globally unique, and application-specific.
+ pub fn new_derive_key(context: &str) -> Self {
+ let mut context_hasher = Self::new_internal(IV, DERIVE_KEY_CONTEXT);
+ context_hasher.update(context.as_bytes());
+ let mut context_key = [0; KEY_LEN];
+ context_hasher.finalize(&mut context_key);
+ let mut context_key_words = [0; 8];
+ words_from_little_endian_bytes(&context_key, &mut context_key_words);
+ Self::new_internal(context_key_words, DERIVE_KEY_MATERIAL)
+ }
+
+ fn push_stack(&mut self, cv: [u32; 8]) {
+ self.cv_stack[self.cv_stack_len as usize] = cv;
+ self.cv_stack_len += 1;
+ }
+
+ fn pop_stack(&mut self) -> [u32; 8] {
+ self.cv_stack_len -= 1;
+ self.cv_stack[self.cv_stack_len as usize]
+ }
+
+ // Section 5.1.2 of the BLAKE3 spec explains this algorithm in more detail.
+ fn add_chunk_chaining_value(&mut self, mut new_cv: [u32; 8], mut total_chunks: u64) {
+ // This chunk might complete some subtrees. For each completed subtree,
+ // its left child will be the current top entry in the CV stack, and
+ // its right child will be the current value of `new_cv`. Pop each left
+ // child off the stack, merge it with `new_cv`, and overwrite `new_cv`
+ // with the result. After all these merges, push the final value of
+ // `new_cv` onto the stack. The number of completed subtrees is given
+ // by the number of trailing 0-bits in the new total number of chunks.
+ while total_chunks & 1 == 0 {
+ new_cv = parent_cv(self.pop_stack(), new_cv, self.key_words, self.flags);
+ total_chunks >>= 1;
+ }
+ self.push_stack(new_cv);
+ }
+
+ /// Add input to the hash state. This can be called any number of times.
+ pub fn update(&mut self, mut input: &[u8]) {
+ while !input.is_empty() {
+ // If the current chunk is complete, finalize it and reset the
+ // chunk state. More input is coming, so this chunk is not ROOT.
+ if self.chunk_state.len() == CHUNK_LEN {
+ let chunk_cv = self.chunk_state.output().chaining_value();
+ let total_chunks = self.chunk_state.chunk_counter + 1;
+ self.add_chunk_chaining_value(chunk_cv, total_chunks);
+ self.chunk_state = ChunkState::new(self.key_words, total_chunks, self.flags);
+ }
+
+ // Compress input bytes into the current chunk state.
+ let want = CHUNK_LEN - self.chunk_state.len();
+ let take = min(want, input.len());
+ self.chunk_state.update(&input[..take]);
+ input = &input[take..];
+ }
+ }
+
+ /// Finalize the hash and write any number of output bytes.
+ pub fn finalize(&self, out_slice: &mut [u8]) {
+ // Starting with the Output from the current chunk, compute all the
+ // parent chaining values along the right edge of the tree, until we
+ // have the root Output.
+ let mut output = self.chunk_state.output();
+ let mut parent_nodes_remaining = self.cv_stack_len as usize;
+ while parent_nodes_remaining > 0 {
+ parent_nodes_remaining -= 1;
+ output = parent_output(
+ self.cv_stack[parent_nodes_remaining],
+ output.chaining_value(),
+ self.key_words,
+ self.flags,
+ );
+ }
+ output.root_output_bytes(out_slice);
+ }
+}