blake2b_simd/
blake2bp.rs

//! BLAKE2bp, a variant of BLAKE2b that uses SIMD more efficiently.
//!
//! The AVX2 implementation of BLAKE2bp is about twice as fast as that of
//! BLAKE2b. However, note that it's a different hash function, and it gives a
//! different hash from BLAKE2b for the same input.
//!
//! # Example
//!
//! ```
//! use blake2b_simd::blake2bp;
//!
//! let hash = blake2bp::Params::new()
//!     .hash_length(16)
//!     .key(b"The Magic Words are Squeamish Ossifrage")
//!     .to_state()
//!     .update(b"foo")
//!     .update(b"bar")
//!     .update(b"baz")
//!     .finalize();
//! assert_eq!("e69c7d2c42a5ac14948772231c68c552", &hash.to_hex());
//! ```
22
23use crate::guts::{Finalize, Implementation, Job, LastNode, Stride};
24use crate::many;
25use crate::Count;
26use crate::Hash;
27use crate::Word;
28use crate::BLOCKBYTES;
29use crate::KEYBYTES;
30use crate::OUTBYTES;
31use core::cmp;
32use core::fmt;
33use core::mem::size_of;
34
35#[cfg(feature = "std")]
36use std;
37
// The fanout of the BLAKE2bp tree: four leaf workers hashed in parallel
// (see `fanout(DEGREE as u8)` in `Params::to_words` and the parallel jobs
// built in `Params::hash` / `State::update`).
pub(crate) const DEGREE: usize = 4;
39
40/// Compute the BLAKE2bp hash of a slice of bytes all at once, using default
41/// parameters.
42///
43/// # Example
44///
45/// ```
46/// # use blake2b_simd::blake2bp::blake2bp;
47/// let expected = "8ca9ccee7946afcb686fe7556628b5ba1bf9a691da37ca58cd049354d99f3704\
48///                 2c007427e5f219b9ab5063707ec6823872dee413ee014b4d02f2ebb6abb5f643";
49/// let hash = blake2bp(b"foo");
50/// assert_eq!(expected, &hash.to_hex());
51/// ```
52pub fn blake2bp(input: &[u8]) -> Hash {
53    Params::new().hash(input)
54}
55
/// A parameter builder for BLAKE2bp, just like the [`Params`](../struct.Params.html) type for
/// BLAKE2b.
///
/// This builder only supports configuring the hash length and a secret key. This matches the
/// options provided by the [reference
/// implementation](https://github.com/BLAKE2/BLAKE2/blob/320c325437539ae91091ce62efec1913cd8093c2/ref/blake2.h#L162-L165).
///
/// # Example
///
/// ```
/// use blake2b_simd::blake2bp;
/// let mut state = blake2bp::Params::new().hash_length(32).to_state();
/// ```
#[derive(Clone)]
pub struct Params {
    // Length in bytes of the final hash, 1..=OUTBYTES.
    hash_length: u8,
    // Number of key bytes in use, 0..=KEYBYTES. Zero means unkeyed.
    key_length: u8,
    // The key bytes, zero-padded out to KEYBYTES.
    key: [u8; KEYBYTES],
    // Chosen by `Implementation::detect()` in `new()`.
    implementation: Implementation,
}
76
impl Params {
    /// Equivalent to `Params::default()`.
    pub fn new() -> Self {
        Self {
            hash_length: OUTBYTES as u8,
            key_length: 0,
            key: [0; KEYBYTES],
            implementation: Implementation::detect(),
        }
    }

    // Build the initial state words for the four leaf nodes and for the root
    // node, encoding these parameters plus the fixed BLAKE2bp tree-hashing
    // constants (fanout 4, max depth 2).
    fn to_words(&self) -> ([[Word; 8]; DEGREE], [Word; 8]) {
        let mut base_params = crate::Params::new();
        base_params
            .hash_length(self.hash_length as usize)
            .key(&self.key[..self.key_length as usize])
            .fanout(DEGREE as u8)
            .max_depth(2)
            .max_leaf_length(0)
            // Note that inner_hash_length is always OUTBYTES, regardless of the hash_length
            // parameter. This isn't documented in the spec, but it matches the behavior of the
            // reference implementation: https://github.com/BLAKE2/BLAKE2/blob/320c325437539ae91091ce62efec1913cd8093c2/ref/blake2bp-ref.c#L55
            .inner_hash_length(OUTBYTES);
        // Each leaf differs only in its node_offset (0..DEGREE).
        let leaf_words = |worker_index| {
            base_params
                .clone()
                .node_offset(worker_index)
                .node_depth(0)
                // Note that setting the last_node flag here has no effect,
                // because it isn't included in the state words.
                .to_words()
        };
        let leaf_words = [leaf_words(0), leaf_words(1), leaf_words(2), leaf_words(3)];
        let root_words = base_params
            .clone()
            .node_offset(0)
            .node_depth(1)
            // Note that setting the last_node flag here has no effect, because
            // it isn't included in the state words. Also note that because
            // we're only preserving its state words, the root node won't hash
            // any key bytes.
            .to_words();
        (leaf_words, root_words)
    }

    /// Hash an input all at once with these parameters.
    pub fn hash(&self, input: &[u8]) -> Hash {
        // If there's a key, just fall back to using the State.
        if self.key_length > 0 {
            return self.to_state().update(input).finalize();
        }
        let (mut leaf_words, mut root_words) = self.to_words();
        // Hash each leaf in parallel. Each leaf's job starts at its own block
        // offset (clamped so short inputs can't make the slice panic);
        // Stride::Parallel handles the interleaved walk from there (see
        // crate::guts).
        let jobs = leaf_words.iter_mut().enumerate().map(|(i, words)| {
            let input_start = cmp::min(input.len(), i * BLOCKBYTES);
            Job {
                input: &input[input_start..],
                words,
                count: 0,
                // Only the last leaf carries the last_node flag.
                last_node: if i == DEGREE - 1 {
                    LastNode::Yes
                } else {
                    LastNode::No
                },
            }
        });
        many::compress_many(jobs, self.implementation, Finalize::Yes, Stride::Parallel);
        // Hash each leaf into the root.
        finalize_root_words(
            &leaf_words,
            &mut root_words,
            self.hash_length,
            self.implementation,
        )
    }

    /// Construct a BLAKE2bp `State` object based on these parameters.
    pub fn to_state(&self) -> State {
        State::with_params(self)
    }

    /// Set the length of the final hash, from 1 to `OUTBYTES` (64). Apart from controlling the
    /// length of the final `Hash`, this is also associated data, and changing it will result in a
    /// totally different hash.
    ///
    /// # Panics
    ///
    /// Panics if `length` is zero or greater than `OUTBYTES`.
    pub fn hash_length(&mut self, length: usize) -> &mut Self {
        assert!(
            1 <= length && length <= OUTBYTES,
            "Bad hash length: {}",
            length
        );
        self.hash_length = length as u8;
        self
    }

    /// Use a secret key, so that BLAKE2bp acts as a MAC. The maximum key length is `KEYBYTES`
    /// (64). An empty key is equivalent to having no key at all.
    ///
    /// # Panics
    ///
    /// Panics if `key` is longer than `KEYBYTES`.
    pub fn key(&mut self, key: &[u8]) -> &mut Self {
        assert!(key.len() <= KEYBYTES, "Bad key length: {}", key.len());
        self.key_length = key.len() as u8;
        // Zero the whole array first so a shorter key wipes any previous one.
        self.key = [0; KEYBYTES];
        self.key[..key.len()].copy_from_slice(key);
        self
    }
}
181
182impl Default for Params {
183    fn default() -> Self {
184        Self::new()
185    }
186}
187
188impl fmt::Debug for Params {
189    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
190        write!(
191            f,
192            "Params {{ hash_length: {}, key_length: {} }}",
193            self.hash_length,
194            // NB: Don't print the key itself. Debug shouldn't leak secrets.
195            self.key_length,
196        )
197    }
198}
199
/// An incremental hasher for BLAKE2bp, just like the [`State`](../struct.State.html) type for
/// BLAKE2b.
///
/// # Example
///
/// ```
/// use blake2b_simd::blake2bp;
///
/// let mut state = blake2bp::State::new();
/// state.update(b"foo");
/// state.update(b"bar");
/// let hash = state.finalize();
///
/// let expected = "e654427b6ef02949471712263e59071abbb6aa94855674c1daeed6cfaf127c33\
///                 dfa3205f7f7f71e4f0673d25fa82a368488911f446bccd323af3ab03f53e56e5";
/// assert_eq!(expected, &hash.to_hex());
/// ```
#[derive(Clone)]
pub struct State {
    // The four parallel leaf states, eight words each.
    leaf_words: [[Word; 8]; DEGREE],
    // The root node's state words.
    root_words: [Word; 8],
    // Note that this buffer is twice as large as what compress4 needs. That guarantees that we
    // have enough input when we compress to know we don't need to finalize any of the leaves.
    buf: [u8; 2 * DEGREE * BLOCKBYTES],
    // Number of valid bytes currently held in `buf`.
    buf_len: u16,
    // Note that this is the *per-leaf* count.
    count: Count,
    // Requested length of the final hash, in bytes.
    hash_length: u8,
    implementation: Implementation,
    // Whether a key block was loaded into `buf` at construction; see
    // `with_params` and `count`.
    is_keyed: bool,
}
231
impl State {
    /// Equivalent to `State::default()` or `Params::default().to_state()`.
    pub fn new() -> Self {
        Self::with_params(&Params::default())
    }

    fn with_params(params: &Params) -> Self {
        let (leaf_words, root_words) = params.to_words();

        // If a key is set, initialize the buffer to contain the key bytes. Note
        // that only the leaves hash key bytes. The root doesn't, even though
        // the key length is still set in its parameters. Again this isn't
        // documented in the spec, but it matches the behavior of the reference
        // implementation:
        // https://github.com/BLAKE2/BLAKE2/blob/320c325437539ae91091ce62efec1913cd8093c2/ref/blake2bp-ref.c#L128
        // This particular behavior (though not the inner hash length behavior
        // above) is also corroborated by the official test vectors; see
        // tests/vector_tests.rs.
        let mut buf = [0; 2 * DEGREE * BLOCKBYTES];
        let mut buf_len = 0;
        if params.key_length > 0 {
            // Each leaf hashes its own zero-padded copy of the key as its
            // first block.
            for i in 0..DEGREE {
                let keybytes = &params.key[..params.key_length as usize];
                buf[i * BLOCKBYTES..][..keybytes.len()].copy_from_slice(keybytes);
                buf_len = BLOCKBYTES * DEGREE;
            }
        }

        Self {
            leaf_words,
            root_words,
            buf,
            buf_len: buf_len as u16,
            count: 0, // count gets updated in compress_to_leaves()
            hash_length: params.hash_length,
            implementation: params.implementation,
            is_keyed: params.key_length > 0,
        }
    }

    // Copy as much of `input` as fits into the internal buffer, and advance
    // `input` past the bytes consumed.
    fn fill_buf(&mut self, input: &mut &[u8]) {
        let take = cmp::min(self.buf.len() - self.buf_len as usize, input.len());
        self.buf[self.buf_len as usize..][..take].copy_from_slice(&input[..take]);
        self.buf_len += take as u16;
        *input = &input[take..];
    }

    // Run one round of (non-finalizing) parallel compressions over `input`,
    // updating the leaf states and the shared per-leaf `count`.
    fn compress_to_leaves(
        leaves: &mut [[Word; 8]; DEGREE],
        input: &[u8],
        count: &mut Count,
        implementation: Implementation,
    ) {
        // Input is assumed to be an even number of blocks for each leaf. Since
        // we're not finalizing, debug asserts will fire otherwise.
        let jobs = leaves.iter_mut().enumerate().map(|(i, words)| {
            Job {
                input: &input[i * BLOCKBYTES..],
                words,
                count: *count,
                last_node: LastNode::No, // irrelevant when not finalizing
            }
        });
        many::compress_many(jobs, implementation, Finalize::No, Stride::Parallel);
        // Note that count is the bytes input *per-leaf*.
        *count = count.wrapping_add((input.len() / DEGREE) as Count);
    }

    /// Add input to the hash. You can call `update` any number of times.
    pub fn update(&mut self, mut input: &[u8]) -> &mut Self {
        // If we have a partial buffer, try to complete it. If we complete it and there's more
        // input waiting, we need to compress to make more room. However, because we need to be
        // sure that *none* of the leaves would need to be finalized as part of this round of
        // compression, we need to buffer more than we would for BLAKE2b.
        if self.buf_len > 0 {
            self.fill_buf(&mut input);
            // The buffer is large enough for two compressions. If we've filled
            // the buffer and there's still more input coming, then we have to
            // do at least one compression. If there's enough input still
            // coming that all the leaves are guaranteed to get more, do both
            // compressions in the buffer. Otherwise, do just one and shift the
            // back half of the buffer to the front.
            if !input.is_empty() {
                if input.len() > (DEGREE - 1) * BLOCKBYTES {
                    // Enough input coming to do both compressions.
                    Self::compress_to_leaves(
                        &mut self.leaf_words,
                        &self.buf,
                        &mut self.count,
                        self.implementation,
                    );
                    self.buf_len = 0;
                } else {
                    // Only enough input coming for one compression.
                    Self::compress_to_leaves(
                        &mut self.leaf_words,
                        &self.buf[..DEGREE * BLOCKBYTES],
                        &mut self.count,
                        self.implementation,
                    );
                    self.buf_len = (DEGREE * BLOCKBYTES) as u16;
                    let (buf_front, buf_back) = self.buf.split_at_mut(DEGREE * BLOCKBYTES);
                    buf_front.copy_from_slice(buf_back);
                }
            }
        }

        // Now we directly compress as much input as possible, without copying
        // it into the buffer. We need to make sure we buffer at least one byte
        // for each of the leaves, so that we know we don't need to finalize
        // them.
        let needed_tail = (DEGREE - 1) * BLOCKBYTES + 1;
        let mut bulk_bytes = input.len().saturating_sub(needed_tail);
        // Round down to a whole number of blocks per leaf.
        bulk_bytes -= bulk_bytes % (DEGREE * BLOCKBYTES);
        if bulk_bytes > 0 {
            Self::compress_to_leaves(
                &mut self.leaf_words,
                &input[..bulk_bytes],
                &mut self.count,
                self.implementation,
            );
            input = &input[bulk_bytes..];
        }

        // Buffer any remaining input, to be either compressed or finalized in
        // a subsequent call.
        self.fill_buf(&mut input);
        debug_assert_eq!(0, input.len());
        self
    }

    /// Finalize the state and return a `Hash`. This method is idempotent, and calling it multiple
    /// times will give the same result. It's also possible to `update` with more input in between.
    pub fn finalize(&self) -> Hash {
        // Hash whatever's remaining in the buffer and finalize the leaves.
        // All work happens on copies of the state words, which is what makes
        // this method idempotent.
        let buf_len = self.buf_len as usize;
        let mut leaves_copy = self.leaf_words;
        let jobs = leaves_copy
            .iter_mut()
            .enumerate()
            .map(|(leaf_index, leaf_words)| {
                // Clamp the start so leaves with no buffered input get an
                // empty slice instead of panicking.
                let input = &self.buf[cmp::min(leaf_index * BLOCKBYTES, buf_len)..buf_len];
                Job {
                    input,
                    words: leaf_words,
                    count: self.count,
                    last_node: if leaf_index == DEGREE - 1 {
                        LastNode::Yes
                    } else {
                        LastNode::No
                    },
                }
            });
        many::compress_many(jobs, self.implementation, Finalize::Yes, Stride::Parallel);

        // Concatenate each leaf into the root and hash that.
        let mut root_words_copy = self.root_words;
        finalize_root_words(
            &leaves_copy,
            &mut root_words_copy,
            self.hash_length,
            self.implementation,
        )
    }

    /// Return the total number of bytes input so far.
    ///
    /// Note that `count` doesn't include the bytes of the key block, if any.
    /// It's exactly the total number of input bytes fed to `update`.
    pub fn count(&self) -> Count {
        // Remember that self.count is *per-leaf*.
        let mut ret = self
            .count
            .wrapping_mul(DEGREE as Count)
            .wrapping_add(self.buf_len as Count);
        if self.is_keyed {
            // Subtract the key block that with_params preloaded into the
            // buffer; it isn't caller input.
            ret -= (DEGREE * BLOCKBYTES) as Count;
        }
        ret
    }
}
413
#[cfg(feature = "std")]
impl std::io::Write for State {
    // Feed `buf` into the hash. Never fails, and always consumes the whole
    // buffer.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let consumed = buf.len();
        self.update(buf);
        Ok(consumed)
    }

    // Nothing is buffered at the io::Write layer, so flushing is a no-op.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
425
426impl fmt::Debug for State {
427    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
428        write!(
429            f,
430            "State {{ count: {}, hash_length: {} }}",
431            self.count(),
432            self.hash_length,
433        )
434    }
435}
436
437impl Default for State {
438    fn default() -> Self {
439        Self::with_params(&Params::default())
440    }
441}
442
443// Compress each of the four finalized hashes into the root words as input,
444// using two compressions. Note that even if a future version of this
445// implementation supports the hash_length parameter and sets it as associated
446// data for all nodes, this step must still use the untruncated output of each
447// leaf. Note also that, as mentioned above, the root node doesn't hash any key
448// bytes.
449fn finalize_root_words(
450    leaf_words: &[[Word; 8]; DEGREE],
451    root_words: &mut [Word; 8],
452    hash_length: u8,
453    imp: Implementation,
454) -> Hash {
455    debug_assert_eq!(OUTBYTES, 8 * size_of::<Word>());
456    let mut block = [0; DEGREE * OUTBYTES];
457    for (word, chunk) in leaf_words
458        .iter()
459        .flat_map(|words| words.iter())
460        .zip(block.chunks_exact_mut(size_of::<Word>()))
461    {
462        chunk.copy_from_slice(&word.to_le_bytes());
463    }
464    imp.compress1_loop(
465        &block,
466        root_words,
467        0,
468        LastNode::Yes,
469        Finalize::Yes,
470        Stride::Serial,
471    );
472    Hash {
473        bytes: crate::state_words_to_bytes(&root_words),
474        len: hash_length,
475    }
476}
477
// Test helper: pin `params` to the portable (non-SIMD) implementation,
// overriding whatever `Implementation::detect()` chose.
pub(crate) fn force_portable(params: &mut Params) {
    params.implementation = Implementation::portable();
}
481
#[cfg(test)]
pub(crate) mod test {
    use super::*;
    use crate::paint_test_input;

    // This is a simple reference implementation without the complicated buffering or parameter
    // support of the real implementation. We need this because the official test vectors don't
    // include any inputs large enough to exercise all the branches in the buffering logic.
    fn blake2bp_reference(input: &[u8]) -> Hash {
        let mut leaves = arrayvec::ArrayVec::<_, DEGREE>::new();
        for leaf_index in 0..DEGREE {
            leaves.push(
                crate::Params::new()
                    .fanout(DEGREE as u8)
                    .max_depth(2)
                    .node_offset(leaf_index as u64)
                    .inner_hash_length(OUTBYTES)
                    .to_state(),
            );
        }
        leaves[DEGREE - 1].set_last_node(true);
        // Deal the input blocks round-robin to the leaves.
        for (i, chunk) in input.chunks(BLOCKBYTES).enumerate() {
            leaves[i % DEGREE].update(chunk);
        }
        let mut root = crate::Params::new()
            .fanout(DEGREE as u8)
            .max_depth(2)
            .node_depth(1)
            .inner_hash_length(OUTBYTES)
            .last_node(true)
            .to_state();
        for leaf in &mut leaves {
            root.update(leaf.finalize().as_bytes());
        }
        root.finalize()
    }

    #[test]
    fn test_against_reference() {
        let mut buf = [0; 21 * BLOCKBYTES];
        paint_test_input(&mut buf);
        // - 8 blocks is just enough to fill the double buffer.
        // - 9 blocks triggers the "perform one compression on the double buffer" case.
        // - 11 blocks is the largest input where only one compression may be performed, on the
        //   first half of the buffer, because there's not enough input to avoid needing to
        //   finalize the second half.
        // - 12 blocks triggers the "perform both compressions in the double buffer" case.
        // - 15 blocks is the largest input where, after compressing 8 blocks from the buffer,
        //   there's not enough input to hash directly from memory.
        // - 16 blocks triggers "after emptying the buffer, hash directly from memory".
        for num_blocks in 0..=20 {
            for &extra in &[0, 1, BLOCKBYTES - 1] {
                for &portable in &[false, true] {
                    // eprintln!("\ncase -----");
                    // dbg!(num_blocks);
                    // dbg!(extra);
                    // dbg!(portable);

                    // First hash the input all at once, as a sanity check.
                    let mut params = Params::new();
                    if portable {
                        force_portable(&mut params);
                    }
                    let input = &buf[..num_blocks * BLOCKBYTES + extra];
                    let expected = blake2bp_reference(&input);
                    let mut state = params.to_state();
                    let found = state.update(input).finalize();
                    assert_eq!(expected, found);

                    // Then, do it again, but buffer 1 byte of input first. That causes the buffering
                    // branch to trigger.
                    let mut state = params.to_state();
                    let maybe_one = cmp::min(1, input.len());
                    state.update(&input[..maybe_one]);
                    assert_eq!(maybe_one as Count, state.count());
                    // Do a throwaway finalize here to check for idempotency.
                    state.finalize();
                    state.update(&input[maybe_one..]);
                    assert_eq!(input.len() as Count, state.count());
                    let found = state.finalize();
                    assert_eq!(expected, found);

                    // Finally, do it again with the all-at-once interface.
                    assert_eq!(expected, blake2bp(input));
                }
            }
        }
    }
}