blake2b_simd/
portable.rs

1use arrayref::{array_ref, array_refs};
2
3use super::*;
4use crate::guts::{
5    count_high, count_low, final_block, flag_word, input_debug_asserts, Finalize, LastNode, Stride,
6};
7
8// G is the mixing function, called eight times per round in the compression
9// function. V is the 16-word state vector of the compression function, usually
10// described as a 4x4 matrix. A, B, C, and D are the mixing indices, set by the
11// caller first to the four columns of V, and then to its four diagonals. X and
12// Y are words of input, chosen by the caller according to the message
13// schedule, SIGMA.
14#[inline(always)]
15fn g(v: &mut [Word; 16], a: usize, b: usize, c: usize, d: usize, x: Word, y: Word) {
16    v[a] = v[a].wrapping_add(v[b]).wrapping_add(x);
17    v[d] = (v[d] ^ v[a]).rotate_right(32);
18    v[c] = v[c].wrapping_add(v[d]);
19    v[b] = (v[b] ^ v[c]).rotate_right(24);
20    v[a] = v[a].wrapping_add(v[b]).wrapping_add(y);
21    v[d] = (v[d] ^ v[a]).rotate_right(16);
22    v[c] = v[c].wrapping_add(v[d]);
23    v[b] = (v[b] ^ v[c]).rotate_right(63);
24}
25
26// This is too much inlining for some small chips like ARM Cortex-M0, so the
27// uninline_portable feature is provided to disable it.
28#[cfg_attr(not(feature = "uninline_portable"), inline(always))]
29fn round(r: usize, m: &[Word; 16], v: &mut [Word; 16]) {
30    // Select the message schedule based on the round.
31    let s = SIGMA[r];
32
33    // Mix the columns.
34    g(v, 0, 4, 8, 12, m[s[0] as usize], m[s[1] as usize]);
35    g(v, 1, 5, 9, 13, m[s[2] as usize], m[s[3] as usize]);
36    g(v, 2, 6, 10, 14, m[s[4] as usize], m[s[5] as usize]);
37    g(v, 3, 7, 11, 15, m[s[6] as usize], m[s[7] as usize]);
38
39    // Mix the rows.
40    g(v, 0, 5, 10, 15, m[s[8] as usize], m[s[9] as usize]);
41    g(v, 1, 6, 11, 12, m[s[10] as usize], m[s[11] as usize]);
42    g(v, 2, 7, 8, 13, m[s[12] as usize], m[s[13] as usize]);
43    g(v, 3, 4, 9, 14, m[s[14] as usize], m[s[15] as usize]);
44}
45
46#[inline(always)]
47fn compress_block(
48    block: &[u8; BLOCKBYTES],
49    words: &mut [Word; 8],
50    count: Count,
51    last_block: Word,
52    last_node: Word,
53) {
54    // Initialize the compression state.
55    let mut v = [
56        words[0],
57        words[1],
58        words[2],
59        words[3],
60        words[4],
61        words[5],
62        words[6],
63        words[7],
64        IV[0],
65        IV[1],
66        IV[2],
67        IV[3],
68        IV[4] ^ count_low(count),
69        IV[5] ^ count_high(count),
70        IV[6] ^ last_block,
71        IV[7] ^ last_node,
72    ];
73
74    // Parse the message bytes as ints in little endian order.
75    const W: usize = size_of::<Word>();
76    let msg_refs = array_refs!(block, W, W, W, W, W, W, W, W, W, W, W, W, W, W, W, W);
77    let m = [
78        Word::from_le_bytes(*msg_refs.0),
79        Word::from_le_bytes(*msg_refs.1),
80        Word::from_le_bytes(*msg_refs.2),
81        Word::from_le_bytes(*msg_refs.3),
82        Word::from_le_bytes(*msg_refs.4),
83        Word::from_le_bytes(*msg_refs.5),
84        Word::from_le_bytes(*msg_refs.6),
85        Word::from_le_bytes(*msg_refs.7),
86        Word::from_le_bytes(*msg_refs.8),
87        Word::from_le_bytes(*msg_refs.9),
88        Word::from_le_bytes(*msg_refs.10),
89        Word::from_le_bytes(*msg_refs.11),
90        Word::from_le_bytes(*msg_refs.12),
91        Word::from_le_bytes(*msg_refs.13),
92        Word::from_le_bytes(*msg_refs.14),
93        Word::from_le_bytes(*msg_refs.15),
94    ];
95
96    round(0, &m, &mut v);
97    round(1, &m, &mut v);
98    round(2, &m, &mut v);
99    round(3, &m, &mut v);
100    round(4, &m, &mut v);
101    round(5, &m, &mut v);
102    round(6, &m, &mut v);
103    round(7, &m, &mut v);
104    round(8, &m, &mut v);
105    round(9, &m, &mut v);
106    round(10, &m, &mut v);
107    round(11, &m, &mut v);
108
109    words[0] ^= v[0] ^ v[8];
110    words[1] ^= v[1] ^ v[9];
111    words[2] ^= v[2] ^ v[10];
112    words[3] ^= v[3] ^ v[11];
113    words[4] ^= v[4] ^ v[12];
114    words[5] ^= v[5] ^ v[13];
115    words[6] ^= v[6] ^ v[14];
116    words[7] ^= v[7] ^ v[15];
117}
118
119pub fn compress1_loop(
120    input: &[u8],
121    words: &mut [Word; 8],
122    mut count: Count,
123    last_node: LastNode,
124    finalize: Finalize,
125    stride: Stride,
126) {
127    input_debug_asserts(input, finalize);
128
129    let mut local_words = *words;
130
131    let mut fin_offset = input.len().saturating_sub(1);
132    fin_offset -= fin_offset % stride.padded_blockbytes();
133    let mut buf = [0; BLOCKBYTES];
134    let (fin_block, fin_len, _) = final_block(input, fin_offset, &mut buf, stride);
135    let fin_last_block = flag_word(finalize.yes());
136    let fin_last_node = flag_word(finalize.yes() && last_node.yes());
137
138    let mut offset = 0;
139    loop {
140        let block;
141        let count_delta;
142        let last_block;
143        let last_node;
144        if offset == fin_offset {
145            block = fin_block;
146            count_delta = fin_len;
147            last_block = fin_last_block;
148            last_node = fin_last_node;
149        } else {
150            block = array_ref!(input, offset, BLOCKBYTES);
151            count_delta = BLOCKBYTES;
152            last_block = flag_word(false);
153            last_node = flag_word(false);
154        };
155
156        count = count.wrapping_add(count_delta as Count);
157        compress_block(block, &mut local_words, count, last_block, last_node);
158
159        // Check for termination before bumping the offset, to avoid overflow.
160        if offset == fin_offset {
161            break;
162        }
163
164        offset += stride.padded_blockbytes();
165    }
166
167    *words = local_words;
168}