p3_baby_bear/
poseidon2.rs

1//! Implementation of Poseidon2, see: https://eprint.iacr.org/2023/323
2//!
3//! For the diffusion matrix, 1 + Diag(V), we perform a search to find an optimized
4//! vector V composed of elements with efficient multiplication algorithms in AVX2/AVX512/NEON.
5//!
6//! This leads to using small values (e.g. 1, 2, 3, 4) where multiplication is implemented using addition
7//! and inverse powers of 2 where it is possible to avoid monty reductions.
8//! Additionally, for technical reasons, having the first entry be -2 is useful.
9//!
10//! Optimized Diagonal for BabyBear16:
11//! [-2, 1, 2, 1/2, 3, 4, -1/2, -3, -4, 1/2^8, 1/4, 1/8, 1/2^27, -1/2^8, -1/16, -1/2^27].
12//! Optimized Diagonal for BabyBear24:
13//! [-2, 1, 2, 1/2, 3, 4, -1/2, -3, -4, 1/2^8, 1/4, 1/8, 1/16, 1/2^7, 1/2^9, 1/2^27, -1/2^8, -1/4, -1/8, -1/16, -1/32, -1/64, -1/2^7, -1/2^27]
14//! See poseidon2\src\diffusion.rs for information on how to double check these matrices in Sage.
15
16use core::ops::Mul;
17
18use p3_field::{Field, FieldAlgebra, PrimeField32};
19use p3_monty_31::{
20    GenericPoseidon2LinearLayersMonty31, InternalLayerBaseParameters, InternalLayerParameters,
21    MontyField31, Poseidon2ExternalLayerMonty31, Poseidon2InternalLayerMonty31,
22};
23use p3_poseidon2::Poseidon2;
24
25use crate::{BabyBear, BabyBearParameters};
26
27pub type Poseidon2InternalLayerBabyBear<const WIDTH: usize> =
28    Poseidon2InternalLayerMonty31<BabyBearParameters, WIDTH, BabyBearInternalLayerParameters>;
29
30pub type Poseidon2ExternalLayerBabyBear<const WIDTH: usize> =
31    Poseidon2ExternalLayerMonty31<BabyBearParameters, WIDTH>;
32
33/// Degree of the chosen permutation polynomial for BabyBear, used as the Poseidon2 S-Box.
34///
35/// As p - 1 = 15 * 2^{27} the neither 3 nor 5 satisfy gcd(p - 1, D) = 1.
36/// Instead we use the next smallest available value, namely 7.
37const BABYBEAR_S_BOX_DEGREE: u64 = 7;
38
39/// An implementation of the Poseidon2 hash function specialised to run on the current architecture.
40///
41/// It acts on arrays of the form either `[BabyBear::Packing; WIDTH]` or `[BabyBear; WIDTH]`. For speed purposes,
42/// wherever possible, input arrays should of the form `[BabyBear::Packing; WIDTH]`.
43pub type Poseidon2BabyBear<const WIDTH: usize> = Poseidon2<
44    <BabyBear as Field>::Packing,
45    Poseidon2ExternalLayerBabyBear<WIDTH>,
46    Poseidon2InternalLayerBabyBear<WIDTH>,
47    WIDTH,
48    BABYBEAR_S_BOX_DEGREE,
49>;
50
51/// An implementation of the matrix multiplications in the internal and external layers of Poseidon2.
52///
53/// This can act on [FA; WIDTH] for any FieldAlgebra which implements multiplication by BabyBear field elements.
54/// If you have either `[BabyBear::Packing; WIDTH]` or `[BabyBear; WIDTH]` it will be much faster
55/// to use `Poseidon2BabyBear<WIDTH>` instead of building a Poseidon2 permutation using this.
56pub type GenericPoseidon2LinearLayersBabyBear =
57    GenericPoseidon2LinearLayersMonty31<BabyBearParameters, BabyBearInternalLayerParameters>;
58
59// In order to use BabyBear::new_array we need to convert our vector to a vector of u32's.
60// To do this we make use of the fact that BabyBear::ORDER_U32 - 1 = 15 * 2^27 so for 0 <= n <= 27:
61// -1/2^n = (BabyBear::ORDER_U32 - 1) >> n
62// 1/2^n = -(-1/2^n) = BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> n)
63
64/// The vector `[-2, 1, 2, 1/2, 3, 4, -1/2, -3, -4, 1/2^8, 1/4, 1/8, 1/2^27, -1/2^8, -1/16, -1/2^27]`
65/// saved as an array of BabyBear elements.
66const INTERNAL_DIAG_MONTY_16: [BabyBear; 16] = BabyBear::new_array([
67    BabyBear::ORDER_U32 - 2,
68    1,
69    2,
70    (BabyBear::ORDER_U32 + 1) >> 1,
71    3,
72    4,
73    (BabyBear::ORDER_U32 - 1) >> 1,
74    BabyBear::ORDER_U32 - 3,
75    BabyBear::ORDER_U32 - 4,
76    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 8),
77    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 2),
78    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 3),
79    BabyBear::ORDER_U32 - 15,
80    (BabyBear::ORDER_U32 - 1) >> 8,
81    (BabyBear::ORDER_U32 - 1) >> 4,
82    15,
83]);
84
85/// The vector [-2, 1, 2, 1/2, 3, 4, -1/2, -3, -4, 1/2^8, 1/4, 1/8, 1/16, 1/2^7, 1/2^9, 1/2^27, -1/2^8, -1/4, -1/8, -1/16, -1/32, -1/64, -1/2^7, -1/2^27]
86/// saved as an array of BabyBear elements.
87const INTERNAL_DIAG_MONTY_24: [BabyBear; 24] = BabyBear::new_array([
88    BabyBear::ORDER_U32 - 2,
89    1,
90    2,
91    (BabyBear::ORDER_U32 + 1) >> 1,
92    3,
93    4,
94    (BabyBear::ORDER_U32 - 1) >> 1,
95    BabyBear::ORDER_U32 - 3,
96    BabyBear::ORDER_U32 - 4,
97    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 8),
98    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 2),
99    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 3),
100    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 4),
101    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 7),
102    BabyBear::ORDER_U32 - ((BabyBear::ORDER_U32 - 1) >> 9),
103    BabyBear::ORDER_U32 - 15,
104    (BabyBear::ORDER_U32 - 1) >> 8,
105    (BabyBear::ORDER_U32 - 1) >> 2,
106    (BabyBear::ORDER_U32 - 1) >> 3,
107    (BabyBear::ORDER_U32 - 1) >> 4,
108    (BabyBear::ORDER_U32 - 1) >> 5,
109    (BabyBear::ORDER_U32 - 1) >> 6,
110    (BabyBear::ORDER_U32 - 1) >> 7,
111    15,
112]);
113
114/// Contains data needed to define the internal layers of the Poseidon2 permutation.
115#[derive(Debug, Clone, Default)]
116pub struct BabyBearInternalLayerParameters;
117
118impl InternalLayerBaseParameters<BabyBearParameters, 16> for BabyBearInternalLayerParameters {
119    type ArrayLike = [MontyField31<BabyBearParameters>; 15];
120
121    const INTERNAL_DIAG_MONTY: [BabyBear; 16] = INTERNAL_DIAG_MONTY_16;
122
123    /// Perform the internal matrix multiplication: s -> (1 + Diag(V))s.
124    /// We ignore `state[0]` as it is handled separately.
125    fn internal_layer_mat_mul(
126        state: &mut [MontyField31<BabyBearParameters>; 16],
127        sum: MontyField31<BabyBearParameters>,
128    ) {
129        // The diagonal matrix is defined by the vector:
130        // V = [-2, 1, 2, 1/2, 3, 4, -1/2, -3, -4, 1/2^8, 1/4, 1/8, 1/2^27, -1/2^8, -1/16, -1/2^27]
131        state[1] += sum;
132        state[2] = state[2].double() + sum;
133        state[3] = state[3].halve() + sum;
134        state[4] = sum + state[4].double() + state[4];
135        state[5] = sum + state[5].double().double();
136        state[6] = sum - state[6].halve();
137        state[7] = sum - (state[7].double() + state[7]);
138        state[8] = sum - state[8].double().double();
139        state[9] = state[9].mul_2exp_neg_n(8);
140        state[9] += sum;
141        state[10] = state[10].mul_2exp_neg_n(2);
142        state[10] += sum;
143        state[11] = state[11].mul_2exp_neg_n(3);
144        state[11] += sum;
145        state[12] = state[12].mul_2exp_neg_n(27);
146        state[12] += sum;
147        state[13] = state[13].mul_2exp_neg_n(8);
148        state[13] = sum - state[13];
149        state[14] = state[14].mul_2exp_neg_n(4);
150        state[14] = sum - state[14];
151        state[15] = state[15].mul_2exp_neg_n(27);
152        state[15] = sum - state[15];
153    }
154
155    fn generic_internal_linear_layer<FA>(state: &mut [FA; 16])
156    where
157        FA: FieldAlgebra + Mul<BabyBear, Output = FA>,
158    {
159        let part_sum: FA = state[1..].iter().cloned().sum();
160        let full_sum = part_sum.clone() + state[0].clone();
161
162        // The first three diagonal elements are -2, 1, 2 so we do something custom.
163        state[0] = part_sum - state[0].clone();
164        state[1] = full_sum.clone() + state[1].clone();
165        state[2] = full_sum.clone() + state[2].double();
166
167        // For the remaining elements we use multiplication.
168        // This could probably be improved slightly by making use of the
169        // mul_2exp_u64 and div_2exp_u64 but this would involve porting div_2exp_u64 to FieldAlgebra.
170        state
171            .iter_mut()
172            .zip(INTERNAL_DIAG_MONTY_16)
173            .skip(3)
174            .for_each(|(val, diag_elem)| {
175                *val = full_sum.clone() + val.clone() * diag_elem;
176            });
177    }
178}
179
180impl InternalLayerBaseParameters<BabyBearParameters, 24> for BabyBearInternalLayerParameters {
181    type ArrayLike = [MontyField31<BabyBearParameters>; 23];
182
183    const INTERNAL_DIAG_MONTY: [BabyBear; 24] = INTERNAL_DIAG_MONTY_24;
184
185    /// Perform the internal matrix multiplication: s -> (1 + Diag(V))s.
186    /// We ignore `state[0]` as it is handled separately.
187    fn internal_layer_mat_mul(
188        state: &mut [MontyField31<BabyBearParameters>; 24],
189        sum: MontyField31<BabyBearParameters>,
190    ) {
191        // The diagonal matrix is defined by the vector:
192        // V = [-2, 1, 2, 1/2, 3, 4, -1/2, -3, -4, 1/2^8, 1/4, 1/8, 1/16, 1/2^7, 1/2^9, 1/2^27, -1/2^8, -1/4, -1/8, -1/16, -1/32, -1/64, -1/2^7, -1/2^27]
193        state[1] += sum;
194        state[2] = state[2].double() + sum;
195        state[3] = state[3].halve() + sum;
196        state[4] = sum + state[4].double() + state[4];
197        state[5] = sum + state[5].double().double();
198        state[6] = sum - state[6].halve();
199        state[7] = sum - (state[7].double() + state[7]);
200        state[8] = sum - state[8].double().double();
201        state[9] = state[9].mul_2exp_neg_n(8);
202        state[9] += sum;
203        state[10] = state[10].mul_2exp_neg_n(2);
204        state[10] += sum;
205        state[11] = state[11].mul_2exp_neg_n(3);
206        state[11] += sum;
207        state[12] = state[12].mul_2exp_neg_n(4);
208        state[12] += sum;
209        state[13] = state[13].mul_2exp_neg_n(7);
210        state[13] += sum;
211        state[14] = state[14].mul_2exp_neg_n(9);
212        state[14] += sum;
213        state[15] = state[15].mul_2exp_neg_n(27);
214        state[15] += sum;
215        state[16] = state[16].mul_2exp_neg_n(8);
216        state[16] = sum - state[16];
217        state[17] = state[17].mul_2exp_neg_n(2);
218        state[17] = sum - state[17];
219        state[18] = state[18].mul_2exp_neg_n(3);
220        state[18] = sum - state[18];
221        state[19] = state[19].mul_2exp_neg_n(4);
222        state[19] = sum - state[19];
223        state[20] = state[20].mul_2exp_neg_n(5);
224        state[20] = sum - state[20];
225        state[21] = state[21].mul_2exp_neg_n(6);
226        state[21] = sum - state[21];
227        state[22] = state[22].mul_2exp_neg_n(7);
228        state[22] = sum - state[22];
229        state[23] = state[23].mul_2exp_neg_n(27);
230        state[23] = sum - state[23];
231    }
232
233    fn generic_internal_linear_layer<FA>(state: &mut [FA; 24])
234    where
235        FA: FieldAlgebra + Mul<BabyBear, Output = FA>,
236    {
237        let part_sum: FA = state[1..].iter().cloned().sum();
238        let full_sum = part_sum.clone() + state[0].clone();
239
240        // The first three diagonal elements are -2, 1, 2 so we do something custom.
241        state[0] = part_sum - state[0].clone();
242        state[1] = full_sum.clone() + state[1].clone();
243        state[2] = full_sum.clone() + state[2].double();
244
245        // For the remaining elements we use multiplication.
246        // This could probably be improved slightly by making use of the
247        // mul_2exp_u64 and div_2exp_u64 but this would involve porting div_2exp_u64 to FieldAlgebra.
248        state
249            .iter_mut()
250            .zip(INTERNAL_DIAG_MONTY_24)
251            .skip(3)
252            .for_each(|(val, diag_elem)| {
253                *val = full_sum.clone() + val.clone() * diag_elem;
254            });
255    }
256}
257
258impl InternalLayerParameters<BabyBearParameters, 16> for BabyBearInternalLayerParameters {}
259impl InternalLayerParameters<BabyBearParameters, 24> for BabyBearInternalLayerParameters {}
260
261#[cfg(test)]
262mod tests {
263    use p3_field::FieldAlgebra;
264    use p3_symmetric::Permutation;
265    use rand::{Rng, SeedableRng};
266    use rand_xoshiro::Xoroshiro128Plus;
267
268    use super::*;
269
270    type F = BabyBear;
271
272    // We need to make some round constants. We use Xoroshiro128Plus for this as we can easily match this PRNG in sage.
273    // See: https://github.com/0xPolygonZero/hash-constants for the sage code used to create all these tests.
274
275    /// Test on a roughly random input.
276    /// This random input is generated by the following sage code:
277    /// set_random_seed(16)
278    /// vector([BB.random_element() for t in range(16)]).
279    #[test]
280    fn test_poseidon2_width_16_random() {
281        let mut input: [F; 16] = [
282            894848333, 1437655012, 1200606629, 1690012884, 71131202, 1749206695, 1717947831,
283            120589055, 19776022, 42382981, 1831865506, 724844064, 171220207, 1299207443, 227047920,
284            1783754913,
285        ]
286        .map(F::from_canonical_u32);
287
288        let expected: [F; 16] = [
289            1255099308, 941729227, 93609187, 112406640, 492658670, 1824768948, 812517469,
290            1055381989, 670973674, 1407235524, 891397172, 1003245378, 1381303998, 1564172645,
291            1399931635, 1005462965,
292        ]
293        .map(F::from_canonical_u32);
294
295        let mut rng = Xoroshiro128Plus::seed_from_u64(1);
296        let perm = Poseidon2BabyBear::new_from_rng_128(&mut rng);
297
298        perm.permute_mut(&mut input);
299        assert_eq!(input, expected);
300    }
301
302    /// Test on a roughly random input.
303    /// This random input is generated by the following sage code:
304    /// set_random_seed(24)
305    /// vector([BB.random_element() for t in range(24)]).
306    #[test]
307    fn test_poseidon2_width_24_random() {
308        let mut input: [F; 24] = [
309            886409618, 1327899896, 1902407911, 591953491, 648428576, 1844789031, 1198336108,
310            355597330, 1799586834, 59617783, 790334801, 1968791836, 559272107, 31054313,
311            1042221543, 474748436, 135686258, 263665994, 1962340735, 1741539604, 449439011,
312            1131357108, 50869465, 1589724894,
313        ]
314        .map(F::from_canonical_u32);
315
316        let expected: [F; 24] = [
317            249424342, 562262148, 757431114, 354243402, 57767055, 976981973, 1393169022,
318            1774550827, 1527742125, 1019514605, 1776327602, 266236737, 1412355182, 1070239213,
319            426390978, 1775539440, 1527732214, 1101406020, 1417710778, 1699632661, 413672313,
320            820348291, 1067197851, 1669055675,
321        ]
322        .map(F::from_canonical_u32);
323
324        let mut rng = Xoroshiro128Plus::seed_from_u64(1);
325        let perm = Poseidon2BabyBear::new_from_rng_128(&mut rng);
326
327        perm.permute_mut(&mut input);
328
329        assert_eq!(input, expected);
330    }
331
332    /// Test the generic internal layer against the optimized internal layer
333    /// for a random input of width 16.
334    #[test]
335    fn test_generic_internal_linear_layer_16() {
336        let mut rng = rand::thread_rng();
337        let mut input1: [F; 16] = rng.gen();
338        let mut input2 = input1;
339
340        let part_sum: F = input1[1..].iter().cloned().sum();
341        let full_sum = part_sum + input1[0];
342
343        input1[0] = part_sum - input1[0];
344
345        BabyBearInternalLayerParameters::internal_layer_mat_mul(&mut input1, full_sum);
346        BabyBearInternalLayerParameters::generic_internal_linear_layer(&mut input2);
347
348        assert_eq!(input1, input2);
349    }
350
351    /// Test the generic internal layer against the optimized internal layer
352    /// for a random input of width 24.
353    #[test]
354    fn test_generic_internal_linear_layer_24() {
355        let mut rng = rand::thread_rng();
356        let mut input1: [F; 24] = rng.gen();
357        let mut input2 = input1;
358
359        let part_sum: F = input1[1..].iter().cloned().sum();
360        let full_sum = part_sum + input1[0];
361
362        input1[0] = part_sum - input1[0];
363
364        BabyBearInternalLayerParameters::internal_layer_mat_mul(&mut input1, full_sum);
365        BabyBearInternalLayerParameters::generic_internal_linear_layer(&mut input2);
366
367        assert_eq!(input1, input2);
368    }
369}