p3_goldilocks/goldilocks.rs

use alloc::vec;
use alloc::vec::Vec;
use core::fmt;
use core::fmt::{Debug, Display, Formatter};
use core::hash::{Hash, Hasher};
use core::mem::transmute;
use core::iter::{Product, Sum};
use core::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign};

use num_bigint::BigUint;
use p3_field::{
    exp_10540996611094048183, exp_u64_by_squaring, halve_u64, Field, FieldAlgebra, Packable,
    PrimeField, PrimeField64, TwoAdicField,
};
use p3_util::{assume, branch_hint};
use rand::distributions::{Distribution, Standard};
use rand::Rng;
use serde::{Deserialize, Serialize};

/// The Goldilocks prime
const P: u64 = 0xFFFF_FFFF_0000_0001;

/// The prime field known as Goldilocks, defined as `F_p` where `p = 2^64 - 2^32 + 1`.
///
/// Note that the safety of deriving `Serialize` and `Deserialize` relies on the fact that the internal value can be any u64.
#[derive(Copy, Clone, Default, Serialize, Deserialize)]
#[repr(transparent)] // Packed field implementations rely on this!
pub struct Goldilocks {
    /// Not necessarily canonical.
    pub(crate) value: u64,
}

impl Goldilocks {
    pub(crate) const fn new(value: u64) -> Self {
        Self { value }
    }

    /// Convert a constant u64 array into a constant Goldilocks array.
    ///
    /// This is a const version of `.map(Goldilocks::new)`.
    #[inline]
    #[must_use]
    pub(crate) const fn new_array<const N: usize>(input: [u64; N]) -> [Goldilocks; N] {
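        // A manual loop is used because `for` loops (which rely on the `Iterator` trait) are not
        // allowed in `const fn`.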
        let mut output = [Goldilocks::ZERO; N];
        let mut i = 0;
        loop {
            if i == N {
                break;
            }
            output[i].value = input[i];
            i += 1;
        }
        output
    }

    /// Two's complement of `ORDER`, i.e. `2^64 - ORDER = 2^32 - 1`.
    const NEG_ORDER: u64 = Self::ORDER_U64.wrapping_neg();
}

impl PartialEq for Goldilocks {
    fn eq(&self, other: &Self) -> bool {
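        // Compare canonical forms: the internal encoding is not unique
        // (e.g. both 0 and ORDER_U64 represent zero).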
        self.as_canonical_u64() == other.as_canonical_u64()
    }
}

impl Eq for Goldilocks {}

impl Packable for Goldilocks {}

impl Hash for Goldilocks {
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_u64(self.as_canonical_u64());
    }
}

impl Ord for Goldilocks {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.as_canonical_u64().cmp(&other.as_canonical_u64())
    }
}

impl PartialOrd for Goldilocks {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Display for Goldilocks {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        Display::fmt(&self.value, f)
    }
}

impl Debug for Goldilocks {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        Debug::fmt(&self.value, f)
    }
}

impl Distribution<Goldilocks> for Standard {
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Goldilocks {
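        // Rejection sampling: draw a uniform u64 and retry unless it is a canonical
        // representative. A draw is rejected with probability (2^32 - 1) / 2^64 < 2^-32,
        // so the loop almost always exits on the first iteration.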
        loop {
            let next_u64 = rng.next_u64();
            let is_canonical = next_u64 < Goldilocks::ORDER_U64;
            if is_canonical {
                return Goldilocks::new(next_u64);
            }
        }
    }
}

impl FieldAlgebra for Goldilocks {
    type F = Self;

    const ZERO: Self = Self::new(0);
    const ONE: Self = Self::new(1);
    const TWO: Self = Self::new(2);
    const NEG_ONE: Self = Self::new(Self::ORDER_U64 - 1);

    #[inline]
    fn from_f(f: Self::F) -> Self {
        f
    }

    fn from_bool(b: bool) -> Self {
        Self::new(b.into())
    }

    fn from_canonical_u8(n: u8) -> Self {
        Self::new(n.into())
    }

    fn from_canonical_u16(n: u16) -> Self {
        Self::new(n.into())
    }

    fn from_canonical_u32(n: u32) -> Self {
        Self::new(n.into())
    }

    #[inline(always)]
    fn from_canonical_u64(n: u64) -> Self {
        Self::new(n)
    }

    fn from_canonical_usize(n: usize) -> Self {
        Self::new(n as u64)
    }

    fn from_wrapped_u32(n: u32) -> Self {
        // Any u32 value is already canonical, and we don't store canonical encodings anyway,
        // so there's no need for a reduction.
        Self::new(n.into())
    }

    fn from_wrapped_u64(n: u64) -> Self {
        // There's no need to reduce `n` to canonical form, as our internal encoding is
        // allowed to be non-canonical.
        Self::new(n)
    }

    #[inline]
    fn zero_vec(len: usize) -> Vec<Self> {
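        // `vec![0u64; len]` can take the zeroed-allocation fast path, which is usually cheaper
        // than constructing `len` field elements one at a time; `transmute` then reinterprets
        // the buffer as `Vec<Goldilocks>`.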
        // SAFETY: repr(transparent) ensures transmutation safety.
        unsafe { transmute(vec![0u64; len]) }
    }
}

impl Field for Goldilocks {
    // TODO: Add cfg-guarded Packing for NEON

    #[cfg(all(
        target_arch = "x86_64",
        target_feature = "avx2",
        not(all(feature = "nightly-features", target_feature = "avx512f"))
    ))]
    type Packing = crate::PackedGoldilocksAVX2;

    #[cfg(all(
        feature = "nightly-features",
        target_arch = "x86_64",
        target_feature = "avx512f"
    ))]
    type Packing = crate::PackedGoldilocksAVX512;
    #[cfg(not(any(
        all(
            target_arch = "x86_64",
            target_feature = "avx2",
            not(all(feature = "nightly-features", target_feature = "avx512f"))
        ),
        all(
            feature = "nightly-features",
            target_arch = "x86_64",
            target_feature = "avx512f"
        ),
    )))]
    type Packing = Self;

    // Sage: GF(2^64 - 2^32 + 1).multiplicative_generator()
    const GENERATOR: Self = Self::new(7);

    fn is_zero(&self) -> bool {
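        // Both 0 and ORDER_U64 encode zero, since the internal value is not necessarily canonical.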
        self.value == 0 || self.value == Self::ORDER_U64
    }

    #[inline]
    fn exp_u64_generic<FA: FieldAlgebra<F = Self>>(val: FA, power: u64) -> FA {
        match power {
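            // 7 * 10540996611094048183 = 4 * (p - 1) + 1, so raising to this power inverts
            // x -> x^7 on the multiplicative group (hence the `x^{1/7}` note below).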
            10540996611094048183 => exp_10540996611094048183(val), // used to compute x^{1/7}
            _ => exp_u64_by_squaring(val, power),
        }
    }

    fn try_inverse(&self) -> Option<Self> {
        if self.is_zero() {
            return None;
        }

        // From Fermat's little theorem, in a prime field `F_p`, the inverse of `a` is `a^(p-2)`.
        //
        // compute a^(p - 2) using 72 multiplications
        // The exponent p - 2 is represented in binary as:
        // 0b1111111111111111111111111111111011111111111111111111111111111111
        // Adapted from: https://github.com/facebook/winterfell/blob/d238a1/math/src/field/f64/mod.rs#L136-L164

        // compute base^11
        let t2 = self.square() * *self;

        // compute base^111
        let t3 = t2.square() * *self;

        // compute base^111111 (6 ones)
        // repeatedly square t3 3 times and multiply by t3
        let t6 = exp_acc::<3>(t3, t3);
        let t60 = t6.square();
        let t7 = t60 * *self;

        // compute base^111111111111 (12 ones)
        // repeatedly square t60 (= base^1111110) 5 times and multiply by t6
        let t12 = exp_acc::<5>(t60, t6);

        // compute base^111111111111111111111111 (24 ones)
        // repeatedly square t12 12 times and multiply by t12
        let t24 = exp_acc::<12>(t12, t12);

        // compute base^1111111111111111111111111111111 (31 ones)
        // repeatedly square t24 7 times and multiply by t7 (= base^1111111)
        let t31 = exp_acc::<7>(t24, t7);

        // compute base^111111111111111111111111111111101111111111111111111111111111111
        // repeatedly square t31 32 times and multiply by t31
        let t63 = exp_acc::<32>(t31, t31);

        // compute base^1111111111111111111111111111111011111111111111111111111111111111
        Some(t63.square() * *self)
    }

    #[inline]
    fn halve(&self) -> Self {
        Goldilocks::new(halve_u64::<P>(self.value))
    }

    #[inline]
    fn order() -> BigUint {
        P.into()
    }
}

impl PrimeField for Goldilocks {
    fn as_canonical_biguint(&self) -> BigUint {
        <Self as PrimeField64>::as_canonical_u64(self).into()
    }
}

impl PrimeField64 for Goldilocks {
    const ORDER_U64: u64 = P;

    #[inline]
    fn as_canonical_u64(&self) -> u64 {
        let mut c = self.value;
        // We only need one conditional subtraction, since 2 * ORDER would not fit in a u64.
        if c >= Self::ORDER_U64 {
            c -= Self::ORDER_U64;
        }
        c
    }
}

impl TwoAdicField for Goldilocks {
    const TWO_ADICITY: usize = 32;

    fn two_adic_generator(bits: usize) -> Self {
        assert!(bits <= Self::TWO_ADICITY);
        let base = Self::new(1_753_635_133_440_165_772); // generates the whole 2^TWO_ADICITY group
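        // `base` has multiplicative order exactly 2^TWO_ADICITY, so squaring it
        // (TWO_ADICITY - bits) times yields a generator of the order-2^bits subgroup.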
        base.exp_power_of_2(Self::TWO_ADICITY - bits)
    }
}

impl Add for Goldilocks {
    type Output = Self;

    #[inline]
    fn add(self, rhs: Self) -> Self {
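        // If the wrapping add overflows, the true sum is `sum + 2^64`. Since
        // 2^64 = ORDER + NEG_ORDER, adding NEG_ORDER restores the correct residue mod ORDER.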
        let (sum, over) = self.value.overflowing_add(rhs.value);
        let (mut sum, over) = sum.overflowing_add(u64::from(over) * Self::NEG_ORDER);
        if over {
            // NB: self.value > Self::ORDER && rhs.value > Self::ORDER is necessary but not
            // sufficient for double-overflow.
            // This assume does two things:
            //  1. If compiler knows that either self.value or rhs.value <= ORDER, then it can skip
            //     this check.
            //  2. Hints to the compiler how rare this double-overflow is (thus handled better with
            //     a branch).
            assume(self.value > Self::ORDER_U64 && rhs.value > Self::ORDER_U64);
            branch_hint();
            sum += Self::NEG_ORDER; // Cannot overflow.
        }
        Self::new(sum)
    }
}

impl AddAssign for Goldilocks {
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs;
    }
}

impl Sum for Goldilocks {
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        // This is faster than iter.reduce(|x, y| x + y).unwrap_or(Self::ZERO) for iterators of length > 2.

        // This sum will not overflow so long as iter.len() < 2^64.
        let sum = iter.map(|x| x.value as u128).sum::<u128>();
        reduce128(sum)
    }
}

impl Sub for Goldilocks {
    type Output = Self;

    #[inline]
    fn sub(self, rhs: Self) -> Self {
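        // If the wrapping sub underflows, the wrapped value is `diff + 2^64`. Since
        // 2^64 = ORDER + NEG_ORDER, subtracting NEG_ORDER restores the correct residue mod ORDER.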
        let (diff, under) = self.value.overflowing_sub(rhs.value);
        let (mut diff, under) = diff.overflowing_sub(u64::from(under) * Self::NEG_ORDER);
        if under {
            // NB: self.value < NEG_ORDER - 1 && rhs.value > ORDER is necessary but not
            // sufficient for double-underflow.
            // This assume does two things:
            //  1. If compiler knows that either self.value >= NEG_ORDER - 1 or rhs.value <= ORDER,
            //     then it can skip this check.
            //  2. Hints to the compiler how rare this double-underflow is (thus handled better
            //     with a branch).
            assume(self.value < Self::NEG_ORDER - 1 && rhs.value > Self::ORDER_U64);
            branch_hint();
            diff -= Self::NEG_ORDER; // Cannot underflow.
        }
        Self::new(diff)
    }
}

impl SubAssign for Goldilocks {
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        *self = *self - rhs;
    }
}

impl Neg for Goldilocks {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self::Output {
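        // Canonicalize first so the subtraction cannot underflow. The result may itself be
        // non-canonical (e.g. -0 is encoded as ORDER_U64), which is fine.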
        Self::new(Self::ORDER_U64 - self.as_canonical_u64())
    }
}

impl Mul for Goldilocks {
    type Output = Self;

    #[inline]
    fn mul(self, rhs: Self) -> Self {
        reduce128(u128::from(self.value) * u128::from(rhs.value))
    }
}

impl MulAssign for Goldilocks {
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        *self = *self * rhs;
    }
}

impl Product for Goldilocks {
    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
        iter.reduce(|x, y| x * y).unwrap_or(Self::ONE)
    }
}

impl Div for Goldilocks {
    type Output = Self;

    #[allow(clippy::suspicious_arithmetic_impl)]
    fn div(self, rhs: Self) -> Self {
        self * rhs.inverse()
    }
}

/// Squares the base N times and multiplies the result by the tail value.
#[inline(always)]
fn exp_acc<const N: usize>(base: Goldilocks, tail: Goldilocks) -> Goldilocks {
    base.exp_power_of_2(N) * tail
}

/// Reduces to a 64-bit value. The result might not be in canonical form; it may lie anywhere
/// between the field order and `2^64`.
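///
/// Write `x = x_lo + 2^64 * x_hi_lo + 2^96 * x_hi_hi`, where `x_hi_lo` and `x_hi_hi` are the low
/// and high 32 bits of the upper half of `x`. Modulo `p = 2^64 - 2^32 + 1` we have
/// `2^64 = 2^32 - 1 = NEG_ORDER` and `2^96 = -1`, so
/// `x = x_lo + NEG_ORDER * x_hi_lo - x_hi_hi (mod p)`, which is exactly what is computed below.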
#[inline]
pub(crate) fn reduce128(x: u128) -> Goldilocks {
    let (x_lo, x_hi) = split(x); // This is a no-op
    let x_hi_hi = x_hi >> 32;
    let x_hi_lo = x_hi & Goldilocks::NEG_ORDER;

    let (mut t0, borrow) = x_lo.overflowing_sub(x_hi_hi);
    if borrow {
        branch_hint(); // A borrow is exceedingly rare. It is faster to branch.
        t0 -= Goldilocks::NEG_ORDER; // Cannot underflow.
    }
    let t1 = x_hi_lo * Goldilocks::NEG_ORDER;
    let t2 = unsafe { add_no_canonicalize_trashing_input(t0, t1) };
    Goldilocks::new(t2)
}

#[inline]
#[allow(clippy::cast_possible_truncation)]
const fn split(x: u128) -> (u64, u64) {
    (x as u64, (x >> 64) as u64)
}

/// Fast addition modulo ORDER for x86-64.
/// This function is marked unsafe for the following reasons:
///   - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001.
///   - It is only faster in some circumstances. In particular, on x86 it overwrites both inputs in
///     the registers, so its use is not recommended when either input will be used again.
#[inline(always)]
#[cfg(target_arch = "x86_64")]
unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
    let res_wrapped: u64;
    let adjustment: u64;
    core::arch::asm!(
        "add {0}, {1}",
        // Trick. The carry flag is set iff the addition overflowed.
        // sbb x, y does x := x - y - CF. In our case, x and y are both {1:e}, so it simply does
        // {1:e} := 0xffffffff on overflow and {1:e} := 0 otherwise. {1:e} is the low 32 bits of
        // {1}; the high 32 bits are zeroed on write. In the end, we end up with 0xffffffff in {1}
        // on overflow; this happens to be NEG_ORDER.
        // Note that the CPU does not realize that the result of sbb x, x does not actually depend
        // on x. We must write the result to a register that we know to be ready. We have a
        // dependency on {1} anyway, so let's use it.
        "sbb {1:e}, {1:e}",
        inlateout(reg) x => res_wrapped,
        inlateout(reg) y => adjustment,
        options(pure, nomem, nostack),
    );
    assume(x != 0 || (res_wrapped == y && adjustment == 0));
    assume(y != 0 || (res_wrapped == x && adjustment == 0));
    // Add NEG_ORDER == subtract ORDER.
    // Cannot overflow unless the assumption that x + y < 2**64 + ORDER is incorrect.
    res_wrapped + adjustment
}

#[inline(always)]
#[cfg(not(target_arch = "x86_64"))]
unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 {
    let (res_wrapped, carry) = x.overflowing_add(y);
    // Below cannot overflow unless the assumption that x + y < 2**64 + ORDER is incorrect.
    res_wrapped + Goldilocks::NEG_ORDER * u64::from(carry)
}

#[cfg(test)]
mod tests {
    use p3_field_testing::{test_field, test_field_dft, test_two_adic_field};

    use super::*;

    type F = Goldilocks;

    #[test]
    fn test_goldilocks() {
        let f = F::new(100);
        assert_eq!(f.as_canonical_u64(), 100);

        // Over the Goldilocks field, the following equations hold:
        // p               = 0
        // 2^64 - 2^32 + 1 = 0
        // 2^64            = 2^32 - 1
        let f = F::new(u64::MAX);
        assert_eq!(f.as_canonical_u64(), u32::MAX as u64 - 1);

        let f = F::from_canonical_u64(u64::MAX);
        assert_eq!(f.as_canonical_u64(), u32::MAX as u64 - 1);

        let f = F::from_canonical_u64(0);
        assert!(f.is_zero());

        let f = F::from_canonical_u64(F::ORDER_U64);
        assert!(f.is_zero());

        assert_eq!(F::GENERATOR.as_canonical_u64(), 7_u64);

        let f_1 = F::new(1);
        let f_1_copy = F::new(1);

        let expected_result = F::ZERO;
        assert_eq!(f_1 - f_1_copy, expected_result);

        let expected_result = F::new(2);
        assert_eq!(f_1 + f_1_copy, expected_result);

        let f_2 = F::new(2);
        let expected_result = F::new(3);
        assert_eq!(f_1 + f_1_copy * f_2, expected_result);

        let expected_result = F::new(5);
        assert_eq!(f_1 + f_2 * f_2, expected_result);

        let f_p_minus_1 = F::from_canonical_u64(F::ORDER_U64 - 1);
        let expected_result = F::ZERO;
        assert_eq!(f_1 + f_p_minus_1, expected_result);

        let f_p_minus_2 = F::from_canonical_u64(F::ORDER_U64 - 2);
        let expected_result = F::from_canonical_u64(F::ORDER_U64 - 3);
        assert_eq!(f_p_minus_1 + f_p_minus_2, expected_result);

        let expected_result = F::new(1);
        assert_eq!(f_p_minus_1 - f_p_minus_2, expected_result);

        let expected_result = f_p_minus_1;
        assert_eq!(f_p_minus_2 - f_p_minus_1, expected_result);

        let expected_result = f_p_minus_2;
        assert_eq!(f_p_minus_1 - f_1, expected_result);

        let expected_result = F::new(3);
        assert_eq!(f_2 * f_2 - f_1, expected_result);

        // Generator check
        let expected_multiplicative_group_generator = F::new(7);
        assert_eq!(F::GENERATOR, expected_multiplicative_group_generator);

        // Check on `reduce128`
        let x = u128::MAX;
        let y = reduce128(x);
        // The following equality sequence holds, modulo p = 2^64 - 2^32 + 1
        // 2^128 - 1 = (2^64 - 1) * (2^64 + 1)
        //           = (2^32 - 1 - 1) * (2^32 - 1 + 1)
        //           = (2^32 - 2) * (2^32)
        //           = 2^64 - 2 * 2^32
        //           = 2^64 - 2^33
        //           = 2^32 - 1 - 2^33
        //           = - 2^32 - 1
        let expected_result = -F::new(2_u64.pow(32)) - F::new(1);
        assert_eq!(y, expected_result);

        assert_eq!(f.exp_u64(10540996611094048183).exp_const_u64::<7>(), f);
        assert_eq!(y.exp_u64(10540996611094048183).exp_const_u64::<7>(), y);
        assert_eq!(f_2.exp_u64(10540996611094048183).exp_const_u64::<7>(), f_2);
    }
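
    // A few extra sanity checks, added as an illustrative sketch. They rely only on properties
    // already stated in this file: the internal encoding is non-canonical, p = 2^64 - 2^32 + 1,
    // and the unique element of order 2 in the multiplicative group is -1.
    #[test]
    fn test_sanity_sketch() {
        // `ORDER_U64` itself is a non-canonical encoding of zero, and addition must handle it.
        assert_eq!(F::new(F::ORDER_U64) + F::new(F::ORDER_U64), F::ZERO);

        // Inversion round-trips for a nonzero element.
        let x = F::new(12345);
        assert_eq!(x * x.inverse(), F::ONE);

        // The generator of the order-2 subgroup must be -1, and `bits = 0` must give the identity.
        assert_eq!(F::two_adic_generator(1), F::NEG_ONE);
        assert_eq!(F::two_adic_generator(0), F::ONE);
    }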

    test_field!(crate::Goldilocks);
    test_two_adic_field!(crate::Goldilocks);

    test_field_dft!(radix2dit, crate::Goldilocks, p3_dft::Radix2Dit<_>);
    test_field_dft!(bowers, crate::Goldilocks, p3_dft::Radix2Bowers);
    test_field_dft!(
        parallel,
        crate::Goldilocks,
        p3_dft::Radix2DitParallel<crate::Goldilocks>
    );
}