halo2_ecc/bn254/
final_exp.rs

1use super::{Fp12Chip, Fp2Chip, FpChip, FqPoint};
2use crate::halo2_proofs::{
3    arithmetic::Field,
4    halo2curves::bn256::{Fq, Fq2, BN_X, FROBENIUS_COEFF_FQ12_C1},
5};
6use crate::{
7    ecc::get_naf,
8    fields::{fp12::mul_no_carry_w6, vector::FieldVector, FieldChip},
9};
10use halo2_base::{
11    gates::GateInstructions,
12    utils::{modulus, BigPrimeField},
13    Context,
14    QuantumCell::Constant,
15};
16use num_bigint::BigUint;
17
18const XI_0: i64 = 9;
19
20impl<F: BigPrimeField> Fp12Chip<'_, F> {
21    // computes a ** (p ** power)
22    // only works for p = 3 (mod 4) and p = 1 (mod 6)
23    pub fn frobenius_map(
24        &self,
25        ctx: &mut Context<F>,
26        a: &<Self as FieldChip<F>>::FieldPoint,
27        power: usize,
28    ) -> <Self as FieldChip<F>>::FieldPoint {
29        assert_eq!(modulus::<Fq>() % 4u64, BigUint::from(3u64));
30        assert_eq!(modulus::<Fq>() % 6u64, BigUint::from(1u64));
31        assert_eq!(a.0.len(), 12);
32        let pow = power % 12;
33        let mut out_fp2 = Vec::with_capacity(6);
34
35        let fp_chip = self.fp_chip();
36        let fp2_chip = Fp2Chip::<F>::new(fp_chip);
37        for i in 0..6 {
38            let frob_coeff = FROBENIUS_COEFF_FQ12_C1[pow].pow_vartime([i as u64]);
39            // possible optimization (not implemented): load `frob_coeff` as we multiply instead of loading first
40            // frobenius map is used infrequently so this is a small optimization
41
42            let mut a_fp2 = FieldVector(vec![a[i].clone(), a[i + 6].clone()]);
43            if pow % 2 != 0 {
44                a_fp2 = fp2_chip.conjugate(ctx, a_fp2);
45            }
46            // if `frob_coeff` is in `Fp` and not just `Fp2`, then we can be more efficient in multiplication
47            if frob_coeff == Fq2::one() {
48                out_fp2.push(a_fp2);
49            } else if frob_coeff.c1 == Fq::zero() {
50                let frob_fixed = fp_chip.load_constant(ctx, frob_coeff.c0);
51                {
52                    let out_nocarry = fp2_chip.0.fp_mul_no_carry(ctx, a_fp2, frob_fixed);
53                    out_fp2.push(fp2_chip.carry_mod(ctx, out_nocarry));
54                }
55            } else {
56                let frob_fixed = fp2_chip.load_constant(ctx, frob_coeff);
57                out_fp2.push(fp2_chip.mul(ctx, a_fp2, frob_fixed));
58            }
59        }
60
61        let out_coeffs = out_fp2
62            .iter()
63            .map(|x| x[0].clone())
64            .chain(out_fp2.iter().map(|x| x[1].clone()))
65            .collect();
66
67        FieldVector(out_coeffs)
68    }
69
70    // exp is in little-endian
71    /// # Assumptions
72    /// * `a` is nonzero field point
73    pub fn pow(
74        &self,
75        ctx: &mut Context<F>,
76        a: &<Self as FieldChip<F>>::FieldPoint,
77        exp: Vec<u64>,
78    ) -> <Self as FieldChip<F>>::FieldPoint {
79        let mut res = a.clone();
80        let mut is_started = false;
81        let naf = get_naf(exp);
82
83        for &z in naf.iter().rev() {
84            if is_started {
85                res = self.mul(ctx, &res, &res);
86            }
87
88            if z != 0 {
89                assert!(z == 1 || z == -1);
90                if is_started {
91                    res = if z == 1 {
92                        self.mul(ctx, &res, a)
93                    } else {
94                        self.divide_unsafe(ctx, &res, a)
95                    };
96                } else {
97                    assert_eq!(z, 1);
98                    is_started = true;
99                }
100            }
101        }
102        res
103    }
104
105    // assume input is an element of Fp12 in the cyclotomic subgroup GΦ₁₂
106    // A cyclotomic group is a subgroup of Fp^n defined by
107    //   GΦₙ(p) = {α ∈ Fpⁿ : α^{Φₙ(p)} = 1}
108
109    // below we implement compression and decompression for an element  GΦ₁₂ following Theorem 3.1 of https://eprint.iacr.org/2010/542.pdf
110    // Fp4 = Fp2(w^3) where (w^3)^2 = XI_0 +u
111    // Fp12 = Fp4(w) where w^3 = w^3
112
113    /// in = g0 + g2 w + g4 w^2 + g1 w^3 + g3 w^4 + g5 w^5 where g_i = g_i0 + g_i1 * u are elements of Fp2
114    /// out = Compress(in) = [ g2, g3, g4, g5 ]
115    pub fn cyclotomic_compress(&self, a: &FqPoint<F>) -> Vec<FqPoint<F>> {
116        let a = &a.0;
117        let g2 = FieldVector(vec![a[1].clone(), a[1 + 6].clone()]);
118        let g3 = FieldVector(vec![a[4].clone(), a[4 + 6].clone()]);
119        let g4 = FieldVector(vec![a[2].clone(), a[2 + 6].clone()]);
120        let g5 = FieldVector(vec![a[5].clone(), a[5 + 6].clone()]);
121        vec![g2, g3, g4, g5]
122    }
123
124    /// Input:
125    /// * `compression = [g2, g3, g4, g5]` where g_i are proper elements of Fp2
126    ///
127    /// Output:
128    /// * `Decompress(compression) = g0 + g2 w + g4 w^2 + g1 w^3 + g3 w^4 + g5 w^5` where
129    /// * All elements of output are proper elements of Fp2 and:
130    ///     c = XI0 + u
131    ///     if g2 != 0:
132    ///         g1 = (g5^2 * c + 3 g4^2 - 2 g3)/(4g2)
133    ///         g0 = (2 g1^2 + g2 * g5 - 3 g3*g4) * c + 1
134    ///     if g2 = 0:
135    ///         g1 = (2 g4 * g5)/g3
136    ///         g0 = (2 g1^2 - 3 g3 * g4) * c + 1
137    pub fn cyclotomic_decompress(
138        &self,
139        ctx: &mut Context<F>,
140        compression: Vec<FqPoint<F>>,
141    ) -> FqPoint<F> {
142        let [g2, g3, g4, g5]: [_; 4] = compression.try_into().unwrap();
143
144        let fp_chip = self.fp_chip();
145        let fp2_chip = Fp2Chip::<F>::new(fp_chip);
146        let g5_sq = fp2_chip.mul_no_carry(ctx, &g5, &g5);
147        let g5_sq_c = mul_no_carry_w6::<_, _, XI_0>(fp_chip, ctx, g5_sq);
148
149        let g4_sq = fp2_chip.mul_no_carry(ctx, &g4, &g4);
150        let g4_sq_3 = fp2_chip.scalar_mul_no_carry(ctx, &g4_sq, 3);
151        let g3_2 = fp2_chip.scalar_mul_no_carry(ctx, &g3, 2);
152
153        let mut g1_num = fp2_chip.add_no_carry(ctx, &g5_sq_c, &g4_sq_3);
154        g1_num = fp2_chip.sub_no_carry(ctx, &g1_num, &g3_2);
155        // can divide without carrying g1_num or g1_denom (I think)
156        let g2_4 = fp2_chip.scalar_mul_no_carry(ctx, &g2, 4);
157        let g1_1 = fp2_chip.divide_unsafe(ctx, &g1_num, &g2_4);
158
159        let g4_g5 = fp2_chip.mul_no_carry(ctx, &g4, &g5);
160        let g1_num = fp2_chip.scalar_mul_no_carry(ctx, &g4_g5, 2);
161        let g1_0 = fp2_chip.divide_unsafe(ctx, &g1_num, &g3);
162
163        let g2_is_zero = fp2_chip.is_zero(ctx, &g2);
164        // resulting `g1` is already in "carried" format (witness is in `[0, p)`)
165        let g1 = fp2_chip.0.select(ctx, g1_0, g1_1, g2_is_zero);
166
167        // share the computation of 2 g1^2 between the two cases
168        let g1_sq = fp2_chip.mul_no_carry(ctx, &g1, &g1);
169        let g1_sq_2 = fp2_chip.scalar_mul_no_carry(ctx, &g1_sq, 2);
170
171        let g2_g5 = fp2_chip.mul_no_carry(ctx, &g2, &g5);
172        let g3_g4 = fp2_chip.mul_no_carry(ctx, &g3, &g4);
173        let g3_g4_3 = fp2_chip.scalar_mul_no_carry(ctx, &g3_g4, 3);
174        let temp = fp2_chip.add_no_carry(ctx, &g1_sq_2, &g2_g5);
175        let temp = fp2_chip.0.select(ctx, g1_sq_2, temp, g2_is_zero);
176        let temp = fp2_chip.sub_no_carry(ctx, &temp, &g3_g4_3);
177        let mut g0 = mul_no_carry_w6::<_, _, XI_0>(fp_chip, ctx, temp);
178
179        // compute `g0 + 1`
180        g0[0].truncation.limbs[0] =
181            fp2_chip.gate().add(ctx, g0[0].truncation.limbs[0], Constant(F::ONE));
182        g0[0].native = fp2_chip.gate().add(ctx, g0[0].native, Constant(F::ONE));
183        g0[0].truncation.max_limb_bits += 1;
184        g0[0].value += 1usize;
185
186        // finally, carry g0
187        let g0 = fp2_chip.carry_mod(ctx, g0);
188
189        let mut g0 = g0.into_iter();
190        let mut g1 = g1.into_iter();
191        let mut g2 = g2.into_iter();
192        let mut g3 = g3.into_iter();
193        let mut g4 = g4.into_iter();
194        let mut g5 = g5.into_iter();
195
196        let mut out_coeffs = Vec::with_capacity(12);
197        for _ in 0..2 {
198            out_coeffs.append(&mut vec![
199                g0.next().unwrap(),
200                g2.next().unwrap(),
201                g4.next().unwrap(),
202                g1.next().unwrap(),
203                g3.next().unwrap(),
204                g5.next().unwrap(),
205            ]);
206        }
207        FieldVector(out_coeffs)
208    }
209
210    // input is [g2, g3, g4, g5] = C(g) in compressed format of `cyclotomic_compress`
211    // assume all inputs are proper Fp2 elements
212    // output is C(g^2) = [h2, h3, h4, h5] computed using Theorem 3.2 of https://eprint.iacr.org/2010/542.pdf
213    // all output elements are proper Fp2 elements (with carry)
214    //  c = XI_0 + u
215    //  h2 = 2(g2 + 3*c*B_45)
216    //  h3 = 3(A_45 - (c+1)B_45) - 2g3
217    //  h4 = 3(A_23 - (c+1)B_23) - 2g4
218    //  h5 = 2(g5 + 3B_23)
219    //  A_ij = (g_i + g_j)(g_i + c g_j)
220    //  B_ij = g_i g_j
221
222    pub fn cyclotomic_square(
223        &self,
224        ctx: &mut Context<F>,
225        compression: &[FqPoint<F>],
226    ) -> Vec<FqPoint<F>> {
227        assert_eq!(compression.len(), 4);
228        let g2 = &compression[0];
229        let g3 = &compression[1];
230        let g4 = &compression[2];
231        let g5 = &compression[3];
232
233        let fp_chip = self.fp_chip();
234        let fp2_chip = Fp2Chip::<F>::new(fp_chip);
235
236        let g2_plus_g3 = fp2_chip.add_no_carry(ctx, g2, g3);
237        let cg3 = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp_chip, ctx, g3.into());
238        let g2_plus_cg3 = fp2_chip.add_no_carry(ctx, g2, &cg3);
239        let a23 = fp2_chip.mul_no_carry(ctx, &g2_plus_g3, &g2_plus_cg3);
240
241        let g4_plus_g5 = fp2_chip.add_no_carry(ctx, g4, g5);
242        let cg5 = mul_no_carry_w6::<_, _, XI_0>(fp_chip, ctx, g5.into());
243        let g4_plus_cg5 = fp2_chip.add_no_carry(ctx, g4, &cg5);
244        let a45 = fp2_chip.mul_no_carry(ctx, &g4_plus_g5, &g4_plus_cg5);
245
246        let b23 = fp2_chip.mul_no_carry(ctx, g2, g3);
247        let b45 = fp2_chip.mul_no_carry(ctx, g4, g5);
248        let b45_c = mul_no_carry_w6::<_, _, XI_0>(fp_chip, ctx, b45.clone());
249
250        let mut temp = fp2_chip.scalar_mul_and_add_no_carry(ctx, &b45_c, g2, 3);
251        let h2 = fp2_chip.scalar_mul_no_carry(ctx, &temp, 2);
252
253        temp = fp2_chip.add_no_carry(ctx, b45_c, b45);
254        temp = fp2_chip.sub_no_carry(ctx, &a45, temp);
255        temp = fp2_chip.scalar_mul_no_carry(ctx, temp, 3);
256        let h3 = fp2_chip.scalar_mul_and_add_no_carry(ctx, g3, temp, -2);
257
258        const XI0_PLUS_1: i64 = XI_0 + 1;
259        // (c + 1) = (XI_0 + 1) + u
260        temp = mul_no_carry_w6::<F, FpChip<F>, XI0_PLUS_1>(fp_chip, ctx, b23.clone());
261        temp = fp2_chip.sub_no_carry(ctx, &a23, temp);
262        temp = fp2_chip.scalar_mul_no_carry(ctx, temp, 3);
263        let h4 = fp2_chip.scalar_mul_and_add_no_carry(ctx, g4, temp, -2);
264
265        temp = fp2_chip.scalar_mul_and_add_no_carry(ctx, b23, g5, 3);
266        let h5 = fp2_chip.scalar_mul_no_carry(ctx, temp, 2);
267
268        [h2, h3, h4, h5].into_iter().map(|h| fp2_chip.carry_mod(ctx, h)).collect()
269    }
270
271    // exp is in little-endian
272    /// # Assumptions
273    /// * `a` is a nonzero element in the cyclotomic subgroup
274    pub fn cyclotomic_pow(&self, ctx: &mut Context<F>, a: FqPoint<F>, exp: Vec<u64>) -> FqPoint<F> {
275        let mut compression = self.cyclotomic_compress(&a);
276        let mut out = None;
277        let mut is_started = false;
278        let naf = get_naf(exp);
279
280        for &z in naf.iter().rev() {
281            if is_started {
282                compression = self.cyclotomic_square(ctx, &compression);
283            }
284            if z != 0 {
285                assert!(z == 1 || z == -1);
286                if is_started {
287                    let mut res = self.cyclotomic_decompress(ctx, compression);
288                    res = if z == 1 {
289                        self.mul(ctx, &res, &a)
290                    } else {
291                        self.divide_unsafe(ctx, &res, &a)
292                    };
293                    // compression is free, so it doesn't hurt (except possibly witness generation runtime) to do it
294                    // TODO: alternatively we go from small bits to large to avoid this compression
295                    compression = self.cyclotomic_compress(&res);
296                    out = Some(res);
297                } else {
298                    assert_eq!(z, 1);
299                    is_started = true;
300                }
301            }
302        }
303        if naf[0] == 0 {
304            out = Some(self.cyclotomic_decompress(ctx, compression));
305        }
306        out.unwrap_or(a)
307    }
308
309    #[allow(non_snake_case)]
310    // use equation for (p^4 - p^2 + 1)/r in Section 5 of https://eprint.iacr.org/2008/490.pdf for BN curves
311    pub fn hard_part_BN(
312        &self,
313        ctx: &mut Context<F>,
314        m: <Self as FieldChip<F>>::FieldPoint,
315    ) -> <Self as FieldChip<F>>::FieldPoint {
316        // x = BN_X
317
318        // m^p
319        let mp = self.frobenius_map(ctx, &m, 1);
320        // m^{p^2}
321        let mp2 = self.frobenius_map(ctx, &m, 2);
322        // m^{p^3}
323        let mp3 = self.frobenius_map(ctx, &m, 3);
324
325        // y0 = m^p * m^{p^2} * m^{p^3}
326        let mp2_mp3 = self.mul(ctx, &mp2, &mp3);
327        let y0 = self.mul(ctx, &mp, &mp2_mp3);
328        // y1 = 1/m,  inverse = frob(6) = conjugation in cyclotomic subgroup
329        let y1 = self.conjugate(ctx, m.clone());
330
331        // m^x
332        let mx = self.cyclotomic_pow(ctx, m, vec![BN_X]);
333        // (m^x)^p
334        let mxp = self.frobenius_map(ctx, &mx, 1);
335        // m^{x^2}
336
337        let mx2 = self.cyclotomic_pow(ctx, mx.clone(), vec![BN_X]);
338        // (m^{x^2})^p
339        let mx2p = self.frobenius_map(ctx, &mx2, 1);
340        // y2 = (m^{x^2})^{p^2}
341        let y2 = self.frobenius_map(ctx, &mx2, 2);
342        // m^{x^3}
343        // y5 = 1/mx2
344        let y5 = self.conjugate(ctx, mx2.clone());
345
346        let mx3 = self.cyclotomic_pow(ctx, mx2, vec![BN_X]);
347        // (m^{x^3})^p
348        let mx3p = self.frobenius_map(ctx, &mx3, 1);
349
350        // y3 = 1/mxp
351        let y3 = self.conjugate(ctx, mxp);
352        // y4 = 1/(mx * mx2p)
353        let mx_mx2p = self.mul(ctx, &mx, &mx2p);
354        let y4 = self.conjugate(ctx, mx_mx2p);
355        // y6 = 1/(mx3 * mx3p)
356        let mx3_mx3p = self.mul(ctx, &mx3, &mx3p);
357        let y6 = self.conjugate(ctx, mx3_mx3p);
358
359        // out = y0 * y1^2 * y2^6 * y3^12 * y4^18 * y5^30 * y6^36
360        // we compute this using the vectorial addition chain from p. 6 of https://eprint.iacr.org/2008/490.pdf
361        let mut T0 = self.mul(ctx, &y6, &y6);
362        T0 = self.mul(ctx, &T0, &y4);
363        T0 = self.mul(ctx, &T0, &y5);
364        let mut T1 = self.mul(ctx, &y3, &y5);
365        T1 = self.mul(ctx, &T1, &T0);
366        T0 = self.mul(ctx, &T0, &y2);
367        T1 = self.mul(ctx, &T1, &T1);
368        T1 = self.mul(ctx, &T1, &T0);
369        T1 = self.mul(ctx, &T1, &T1);
370        T0 = self.mul(ctx, &T1, &y1);
371        T1 = self.mul(ctx, &T1, &y0);
372        T0 = self.mul(ctx, &T0, &T0);
373        T0 = self.mul(ctx, &T0, &T1);
374
375        T0
376    }
377
378    // out = in^{ (q^6 - 1)*(q^2 + 1) }
379    /// # Assumptions
380    /// * `a` is nonzero field point
381    pub fn easy_part(
382        &self,
383        ctx: &mut Context<F>,
384        a: <Self as FieldChip<F>>::FieldPoint,
385    ) -> <Self as FieldChip<F>>::FieldPoint {
386        // a^{q^6} = conjugate of a
387        let f1 = self.conjugate(ctx, a.clone());
388        let f2 = self.divide_unsafe(ctx, &f1, a);
389        let f3 = self.frobenius_map(ctx, &f2, 2);
390        self.mul(ctx, &f3, &f2)
391    }
392
393    // out = in^{(q^12 - 1)/r}
394    pub fn final_exp(
395        &self,
396        ctx: &mut Context<F>,
397        a: <Self as FieldChip<F>>::FieldPoint,
398    ) -> <Self as FieldChip<F>>::FieldPoint {
399        let f0 = self.easy_part(ctx, a);
400        let f = self.hard_part_BN(ctx, f0);
401        f
402    }
403}