openvm_circuit/arch/testing/
cuda.rs

1use std::sync::Arc;
2
3use openvm_circuit_primitives::{
4    bitwise_op_lookup::{
5        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
6        BitwiseOperationLookupChipGPU, SharedBitwiseOperationLookupChip,
7    },
8    range_tuple::{
9        RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip,
10        RangeTupleCheckerChipGPU, SharedRangeTupleCheckerChip,
11    },
12    var_range::{
13        SharedVariableRangeCheckerChip, VariableRangeCheckerAir, VariableRangeCheckerBus,
14        VariableRangeCheckerChip, VariableRangeCheckerChipGPU,
15    },
16};
17use openvm_cuda_backend::{
18    data_transporter::assert_eq_host_and_device_matrix,
19    engine::GpuBabyBearPoseidon2Engine,
20    prover_backend::GpuBackend,
21    types::{F, SC},
22};
23use openvm_instructions::{program::PC_BITS, riscv::RV32_REGISTER_AS};
24use openvm_poseidon2_air::{Poseidon2Config, Poseidon2SubAir};
25use openvm_stark_backend::{
26    config::Val,
27    interaction::{LookupBus, PermutationCheckBus},
28    p3_air::BaseAir,
29    p3_field::{FieldAlgebra, PrimeField32},
30    prover::{cpu::CpuBackend, types::AirProvingContext},
31    rap::AnyRap,
32    utils::disable_debug_builder,
33    verifier::VerificationError,
34    AirRef, Chip,
35};
36use openvm_stark_sdk::{
37    config::{setup_tracing_with_log_level, FriParameters},
38    engine::{StarkFriEngine, VerificationDataWithFriParams},
39};
40use rand::{rngs::StdRng, Rng, SeedableRng};
41use tracing::Level;
42
43#[cfg(feature = "metrics")]
44use crate::metrics::VmMetrics;
45use crate::{
46    arch::{
47        instructions::instruction::Instruction,
48        testing::{
49            default_tracing_memory, default_var_range_checker_bus, dummy_memory_helper,
50            execution::{air::ExecutionDummyAir, DeviceExecutionTester},
51            memory::DeviceMemoryTester,
52            program::{air::ProgramDummyAir, DeviceProgramTester},
53            TestBuilder, TestChipHarness, EXECUTION_BUS, MEMORY_BUS, MEMORY_MERKLE_BUS,
54            POSEIDON2_DIRECT_BUS, READ_INSTRUCTION_BUS,
55        },
56        Arena, DenseRecordArena, ExecutionBridge, ExecutionBus, ExecutionState, MatrixRecordArena,
57        MemoryConfig, PreflightExecutor, Streams, VmStateMut,
58    },
59    system::{
60        cuda::{poseidon2::Poseidon2PeripheryChipGPU, DIGEST_WIDTH},
61        memory::{
62            offline_checker::{MemoryBridge, MemoryBus},
63            MemoryAirInventory, SharedMemoryHelper,
64        },
65        poseidon2::air::Poseidon2PeripheryAir,
66        program::ProgramBus,
67        SystemPort,
68    },
69    utils::next_power_of_two_or_zero,
70};
71
72pub struct GpuTestChipHarness<F, Executor, AIR, GpuChip, CpuChip> {
73    pub executor: Executor,
74    pub air: AIR,
75    pub gpu_chip: GpuChip,
76    pub cpu_chip: CpuChip,
77    pub dense_arena: DenseRecordArena,
78    pub matrix_arena: MatrixRecordArena<F>,
79}
80
81impl<F, Executor, AIR, GpuChip, CpuChip> GpuTestChipHarness<F, Executor, AIR, GpuChip, CpuChip>
82where
83    F: PrimeField32,
84    AIR: BaseAir<F>,
85{
86    pub fn with_capacity(
87        executor: Executor,
88        air: AIR,
89        gpu_chip: GpuChip,
90        cpu_chip: CpuChip,
91        height: usize,
92    ) -> Self {
93        let width = air.width();
94        let height = next_power_of_two_or_zero(height);
95        let dense_arena = DenseRecordArena::with_capacity(height, width);
96        let matrix_arena = MatrixRecordArena::with_capacity(height, width);
97        Self {
98            executor,
99            air,
100            gpu_chip,
101            cpu_chip,
102            dense_arena,
103            matrix_arena,
104        }
105    }
106}
107
108impl TestBuilder<F> for GpuChipTestBuilder {
109    fn execute<E, RA>(&mut self, executor: &mut E, arena: &mut RA, instruction: &Instruction<F>)
110    where
111        E: PreflightExecutor<F, RA>,
112        RA: Arena,
113    {
114        let initial_pc = self.rng.gen_range(0..(1 << PC_BITS));
115        self.execute_with_pc(executor, arena, instruction, initial_pc);
116    }
117
118    fn execute_with_pc<E, RA>(
119        &mut self,
120        executor: &mut E,
121        arena: &mut RA,
122        instruction: &Instruction<F>,
123        initial_pc: u32,
124    ) where
125        E: PreflightExecutor<F, RA>,
126        RA: Arena,
127    {
128        let initial_state = ExecutionState {
129            pc: initial_pc,
130            timestamp: self.memory.memory.timestamp(),
131        };
132        tracing::debug!("initial_timestamp={}", initial_state.timestamp);
133
134        let mut pc = initial_pc;
135        let state_mut = VmStateMut::new(
136            &mut pc,
137            &mut self.memory.memory,
138            &mut self.streams,
139            &mut self.rng,
140            &mut self.custom_pvs,
141            arena,
142            #[cfg(feature = "metrics")]
143            &mut self.metrics,
144        );
145
146        executor
147            .execute(state_mut, instruction)
148            .expect("Expected the execution not to fail");
149        let final_state = ExecutionState {
150            pc,
151            timestamp: self.memory.memory.timestamp(),
152        };
153
154        self.program.execute(instruction, &initial_state);
155        self.execution.execute(initial_state, final_state);
156    }
157
158    fn read_cell(&mut self, address_space: usize, pointer: usize) -> F {
159        self.read::<1>(address_space, pointer)[0]
160    }
161
162    fn write_cell(&mut self, address_space: usize, pointer: usize, value: F) {
163        self.write(address_space, pointer, [value]);
164    }
165
166    fn read<const N: usize>(&mut self, address_space: usize, pointer: usize) -> [F; N] {
167        self.memory.read(address_space, pointer)
168    }
169
170    fn write<const N: usize>(&mut self, address_space: usize, pointer: usize, value: [F; N]) {
171        self.memory.write(address_space, pointer, value);
172    }
173
174    fn write_usize<const N: usize>(
175        &mut self,
176        address_space: usize,
177        pointer: usize,
178        value: [usize; N],
179    ) {
180        self.write(address_space, pointer, value.map(F::from_canonical_usize));
181    }
182
183    fn address_bits(&self) -> usize {
184        self.memory.config.pointer_max_bits
185    }
186
187    fn last_to_pc(&self) -> F {
188        self.execution.0.last_to_pc()
189    }
190
191    fn last_from_pc(&self) -> F {
192        self.execution.0.last_from_pc()
193    }
194
195    fn execution_final_state(&self) -> ExecutionState<F> {
196        self.execution.0.records.last().unwrap().final_state
197    }
198
199    fn streams_mut(&mut self) -> &mut Streams<F> {
200        &mut self.streams
201    }
202
203    fn get_default_register(&mut self, increment: usize) -> usize {
204        self.default_register += increment;
205        self.default_register - increment
206    }
207
208    fn get_default_pointer(&mut self, increment: usize) -> usize {
209        self.default_pointer += increment;
210        self.default_pointer - increment
211    }
212
213    fn write_heap_pointer_default(
214        &mut self,
215        reg_increment: usize,
216        pointer_increment: usize,
217    ) -> (usize, usize) {
218        let register = self.get_default_register(reg_increment);
219        let pointer = self.get_default_pointer(pointer_increment);
220        self.write(1, register, pointer.to_le_bytes().map(F::from_canonical_u8));
221        (register, pointer)
222    }
223
224    fn write_heap_default<const NUM_LIMBS: usize>(
225        &mut self,
226        reg_increment: usize,
227        pointer_increment: usize,
228        writes: Vec<[F; NUM_LIMBS]>,
229    ) -> (usize, usize) {
230        let register = self.get_default_register(reg_increment);
231        let pointer = self.get_default_pointer(pointer_increment);
232        self.write_heap(register, pointer, writes);
233        (register, pointer)
234    }
235}
236
237pub struct GpuChipTestBuilder {
238    pub memory: DeviceMemoryTester,
239    pub execution: DeviceExecutionTester,
240    pub program: DeviceProgramTester,
241    pub streams: Streams<F>,
242
243    var_range_checker: Arc<VariableRangeCheckerChipGPU>,
244    bitwise_op_lookup: Option<Arc<BitwiseOperationLookupChipGPU<8>>>,
245    range_tuple_checker: Option<Arc<RangeTupleCheckerChipGPU<2>>>,
246
247    rng: StdRng,
248    pub custom_pvs: Vec<Option<F>>,
249    default_register: usize,
250    default_pointer: usize,
251    #[cfg(feature = "metrics")]
252    metrics: VmMetrics,
253}
254
255impl Default for GpuChipTestBuilder {
256    fn default() -> Self {
257        let mut mem_config = MemoryConfig::default();
258        // Currently tests still use gen_pointer for the full 1<<29 range of address space 1.
259        mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29;
260        Self::volatile(mem_config, default_var_range_checker_bus())
261    }
262}
263
264impl GpuChipTestBuilder {
265    pub fn new() -> Self {
266        // TODO: allow for custom test builder configuration
267        Self::default()
268    }
269
270    pub fn volatile(mem_config: MemoryConfig, bus: VariableRangeCheckerBus) -> Self {
271        setup_tracing_with_log_level(Level::INFO);
272        let mem_bus = MemoryBus::new(MEMORY_BUS);
273        let range_checker = Arc::new(VariableRangeCheckerChipGPU::hybrid(Arc::new(
274            VariableRangeCheckerChip::new(bus),
275        )));
276        Self {
277            memory: DeviceMemoryTester::volatile(
278                default_tracing_memory(&mem_config, 1),
279                mem_bus,
280                mem_config,
281                range_checker.clone(),
282            ),
283            execution: DeviceExecutionTester::new(ExecutionBus::new(EXECUTION_BUS)),
284            program: DeviceProgramTester::new(ProgramBus::new(READ_INSTRUCTION_BUS)),
285            streams: Default::default(),
286            var_range_checker: range_checker,
287            bitwise_op_lookup: None,
288            range_tuple_checker: None,
289            rng: StdRng::seed_from_u64(0),
290            custom_pvs: Vec::new(),
291            default_register: 0,
292            default_pointer: 0,
293            #[cfg(feature = "metrics")]
294            metrics: VmMetrics::default(),
295        }
296    }
297
298    pub fn persistent(mem_config: MemoryConfig, bus: VariableRangeCheckerBus) -> Self {
299        setup_tracing_with_log_level(Level::INFO);
300        let mem_bus = MemoryBus::new(MEMORY_BUS);
301        let range_checker = Arc::new(VariableRangeCheckerChipGPU::hybrid(Arc::new(
302            VariableRangeCheckerChip::new(bus),
303        )));
304        Self {
305            memory: DeviceMemoryTester::persistent(
306                default_tracing_memory(&mem_config, DIGEST_WIDTH),
307                mem_bus,
308                mem_config,
309                range_checker.clone(),
310            ),
311            execution: DeviceExecutionTester::new(ExecutionBus::new(EXECUTION_BUS)),
312            program: DeviceProgramTester::new(ProgramBus::new(READ_INSTRUCTION_BUS)),
313            streams: Default::default(),
314            var_range_checker: range_checker,
315            bitwise_op_lookup: None,
316            range_tuple_checker: None,
317            rng: StdRng::seed_from_u64(0),
318            custom_pvs: Vec::new(),
319            default_register: 0,
320            default_pointer: 0,
321            #[cfg(feature = "metrics")]
322            metrics: VmMetrics::default(),
323        }
324    }
325
326    pub fn with_bitwise_op_lookup(mut self, bus: BitwiseOperationLookupBus) -> Self {
327        self.bitwise_op_lookup = Some(Arc::new(BitwiseOperationLookupChipGPU::hybrid(Arc::new(
328            BitwiseOperationLookupChip::new(bus),
329        ))));
330        self
331    }
332
333    pub fn with_range_tuple_checker(mut self, bus: RangeTupleCheckerBus<2>) -> Self {
334        self.range_tuple_checker = Some(Arc::new(RangeTupleCheckerChipGPU::hybrid(Arc::new(
335            RangeTupleCheckerChip::new(bus),
336        ))));
337        self
338    }
339
340    pub fn execute_harness<E, A, C, RA: Arena>(
341        &mut self,
342        harness: &mut TestChipHarness<F, E, A, C, RA>,
343        instruction: &Instruction<F>,
344    ) where
345        E: PreflightExecutor<F, RA>,
346    {
347        self.execute(&mut harness.executor, &mut harness.arena, instruction);
348    }
349
350    pub fn execute_with_pc_harness<E, A, C, RA: Arena>(
351        &mut self,
352        harness: &mut TestChipHarness<F, E, A, C, RA>,
353        instruction: &Instruction<F>,
354        initial_pc: u32,
355    ) where
356        E: PreflightExecutor<F, RA>,
357    {
358        self.execute_with_pc(
359            &mut harness.executor,
360            &mut harness.arena,
361            instruction,
362            initial_pc,
363        );
364    }
365
366    pub fn write_heap<const NUM_LIMBS: usize>(
367        &mut self,
368        register: usize,
369        pointer: usize,
370        writes: Vec<[F; NUM_LIMBS]>,
371    ) {
372        self.write(
373            1usize,
374            register,
375            pointer.to_le_bytes().map(F::from_canonical_u8),
376        );
377        if NUM_LIMBS.is_power_of_two() {
378            for (i, &write) in writes.iter().enumerate() {
379                self.write(2usize, pointer + i * NUM_LIMBS, write);
380            }
381        } else {
382            for (i, &write) in writes.iter().enumerate() {
383                let ptr = pointer + i * NUM_LIMBS;
384                for j in (0..NUM_LIMBS).step_by(4) {
385                    self.write::<4>(2usize, ptr + j, write[j..j + 4].try_into().unwrap());
386                }
387            }
388        }
389    }
390
391    pub fn system_port(&self) -> SystemPort {
392        SystemPort {
393            execution_bus: self.execution_bus(),
394            program_bus: self.program_bus(),
395            memory_bridge: self.memory_bridge(),
396        }
397    }
398    pub fn execution_bridge(&self) -> ExecutionBridge {
399        ExecutionBridge::new(self.execution.bus(), self.program.bus())
400    }
401
402    pub fn memory_bridge(&self) -> MemoryBridge {
403        self.memory.memory_bridge()
404    }
405
406    pub fn execution_bus(&self) -> ExecutionBus {
407        self.execution.bus()
408    }
409
410    pub fn program_bus(&self) -> ProgramBus {
411        self.program.bus()
412    }
413
414    pub fn memory_bus(&self) -> MemoryBus {
415        self.memory.mem_bus
416    }
417
418    pub fn rng(&mut self) -> &mut StdRng {
419        &mut self.rng
420    }
421
422    pub fn range_checker(&self) -> Arc<VariableRangeCheckerChipGPU> {
423        self.var_range_checker.clone()
424    }
425
426    pub fn bitwise_op_lookup(&self) -> Arc<BitwiseOperationLookupChipGPU<8>> {
427        self.bitwise_op_lookup
428            .clone()
429            .expect("Initialize GpuChipTestBuilder with .with_bitwise_op_lookup()")
430    }
431
432    pub fn range_tuple_checker(&self) -> Arc<RangeTupleCheckerChipGPU<2>> {
433        self.range_tuple_checker
434            .clone()
435            .expect("Initialize GpuChipTestBuilder with .with_range_tuple_checker()")
436    }
437
438    // WARNING: This CPU chip is meant for hybrid chip use, its usage WILL
439    // result in altered tracegen. For a dummy primitive chip for trace
440    // comparison, see utils::dummy_range_checker.
441    pub fn cpu_range_checker(&self) -> SharedVariableRangeCheckerChip {
442        self.var_range_checker.cpu_chip.clone().unwrap()
443    }
444
445    // WARNING: This CPU chip is meant for hybrid chip use, its usage WILL
446    // result in altered tracegen. For a dummy primitive chip for trace
447    // comparison, see utils::dummy_bitwise_op_lookup.
448    pub fn cpu_bitwise_op_lookup(&self) -> SharedBitwiseOperationLookupChip<8> {
449        self.bitwise_op_lookup
450            .as_ref()
451            .expect("Initialize GpuChipTestBuilder with .with_bitwise_op_lookup()")
452            .cpu_chip
453            .clone()
454            .unwrap()
455    }
456
457    // WARNING: This CPU chip is meant for hybrid chip use, its usage WILL
458    // result in altered tracegen. For a dummy primitive chip for trace
459    // comparison, see utils::dummy_range_tuple_checker.
460    pub fn cpu_range_tuple_checker(&self) -> SharedRangeTupleCheckerChip<2> {
461        self.range_tuple_checker
462            .as_ref()
463            .expect("Initialize GpuChipTestBuilder with .with_range_tuple_checker()")
464            .cpu_chip
465            .clone()
466            .unwrap()
467    }
468
469    // WARNING: This utility is meant for hybrid chip use, its usage WILL
470    // result in altered tracegen. For use during trace comparison, see
471    // utils::dummy_memory_helper.
472    pub fn cpu_memory_helper(&self) -> SharedMemoryHelper<F> {
473        SharedMemoryHelper::new(
474            self.cpu_range_checker(),
475            self.memory.config.timestamp_max_bits,
476        )
477    }
478
479    // See [cpu_memory_helper]. Use this utility for creation of CPU chips that
480    // are meant for tracegen comparison purposes which should not update other
481    // periphery chips (e.g., range checker).
482    pub fn dummy_memory_helper(&self) -> SharedMemoryHelper<F> {
483        dummy_memory_helper(self.cpu_range_checker().bus(), self.timestamp_max_bits())
484    }
485
486    pub fn timestamp_max_bits(&self) -> usize {
487        self.memory.config.timestamp_max_bits
488    }
489
490    pub fn build(self) -> GpuChipTester {
491        GpuChipTester {
492            var_range_checker: Some(self.var_range_checker),
493            bitwise_op_lookup: self.bitwise_op_lookup,
494            range_tuple_checker: self.range_tuple_checker,
495            memory: Some(self.memory),
496            ..Default::default()
497        }
498        .load(
499            ExecutionDummyAir::new(self.execution.bus()),
500            self.execution,
501            (),
502        )
503        .load(ProgramDummyAir::new(self.program.bus()), self.program, ())
504    }
505}
506
507#[derive(Default)]
508pub struct GpuChipTester {
509    pub airs: Vec<AirRef<SC>>,
510    pub ctxs: Vec<AirProvingContext<GpuBackend>>,
511    pub memory: Option<DeviceMemoryTester>,
512    pub var_range_checker: Option<Arc<VariableRangeCheckerChipGPU>>,
513    pub bitwise_op_lookup: Option<Arc<BitwiseOperationLookupChipGPU<8>>>,
514    pub range_tuple_checker: Option<Arc<RangeTupleCheckerChipGPU<2>>>,
515}
516
517impl GpuChipTester {
518    pub fn load<A, G, RA>(mut self, air: A, gpu_chip: G, gpu_arena: RA) -> Self
519    where
520        A: AnyRap<SC> + 'static,
521        G: Chip<RA, GpuBackend>,
522    {
523        let proving_ctx = gpu_chip.generate_proving_ctx(gpu_arena);
524        if proving_ctx.common_main.is_some() {
525            self = self.load_air_proving_ctx(Arc::new(air) as AirRef<SC>, proving_ctx);
526        }
527        self
528    }
529
530    pub fn load_harness<E, A, G, RA>(self, harness: TestChipHarness<F, E, A, G, RA>) -> Self
531    where
532        A: AnyRap<SC> + 'static,
533        G: Chip<RA, GpuBackend>,
534    {
535        self.load(harness.air, harness.chip, harness.arena)
536    }
537
538    pub fn load_periphery<A, G>(self, air: A, gpu_chip: G) -> Self
539    where
540        A: AnyRap<SC> + 'static,
541        G: Chip<(), GpuBackend>,
542    {
543        self.load(air, gpu_chip, ())
544    }
545
546    pub fn load_air_proving_ctx(
547        mut self,
548        air: AirRef<SC>,
549        proving_ctx: AirProvingContext<GpuBackend>,
550    ) -> Self {
551        #[cfg(feature = "touchemall")]
552        {
553            use openvm_cuda_backend::engine::check_trace_validity;
554
555            check_trace_validity(&proving_ctx, &air.name());
556        }
557        self.airs.push(air);
558        self.ctxs.push(proving_ctx);
559        self
560    }
561
562    pub fn load_and_compare<A, G, RA, C, CRA>(
563        mut self,
564        air: A,
565        gpu_chip: G,
566        gpu_arena: RA,
567        cpu_chip: C,
568        cpu_arena: CRA,
569    ) -> Self
570    where
571        A: AnyRap<SC> + 'static,
572        C: Chip<CRA, CpuBackend<SC>>,
573        G: Chip<RA, GpuBackend>,
574    {
575        let proving_ctx = gpu_chip.generate_proving_ctx(gpu_arena);
576        let expected_trace = cpu_chip.generate_proving_ctx(cpu_arena).common_main;
577        if proving_ctx.common_main.is_none() {
578            assert!(expected_trace.is_none());
579            return self;
580        }
581        #[cfg(feature = "touchemall")]
582        {
583            use openvm_cuda_backend::engine::check_trace_validity;
584
585            check_trace_validity(&proving_ctx, &air.name());
586        }
587        assert_eq_host_and_device_matrix(
588            expected_trace.unwrap(),
589            proving_ctx.common_main.as_ref().unwrap(),
590        );
591        self.airs.push(Arc::new(air) as AirRef<SC>);
592        self.ctxs.push(proving_ctx);
593        self
594    }
595
596    pub fn load_gpu_harness<E, A, GpuChip, CpuChip>(
597        self,
598        harness: GpuTestChipHarness<Val<SC>, E, A, GpuChip, CpuChip>,
599    ) -> Self
600    where
601        A: AnyRap<SC> + 'static,
602        CpuChip: Chip<MatrixRecordArena<Val<SC>>, CpuBackend<SC>>,
603        GpuChip: Chip<DenseRecordArena, GpuBackend>,
604    {
605        self.load_and_compare(
606            harness.air,
607            harness.gpu_chip,
608            harness.dense_arena,
609            harness.cpu_chip,
610            harness.matrix_arena,
611        )
612    }
613
614    pub fn finalize(mut self) -> Self {
615        if let Some(mut memory_tester) = self.memory.take() {
616            let is_persistent = memory_tester.inventory.continuation_enabled();
617            let touched_memory = memory_tester.memory.finalize::<F>(is_persistent);
618            let memory_bridge = memory_tester.memory_bridge();
619
620            for chip in memory_tester.chip_for_block.into_values() {
621                self = self.load_periphery(chip.0.air, chip);
622            }
623
624            let airs = MemoryAirInventory::<SC>::new(
625                memory_bridge,
626                &memory_tester.config,
627                memory_tester.range_bus,
628                is_persistent.then_some((
629                    PermutationCheckBus::new(MEMORY_MERKLE_BUS),
630                    PermutationCheckBus::new(POSEIDON2_DIRECT_BUS),
631                )),
632            )
633            .into_airs();
634            let ctxs = memory_tester
635                .inventory
636                .generate_proving_ctxs(memory_tester.memory.access_adapter_records, touched_memory);
637            for (air, ctx) in airs
638                .into_iter()
639                .zip(ctxs)
640                .filter(|(_, ctx)| ctx.common_main.is_some())
641            {
642                self = self.load_air_proving_ctx(air, ctx);
643            }
644
645            if let Some(hasher_chip) = memory_tester.hasher_chip {
646                let air: AirRef<SC> = match hasher_chip.as_ref() {
647                    Poseidon2PeripheryChipGPU::Register0(_) => {
648                        let config = Poseidon2Config::default();
649                        Arc::new(Poseidon2PeripheryAir::new(
650                            Arc::new(Poseidon2SubAir::<F, 0>::new(config.constants.into())),
651                            LookupBus::new(POSEIDON2_DIRECT_BUS),
652                        ))
653                    }
654                    Poseidon2PeripheryChipGPU::Register1(_) => {
655                        let config = Poseidon2Config::default();
656                        Arc::new(Poseidon2PeripheryAir::new(
657                            Arc::new(Poseidon2SubAir::<F, 1>::new(config.constants.into())),
658                            LookupBus::new(POSEIDON2_DIRECT_BUS),
659                        ))
660                    }
661                };
662                let ctx = hasher_chip.generate_proving_ctx(());
663                self = self.load_air_proving_ctx(air, ctx);
664            }
665        }
666        if let Some(var_range_checker) = self.var_range_checker.take() {
667            self = self.load_periphery(
668                VariableRangeCheckerAir::new(var_range_checker.cpu_chip.as_ref().unwrap().bus()),
669                var_range_checker,
670            );
671        }
672        if let Some(bitwise_op_lookup) = self.bitwise_op_lookup.take() {
673            self = self.load_periphery(
674                BitwiseOperationLookupAir::<8>::new(
675                    bitwise_op_lookup.cpu_chip.as_ref().unwrap().bus(),
676                ),
677                bitwise_op_lookup,
678            );
679        }
680        if let Some(range_tuple_checker) = self.range_tuple_checker.take() {
681            self = self.load_periphery(
682                RangeTupleCheckerAir {
683                    bus: *range_tuple_checker.cpu_chip.as_ref().unwrap().bus(),
684                },
685                range_tuple_checker,
686            );
687        }
688        self
689    }
690
691    pub fn test<P: Fn() -> GpuBabyBearPoseidon2Engine>(
692        self,
693        engine_provider: P,
694    ) -> Result<VerificationDataWithFriParams<SC>, VerificationError> {
695        engine_provider().run_test(self.airs, self.ctxs)
696    }
697
698    pub fn simple_test(self) -> Result<VerificationDataWithFriParams<SC>, VerificationError> {
699        self.test(|| GpuBabyBearPoseidon2Engine::new(FriParameters::new_for_testing(1)))
700    }
701
702    pub fn simple_test_with_expected_error(self, expected_error: VerificationError) {
703        disable_debug_builder();
704        let msg = format!(
705            "Expected verification to fail with {:?}, but it didn't",
706            &expected_error
707        );
708        let result = self.simple_test();
709        assert_eq!(result.err(), Some(expected_error), "{}", msg);
710    }
711}