openvm_rv32im_circuit/extension/
mod.rs

use std::sync::Arc;

use derive_more::derive::From;
use openvm_circuit::{
    arch::{
        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
        VmExecutionExtension, VmProverExtension,
    },
    system::{memory::SharedMemoryHelper, SystemPort},
};
use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
use openvm_circuit_primitives::{
    bitwise_op_lookup::{
        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
        SharedBitwiseOperationLookupChip,
    },
    range_tuple::{
        RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip,
        SharedRangeTupleCheckerChip,
    },
};
use openvm_instructions::{program::DEFAULT_PC_STEP, LocalOpcode, PhantomDiscriminant};
use openvm_rv32im_transpiler::{
    BaseAluOpcode, BranchEqualOpcode, BranchLessThanOpcode, DivRemOpcode, LessThanOpcode,
    MulHOpcode, MulOpcode, Rv32AuipcOpcode, Rv32HintStoreOpcode, Rv32JalLuiOpcode, Rv32JalrOpcode,
    Rv32LoadStoreOpcode, Rv32Phantom, ShiftOpcode,
};
use openvm_stark_backend::{
    config::{StarkGenericConfig, Val},
    engine::StarkEngine,
    p3_field::PrimeField32,
    prover::cpu::{CpuBackend, CpuDevice},
};
use serde::{Deserialize, Serialize};
use strum::IntoEnumIterator;

use crate::{adapters::*, *};

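// The prover extension implementation is selected at compile time: the CUDA backend when the
// `cuda` feature is enabled, the CPU backend below otherwise. Both are exported under the
// common alias `Rv32ImProverExt` so downstream code can remain backend-agnostic.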
cfg_if::cfg_if! {
    if #[cfg(feature = "cuda")] {
        mod cuda;
        pub use cuda::{
            Rv32ImGpuProverExt as Rv32ImProverExt,
        };
    } else {
        pub use self::{
            Rv32ImCpuProverExt as Rv32ImProverExt,
        };
    }
}

// ============ Extension Struct Definitions ============

/// RISC-V 32-bit Base (RV32I) Extension
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
pub struct Rv32I;

/// RISC-V Extension for handling IO (not to be confused with the I base extension)
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
pub struct Rv32Io;

/// RISC-V 32-bit Multiplication (RV32M) Extension
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct Rv32M {
    #[serde(default = "default_range_tuple_checker_sizes")]
    pub range_tuple_checker_sizes: [u32; 2],
}

impl Default for Rv32M {
    fn default() -> Self {
        Self {
            range_tuple_checker_sizes: default_range_tuple_checker_sizes(),
        }
    }
}

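// Default dimensions for the 2-dimensional range tuple checker shared by the RV32M chips:
// 2^8 for 8-bit limb values and 8 * 2^8 to bound the carry values that arise in the
// multiplication and division chips (see the Multiplication/MulH/DivRem core AIRs).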
fn default_range_tuple_checker_sizes() -> [u32; 2] {
    [1 << 8, 8 * (1 << 8)]
}

// ============ Executor and Periphery Enums for Extension ============

/// RISC-V 32-bit Base (RV32I) Instruction Executors
#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
pub enum Rv32IExecutor {
    // Rv32 (for standard 32-bit integers):
    BaseAlu(Rv32BaseAluExecutor),
    LessThan(Rv32LessThanExecutor),
    Shift(Rv32ShiftExecutor),
    LoadStore(Rv32LoadStoreExecutor),
    LoadSignExtend(Rv32LoadSignExtendExecutor),
    BranchEqual(Rv32BranchEqualExecutor),
    BranchLessThan(Rv32BranchLessThanExecutor),
    JalLui(Rv32JalLuiExecutor),
    Jalr(Rv32JalrExecutor),
    Auipc(Rv32AuipcExecutor),
}

/// RISC-V 32-bit Multiplication Extension (RV32M) Instruction Executors
#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
pub enum Rv32MExecutor {
    Multiplication(Rv32MultiplicationExecutor),
    MultiplicationHigh(Rv32MulHExecutor),
    DivRem(Rv32DivRemExecutor),
}

/// RISC-V 32-bit Io Instruction Executors
#[derive(Clone, Copy, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
pub enum Rv32IoExecutor {
    HintStore(Rv32HintStoreExecutor),
}

// ============ VmExtension Implementations ============

impl<F: PrimeField32> VmExecutionExtension<F> for Rv32I {
    type Executor = Rv32IExecutor;

    fn extend_execution(
        &self,
        inventory: &mut ExecutorInventoryBuilder<F, Rv32IExecutor>,
    ) -> Result<(), ExecutorInventoryError> {
        let pointer_max_bits = inventory.pointer_max_bits();

        let base_alu =
            Rv32BaseAluExecutor::new(Rv32BaseAluAdapterExecutor, BaseAluOpcode::CLASS_OFFSET);
        inventory.add_executor(base_alu, BaseAluOpcode::iter().map(|x| x.global_opcode()))?;

        let lt = LessThanExecutor::new(Rv32BaseAluAdapterExecutor, LessThanOpcode::CLASS_OFFSET);
        inventory.add_executor(lt, LessThanOpcode::iter().map(|x| x.global_opcode()))?;

        let shift = ShiftExecutor::new(Rv32BaseAluAdapterExecutor, ShiftOpcode::CLASS_OFFSET);
        inventory.add_executor(shift, ShiftOpcode::iter().map(|x| x.global_opcode()))?;

        let load_store = LoadStoreExecutor::new(
            Rv32LoadStoreAdapterExecutor::new(pointer_max_bits),
            Rv32LoadStoreOpcode::CLASS_OFFSET,
        );
        inventory.add_executor(
            load_store,
            Rv32LoadStoreOpcode::iter()
                .take(Rv32LoadStoreOpcode::STOREB as usize + 1)
                .map(|x| x.global_opcode()),
        )?;

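        // LOADB and LOADH require sign extension, so they are registered separately below with
        // the dedicated LoadSignExtendExecutor instead of the LoadStoreExecutor above.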
        let load_sign_extend =
            LoadSignExtendExecutor::new(Rv32LoadStoreAdapterExecutor::new(pointer_max_bits));
        inventory.add_executor(
            load_sign_extend,
            [Rv32LoadStoreOpcode::LOADB, Rv32LoadStoreOpcode::LOADH].map(|x| x.global_opcode()),
        )?;

        let beq = BranchEqualExecutor::new(
            Rv32BranchAdapterExecutor,
            BranchEqualOpcode::CLASS_OFFSET,
            DEFAULT_PC_STEP,
        );
        inventory.add_executor(beq, BranchEqualOpcode::iter().map(|x| x.global_opcode()))?;

        let blt = BranchLessThanExecutor::new(
            Rv32BranchAdapterExecutor,
            BranchLessThanOpcode::CLASS_OFFSET,
        );
        inventory.add_executor(blt, BranchLessThanOpcode::iter().map(|x| x.global_opcode()))?;

        let jal_lui = Rv32JalLuiExecutor::new(Rv32CondRdWriteAdapterExecutor::new(
            Rv32RdWriteAdapterExecutor,
        ));
        inventory.add_executor(jal_lui, Rv32JalLuiOpcode::iter().map(|x| x.global_opcode()))?;

        let jalr = Rv32JalrExecutor::new(Rv32JalrAdapterExecutor);
        inventory.add_executor(jalr, Rv32JalrOpcode::iter().map(|x| x.global_opcode()))?;

        let auipc = Rv32AuipcExecutor::new(Rv32RdWriteAdapterExecutor);
        inventory.add_executor(auipc, Rv32AuipcOpcode::iter().map(|x| x.global_opcode()))?;

        // There is no downside to adding phantom sub-executors, so we do it in the base extension.
        inventory.add_phantom_sub_executor(
            phantom::Rv32HintInputSubEx,
            PhantomDiscriminant(Rv32Phantom::HintInput as u16),
        )?;
        inventory.add_phantom_sub_executor(
            phantom::Rv32HintRandomSubEx,
            PhantomDiscriminant(Rv32Phantom::HintRandom as u16),
        )?;
        inventory.add_phantom_sub_executor(
            phantom::Rv32PrintStrSubEx,
            PhantomDiscriminant(Rv32Phantom::PrintStr as u16),
        )?;
        inventory.add_phantom_sub_executor(
            phantom::Rv32HintLoadByKeySubEx,
            PhantomDiscriminant(Rv32Phantom::HintLoadByKey as u16),
        )?;

        Ok(())
    }
}

impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Rv32I {
    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
        let SystemPort {
            execution_bus,
            program_bus,
            memory_bridge,
        } = inventory.system().port();

        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
        let range_checker = inventory.range_checker().bus;
        let pointer_max_bits = inventory.pointer_max_bits();

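        // The 8-bit bitwise operation lookup is shared across extensions: reuse an existing AIR
        // if another extension already added one, otherwise create it on a fresh bus.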
        let bitwise_lu = {
            // A trick to get around Rust's borrow rules
            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
            if let Some(air) = existing_air {
                air.bus
            } else {
                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
                let air = BitwiseOperationLookupAir::<8>::new(bus);
                inventory.add_air(air);
                air.bus
            }
        };

        let base_alu = Rv32BaseAluAir::new(
            Rv32BaseAluAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu),
            BaseAluCoreAir::new(bitwise_lu, BaseAluOpcode::CLASS_OFFSET),
        );
        inventory.add_air(base_alu);

        let lt = Rv32LessThanAir::new(
            Rv32BaseAluAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu),
            LessThanCoreAir::new(bitwise_lu, LessThanOpcode::CLASS_OFFSET),
        );
        inventory.add_air(lt);

        let shift = Rv32ShiftAir::new(
            Rv32BaseAluAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu),
            ShiftCoreAir::new(bitwise_lu, range_checker, ShiftOpcode::CLASS_OFFSET),
        );
        inventory.add_air(shift);

        let load_store = Rv32LoadStoreAir::new(
            Rv32LoadStoreAdapterAir::new(
                memory_bridge,
                exec_bridge,
                range_checker,
                pointer_max_bits,
            ),
            LoadStoreCoreAir::new(Rv32LoadStoreOpcode::CLASS_OFFSET),
        );
        inventory.add_air(load_store);

        let load_sign_extend = Rv32LoadSignExtendAir::new(
            Rv32LoadStoreAdapterAir::new(
                memory_bridge,
                exec_bridge,
                range_checker,
                pointer_max_bits,
            ),
            LoadSignExtendCoreAir::new(range_checker),
        );
        inventory.add_air(load_sign_extend);

        let beq = Rv32BranchEqualAir::new(
            Rv32BranchAdapterAir::new(exec_bridge, memory_bridge),
            BranchEqualCoreAir::new(BranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
        );
        inventory.add_air(beq);

        let blt = Rv32BranchLessThanAir::new(
            Rv32BranchAdapterAir::new(exec_bridge, memory_bridge),
            BranchLessThanCoreAir::new(bitwise_lu, BranchLessThanOpcode::CLASS_OFFSET),
        );
        inventory.add_air(blt);

        let jal_lui = Rv32JalLuiAir::new(
            Rv32CondRdWriteAdapterAir::new(Rv32RdWriteAdapterAir::new(memory_bridge, exec_bridge)),
            Rv32JalLuiCoreAir::new(bitwise_lu),
        );
        inventory.add_air(jal_lui);

        let jalr = Rv32JalrAir::new(
            Rv32JalrAdapterAir::new(memory_bridge, exec_bridge),
            Rv32JalrCoreAir::new(bitwise_lu, range_checker),
        );
        inventory.add_air(jalr);

        let auipc = Rv32AuipcAir::new(
            Rv32RdWriteAdapterAir::new(memory_bridge, exec_bridge),
            Rv32AuipcCoreAir::new(bitwise_lu),
        );
        inventory.add_air(auipc);

        Ok(())
    }
}

pub struct Rv32ImCpuProverExt;
// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
// BitwiseOperationLookupChip) are specific to CpuBackend.
impl<E, SC, RA> VmProverExtension<E, RA, Rv32I> for Rv32ImCpuProverExt
where
    SC: StarkGenericConfig,
    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
    RA: RowMajorMatrixArena<Val<SC>>,
    Val<SC>: PrimeField32,
{
    fn extend_prover(
        &self,
        _: &Rv32I,
        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
    ) -> Result<(), ChipInventoryError> {
        let range_checker = inventory.range_checker()?.clone();
        let timestamp_max_bits = inventory.timestamp_max_bits();
        let pointer_max_bits = inventory.airs().pointer_max_bits();
        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);

        let bitwise_lu = {
            let existing_chip = inventory
                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
                .next();
            if let Some(chip) = existing_chip {
                chip.clone()
            } else {
                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
                inventory.add_periphery_chip(chip.clone());
                chip
            }
        };

        // These calls to next_air are not strictly necessary to construct the chips, but provide a
        // safeguard to ensure that chip construction matches the circuit definition
        inventory.next_air::<Rv32BaseAluAir>()?;
        let base_alu = Rv32BaseAluChip::new(
            BaseAluFiller::new(
                Rv32BaseAluAdapterFiller::new(bitwise_lu.clone()),
                bitwise_lu.clone(),
                BaseAluOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(base_alu);

        inventory.next_air::<Rv32LessThanAir>()?;
        let lt = Rv32LessThanChip::new(
            LessThanFiller::new(
                Rv32BaseAluAdapterFiller::new(bitwise_lu.clone()),
                bitwise_lu.clone(),
                LessThanOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(lt);

        inventory.next_air::<Rv32ShiftAir>()?;
        let shift = Rv32ShiftChip::new(
            ShiftFiller::new(
                Rv32BaseAluAdapterFiller::new(bitwise_lu.clone()),
                bitwise_lu.clone(),
                range_checker.clone(),
                ShiftOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(shift);

        inventory.next_air::<Rv32LoadStoreAir>()?;
        let load_store_chip = Rv32LoadStoreChip::new(
            LoadStoreFiller::new(
                Rv32LoadStoreAdapterFiller::new(pointer_max_bits, range_checker.clone()),
                Rv32LoadStoreOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(load_store_chip);

        inventory.next_air::<Rv32LoadSignExtendAir>()?;
        let load_sign_extend = Rv32LoadSignExtendChip::new(
            LoadSignExtendFiller::new(
                Rv32LoadStoreAdapterFiller::new(pointer_max_bits, range_checker.clone()),
                range_checker.clone(),
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(load_sign_extend);

        inventory.next_air::<Rv32BranchEqualAir>()?;
        let beq = Rv32BranchEqualChip::new(
            BranchEqualFiller::new(
                Rv32BranchAdapterFiller,
                BranchEqualOpcode::CLASS_OFFSET,
                DEFAULT_PC_STEP,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(beq);

        inventory.next_air::<Rv32BranchLessThanAir>()?;
        let blt = Rv32BranchLessThanChip::new(
            BranchLessThanFiller::new(
                Rv32BranchAdapterFiller,
                bitwise_lu.clone(),
                BranchLessThanOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(blt);

        inventory.next_air::<Rv32JalLuiAir>()?;
        let jal_lui = Rv32JalLuiChip::new(
            Rv32JalLuiFiller::new(
                Rv32CondRdWriteAdapterFiller::new(Rv32RdWriteAdapterFiller),
                bitwise_lu.clone(),
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(jal_lui);

        inventory.next_air::<Rv32JalrAir>()?;
        let jalr = Rv32JalrChip::new(
            Rv32JalrFiller::new(
                Rv32JalrAdapterFiller,
                bitwise_lu.clone(),
                range_checker.clone(),
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(jalr);

        inventory.next_air::<Rv32AuipcAir>()?;
        let auipc = Rv32AuipcChip::new(
            Rv32AuipcFiller::new(Rv32RdWriteAdapterFiller, bitwise_lu.clone()),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(auipc);

        Ok(())
    }
}

impl<F> VmExecutionExtension<F> for Rv32M {
    type Executor = Rv32MExecutor;

    fn extend_execution(
        &self,
        inventory: &mut ExecutorInventoryBuilder<F, Rv32MExecutor>,
    ) -> Result<(), ExecutorInventoryError> {
        let mult =
            Rv32MultiplicationExecutor::new(Rv32MultAdapterExecutor, MulOpcode::CLASS_OFFSET);
        inventory.add_executor(mult, MulOpcode::iter().map(|x| x.global_opcode()))?;

        let mul_h = Rv32MulHExecutor::new(Rv32MultAdapterExecutor, MulHOpcode::CLASS_OFFSET);
        inventory.add_executor(mul_h, MulHOpcode::iter().map(|x| x.global_opcode()))?;

        let div_rem = Rv32DivRemExecutor::new(Rv32MultAdapterExecutor, DivRemOpcode::CLASS_OFFSET);
        inventory.add_executor(div_rem, DivRemOpcode::iter().map(|x| x.global_opcode()))?;

        Ok(())
    }
}

impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Rv32M {
    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
        let SystemPort {
            execution_bus,
            program_bus,
            memory_bridge,
        } = inventory.system().port();
        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);

        let bitwise_lu = {
            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
            if let Some(air) = existing_air {
                air.bus
            } else {
                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
                let air = BitwiseOperationLookupAir::<8>::new(bus);
                inventory.add_air(air);
                air.bus
            }
        };

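        // Reuse an existing 2-dimensional range tuple checker only if both of its dimensions are
        // at least as large as this extension's configured sizes; otherwise add a new one.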
        let range_tuple_checker = {
            let existing_air = inventory.find_air::<RangeTupleCheckerAir<2>>().find(|c| {
                c.bus.sizes[0] >= self.range_tuple_checker_sizes[0]
                    && c.bus.sizes[1] >= self.range_tuple_checker_sizes[1]
            });
            if let Some(air) = existing_air {
                air.bus
            } else {
                let bus = RangeTupleCheckerBus::new(
                    inventory.new_bus_idx(),
                    self.range_tuple_checker_sizes,
                );
                let air = RangeTupleCheckerAir { bus };
                inventory.add_air(air);
                air.bus
            }
        };

        let mult = Rv32MultiplicationAir::new(
            Rv32MultAdapterAir::new(exec_bridge, memory_bridge),
            MultiplicationCoreAir::new(range_tuple_checker, MulOpcode::CLASS_OFFSET),
        );
        inventory.add_air(mult);

        let mul_h = Rv32MulHAir::new(
            Rv32MultAdapterAir::new(exec_bridge, memory_bridge),
            MulHCoreAir::new(bitwise_lu, range_tuple_checker),
        );
        inventory.add_air(mul_h);

        let div_rem = Rv32DivRemAir::new(
            Rv32MultAdapterAir::new(exec_bridge, memory_bridge),
            DivRemCoreAir::new(bitwise_lu, range_tuple_checker, DivRemOpcode::CLASS_OFFSET),
        );
        inventory.add_air(div_rem);

        Ok(())
    }
}

// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
// BitwiseOperationLookupChip) are specific to CpuBackend.
impl<E, SC, RA> VmProverExtension<E, RA, Rv32M> for Rv32ImCpuProverExt
where
    SC: StarkGenericConfig,
    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
    RA: RowMajorMatrixArena<Val<SC>>,
    Val<SC>: PrimeField32,
{
    fn extend_prover(
        &self,
        extension: &Rv32M,
        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
    ) -> Result<(), ChipInventoryError> {
        let range_checker = inventory.range_checker()?.clone();
        let timestamp_max_bits = inventory.timestamp_max_bits();
        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);

        let bitwise_lu = {
            let existing_chip = inventory
                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
                .next();
            if let Some(chip) = existing_chip {
                chip.clone()
            } else {
                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
                inventory.add_periphery_chip(chip.clone());
                chip
            }
        };

        let range_tuple_checker = {
            let existing_chip = inventory
                .find_chip::<SharedRangeTupleCheckerChip<2>>()
                .find(|c| {
                    c.bus().sizes[0] >= extension.range_tuple_checker_sizes[0]
                        && c.bus().sizes[1] >= extension.range_tuple_checker_sizes[1]
                });
            if let Some(chip) = existing_chip {
                chip.clone()
            } else {
                let air: &RangeTupleCheckerAir<2> = inventory.next_air()?;
                let chip = SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::new(air.bus));
                inventory.add_periphery_chip(chip.clone());
                chip
            }
        };

        // These calls to next_air are not strictly necessary to construct the chips, but provide a
        // safeguard to ensure that chip construction matches the circuit definition
        inventory.next_air::<Rv32MultiplicationAir>()?;
        let mult = Rv32MultiplicationChip::new(
            MultiplicationFiller::new(
                Rv32MultAdapterFiller,
                range_tuple_checker.clone(),
                MulOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(mult);

        inventory.next_air::<Rv32MulHAir>()?;
        let mul_h = Rv32MulHChip::new(
            MulHFiller::new(
                Rv32MultAdapterFiller,
                bitwise_lu.clone(),
                range_tuple_checker.clone(),
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(mul_h);

        inventory.next_air::<Rv32DivRemAir>()?;
        let div_rem = Rv32DivRemChip::new(
            DivRemFiller::new(
                Rv32MultAdapterFiller,
                bitwise_lu.clone(),
                range_tuple_checker.clone(),
                DivRemOpcode::CLASS_OFFSET,
            ),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(div_rem);

        Ok(())
    }
}

impl<F> VmExecutionExtension<F> for Rv32Io {
    type Executor = Rv32IoExecutor;

    fn extend_execution(
        &self,
        inventory: &mut ExecutorInventoryBuilder<F, Rv32IoExecutor>,
    ) -> Result<(), ExecutorInventoryError> {
        let pointer_max_bits = inventory.pointer_max_bits();
        let hint_store =
            Rv32HintStoreExecutor::new(pointer_max_bits, Rv32HintStoreOpcode::CLASS_OFFSET);
        inventory.add_executor(
            hint_store,
            Rv32HintStoreOpcode::iter().map(|x| x.global_opcode()),
        )?;

        Ok(())
    }
}

impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Rv32Io {
    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
        let SystemPort {
            execution_bus,
            program_bus,
            memory_bridge,
        } = inventory.system().port();

        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
        let pointer_max_bits = inventory.pointer_max_bits();

        let bitwise_lu = {
            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
            if let Some(air) = existing_air {
                air.bus
            } else {
                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
                let air = BitwiseOperationLookupAir::<8>::new(bus);
                inventory.add_air(air);
                air.bus
            }
        };

        let hint_store = Rv32HintStoreAir::new(
            exec_bridge,
            memory_bridge,
            bitwise_lu,
            Rv32HintStoreOpcode::CLASS_OFFSET,
            pointer_max_bits,
        );
        inventory.add_air(hint_store);

        Ok(())
    }
}

// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
// BitwiseOperationLookupChip) are specific to CpuBackend.
impl<E, SC, RA> VmProverExtension<E, RA, Rv32Io> for Rv32ImCpuProverExt
where
    SC: StarkGenericConfig,
    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
    RA: RowMajorMatrixArena<Val<SC>>,
    Val<SC>: PrimeField32,
{
    fn extend_prover(
        &self,
        _: &Rv32Io,
        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
    ) -> Result<(), ChipInventoryError> {
        let range_checker = inventory.range_checker()?.clone();
        let timestamp_max_bits = inventory.timestamp_max_bits();
        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
        let pointer_max_bits = inventory.airs().pointer_max_bits();

        let bitwise_lu = {
            let existing_chip = inventory
                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
                .next();
            if let Some(chip) = existing_chip {
                chip.clone()
            } else {
                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
                inventory.add_periphery_chip(chip.clone());
                chip
            }
        };

        inventory.next_air::<Rv32HintStoreAir>()?;
        let hint_store = Rv32HintStoreChip::new(
            Rv32HintStoreFiller::new(pointer_max_bits, bitwise_lu.clone()),
            mem_helper.clone(),
        );
        inventory.add_executor_chip(hint_store);

        Ok(())
    }
}

/// Phantom sub-executors
mod phantom {
    use eyre::bail;
    use openvm_circuit::{
        arch::{PhantomSubExecutor, Streams},
        system::memory::online::GuestMemory,
    };
    use openvm_instructions::PhantomDiscriminant;
    use openvm_stark_backend::p3_field::{Field, PrimeField32};
    use rand::{rngs::StdRng, Rng};

    use crate::adapters::{memory_read, read_rv32_register};

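    // Phantom sub-executors run host-side logic during execution (e.g. populating the hint and
    // input streams); they add no AIRs or trace columns of their own.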
    /// Pops the next vector from the input stream and exposes it on the hint stream, prefixed
    /// with its length as a little-endian `u32` and zero-padded to a multiple of 4 elements.
    pub struct Rv32HintInputSubEx;
    /// Fills the hint stream with `4 * len` random bytes, where `len` is read from a register.
    pub struct Rv32HintRandomSubEx;
    /// Reads a string of a given pointer and length from guest memory and prints it to stdout.
    pub struct Rv32PrintStrSubEx;
    /// Looks up a key (read from guest memory) in the key-value store and pushes the decoded
    /// value onto the front of the input stream.
    pub struct Rv32HintLoadByKeySubEx;

    impl<F: Field> PhantomSubExecutor<F> for Rv32HintInputSubEx {
        fn phantom_execute(
            &self,
            _: &GuestMemory,
            streams: &mut Streams<F>,
            _: &mut StdRng,
            _: PhantomDiscriminant,
            _: u32,
            _: u32,
            _: u16,
        ) -> eyre::Result<()> {
            let mut hint = match streams.input_stream.pop_front() {
                Some(hint) => hint,
                None => {
                    bail!("EndOfInputStream");
                }
            };
            streams.hint_stream.clear();
            streams.hint_stream.extend(
                (hint.len() as u32)
                    .to_le_bytes()
                    .iter()
                    .map(|b| F::from_canonical_u8(*b)),
            );
            // Extend by 0 for 4 byte alignment
            let capacity = hint.len().div_ceil(4) * 4;
            hint.resize(capacity, F::ZERO);
            streams.hint_stream.extend(hint);
            Ok(())
        }
    }

    impl<F: PrimeField32> PhantomSubExecutor<F> for Rv32HintRandomSubEx {
        fn phantom_execute(
            &self,
            memory: &GuestMemory,
            streams: &mut Streams<F>,
            rng: &mut StdRng,
            _: PhantomDiscriminant,
            a: u32,
            _: u32,
            _: u16,
        ) -> eyre::Result<()> {
            static WARN_ONCE: std::sync::Once = std::sync::Once::new();
            WARN_ONCE.call_once(|| {
                eprintln!("WARNING: Using fixed-seed RNG for deterministic randomness. Consider security implications for your use case.");
            });

            let len = read_rv32_register(memory, a) as usize;
            streams.hint_stream.clear();
            streams.hint_stream.extend(
                std::iter::repeat_with(|| F::from_canonical_u8(rng.gen::<u8>())).take(len * 4),
            );
            Ok(())
        }
    }

    impl<F: PrimeField32> PhantomSubExecutor<F> for Rv32PrintStrSubEx {
        fn phantom_execute(
            &self,
            memory: &GuestMemory,
            _: &mut Streams<F>,
            _: &mut StdRng,
            _: PhantomDiscriminant,
            a: u32,
            b: u32,
            _: u16,
        ) -> eyre::Result<()> {
            let rd = read_rv32_register(memory, a);
            let rs1 = read_rv32_register(memory, b);
            let bytes = (0..rs1)
                .map(|i| memory_read::<1>(memory, 2, rd + i)[0])
                .collect::<Vec<u8>>();
            let peeked_str = String::from_utf8(bytes)?;
            print!("{peeked_str}");
            Ok(())
        }
    }

    impl<F: PrimeField32> PhantomSubExecutor<F> for Rv32HintLoadByKeySubEx {
        fn phantom_execute(
            &self,
            memory: &GuestMemory,
            streams: &mut Streams<F>,
            _: &mut StdRng,
            _: PhantomDiscriminant,
            a: u32,
            b: u32,
            _: u16,
        ) -> eyre::Result<()> {
            let ptr = read_rv32_register(memory, a);
            let len = read_rv32_register(memory, b);
            let key: Vec<u8> = (0..len)
                .map(|i| memory_read::<1>(memory, 2, ptr + i)[0])
                .collect();
            if let Some(val) = streams.kv_store.get(&key) {
                let to_push = hint_load_by_key_decode::<F>(val);
                for input in to_push.into_iter().rev() {
                    streams.input_stream.push_front(input);
                }
            } else {
                bail!("Rv32HintLoadByKey: key not found");
            }
            Ok(())
        }
    }

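    /// Decodes a raw key-value store entry into the vectors pushed onto the input stream.
    /// The layout is length-prefixed and little-endian: a `u32` count of vectors, then for
    /// each vector a `u32` length followed by that many `u32` words, each interpreted as a
    /// canonical field element. For example, `[[1, 2], [3]]` is encoded as the `u32`
    /// sequence `2, 2, 1, 2, 1, 3`.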
    pub fn hint_load_by_key_decode<F: PrimeField32>(value: &[u8]) -> Vec<Vec<F>> {
        let mut offset = 0;
        let len = extract_u32(value, offset) as usize;
        offset += 4;
        let mut ret = Vec::with_capacity(len);
        for _ in 0..len {
            let v_len = extract_u32(value, offset) as usize;
            offset += 4;
            let v = (0..v_len)
                .map(|_| {
                    let ret = F::from_canonical_u32(extract_u32(value, offset));
                    offset += 4;
                    ret
                })
                .collect();
            ret.push(v);
        }
        ret
    }

    fn extract_u32(value: &[u8], offset: usize) -> u32 {
        u32::from_le_bytes(value[offset..offset + 4].try_into().unwrap())
    }
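
    // A minimal round-trip check of `hint_load_by_key_decode`. This is a sketch: it assumes
    // `openvm_stark_sdk` (for its `p3_baby_bear::BabyBear` re-export) is available as a
    // dev-dependency of this crate.
    #[cfg(test)]
    mod tests {
        use openvm_stark_backend::p3_field::PrimeField32;
        use openvm_stark_sdk::p3_baby_bear::BabyBear;

        use super::hint_load_by_key_decode;

        /// Encodes vectors of u32 values in the length-prefixed little-endian layout that
        /// `hint_load_by_key_decode` expects.
        fn encode(vecs: &[Vec<u32>]) -> Vec<u8> {
            let mut out = (vecs.len() as u32).to_le_bytes().to_vec();
            for v in vecs {
                out.extend((v.len() as u32).to_le_bytes());
                for x in v {
                    out.extend(x.to_le_bytes());
                }
            }
            out
        }

        #[test]
        fn hint_load_by_key_decode_round_trips() {
            let decoded = hint_load_by_key_decode::<BabyBear>(&encode(&[vec![1, 2], vec![3]]));
            assert_eq!(decoded.len(), 2);
            let as_u32 =
                |v: &[BabyBear]| v.iter().map(|x| x.as_canonical_u32()).collect::<Vec<_>>();
            assert_eq!(as_u32(&decoded[0]), vec![1, 2]);
            assert_eq!(as_u32(&decoded[1]), vec![3]);
        }
    }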
}