openvm_circuit/system/cuda/
mod.rs

1use std::sync::Arc;
2
3use connector::VmConnectorChipGPU;
4use memory::MemoryInventoryGPU;
5use openvm_circuit::{
6    arch::{DenseRecordArena, SystemConfig, PUBLIC_VALUES_AIR_ID},
7    system::{
8        connector::VmConnectorChip,
9        memory::{interface::MemoryInterfaceAirs, online::GuestMemory, MemoryAirInventory},
10        SystemChipComplex, SystemRecords,
11    },
12};
13use openvm_circuit_primitives::var_range::VariableRangeCheckerChipGPU;
14use openvm_cuda_backend::{prover_backend::GpuBackend, types::F};
15use openvm_stark_backend::{
16    prover::types::{AirProvingContext, CommittedTraceData},
17    Chip,
18};
19use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Config;
20use poseidon2::Poseidon2PeripheryChipGPU;
21use program::ProgramChipGPU;
22use public_values::PublicValuesChipGPU;
23
/// Width of a digest, counted in elements.
///
/// NOTE(review): presumably the number of field elements in a Poseidon2 digest used by the
/// memory submodules — confirm against the usages in `merkle_tree` / `boundary`.
pub(crate) const DIGEST_WIDTH: usize = 8;
25
26pub mod access_adapters;
27pub mod boundary;
28pub mod connector;
29pub mod extensions;
30pub mod memory;
31pub mod merkle_tree;
32pub mod phantom;
33pub mod poseidon2;
34pub mod program;
35pub mod public_values;
36
37pub struct SystemChipInventoryGPU {
38    pub program: ProgramChipGPU,
39    pub connector: VmConnectorChipGPU,
40    pub memory_inventory: MemoryInventoryGPU,
41    pub public_values: Option<PublicValuesChipGPU>,
42}
43
44impl SystemChipInventoryGPU {
45    pub fn new(
46        config: &SystemConfig,
47        mem_inventory: &MemoryAirInventory<BabyBearPoseidon2Config>,
48        range_checker: Arc<VariableRangeCheckerChipGPU>,
49        hasher_chip: Option<Arc<Poseidon2PeripheryChipGPU>>,
50    ) -> Self {
51        let cpu_range_checker = range_checker.cpu_chip.clone().unwrap();
52
53        // We create an empty program chip: the program should be loaded later (and can be swapped
54        // out). The execution frequencies are supplied only after execution.
55        let program_chip = ProgramChipGPU::new();
56        let connector_chip = VmConnectorChipGPU::new(VmConnectorChip::new(
57            cpu_range_checker.clone(),
58            config.memory_config.timestamp_max_bits,
59        ));
60
61        let memory_inventory = match &mem_inventory.interface {
62            MemoryInterfaceAirs::Persistent { .. } => {
63                assert!(config.continuation_enabled);
64                MemoryInventoryGPU::persistent(
65                    config.memory_config.clone(),
66                    range_checker.clone(),
67                    hasher_chip.unwrap(),
68                )
69            }
70            MemoryInterfaceAirs::Volatile { .. } => {
71                assert!(!config.continuation_enabled);
72                MemoryInventoryGPU::volatile(config.memory_config.clone(), range_checker.clone())
73            }
74        };
75
76        let public_values_chip = config.has_public_values_chip().then(|| {
77            PublicValuesChipGPU::new(
78                range_checker,
79                config.num_public_values,
80                config.max_constraint_degree as u32 - 1,
81                config.memory_config.timestamp_max_bits as u32,
82            )
83        });
84
85        Self {
86            program: program_chip,
87            connector: connector_chip,
88            memory_inventory,
89            public_values: public_values_chip,
90        }
91    }
92}
93
94impl SystemChipComplex<DenseRecordArena, GpuBackend> for SystemChipInventoryGPU {
95    fn load_program(&mut self, cached_program_trace: CommittedTraceData<GpuBackend>) {
96        self.program.cached.replace(cached_program_trace);
97    }
98
99    fn transport_init_memory_to_device(&mut self, memory: &GuestMemory) {
100        if self.memory_inventory.persistent.is_some() {
101            self.memory_inventory.set_initial_memory(&memory.memory);
102        }
103    }
104
105    fn generate_proving_ctx(
106        &mut self,
107        system_records: SystemRecords<F>,
108        mut record_arenas: Vec<DenseRecordArena>,
109    ) -> Vec<AirProvingContext<GpuBackend>> {
110        let SystemRecords {
111            from_state,
112            to_state,
113            exit_code,
114            filtered_exec_frequencies,
115            access_adapter_records,
116            touched_memory,
117            public_values,
118        } = system_records;
119
120        let program_ctx = self.program.generate_proving_ctx(filtered_exec_frequencies);
121
122        self.connector.cpu_chip.begin(from_state);
123        self.connector.cpu_chip.end(to_state, exit_code);
124        let connector_ctx = self.connector.generate_proving_ctx(());
125
126        let pv_ctx = self.public_values.as_mut().map(|chip| {
127            chip.public_values = public_values;
128            let arena = record_arenas.remove(PUBLIC_VALUES_AIR_ID);
129            chip.generate_proving_ctx(arena)
130        });
131
132        let memory_ctxs = self
133            .memory_inventory
134            .generate_proving_ctxs(access_adapter_records, touched_memory);
135
136        [program_ctx, connector_ctx]
137            .into_iter()
138            .chain(pv_ctx)
139            .chain(memory_ctxs)
140            .collect()
141    }
142
143    #[cfg(feature = "metrics")]
144    fn finalize_trace_heights(&self, heights: &mut [usize]) {
145        use crate::system::cuda::boundary::BoundaryFields;
146
147        let boundary_idx = PUBLIC_VALUES_AIR_ID + usize::from(self.public_values.is_some());
148        let mut access_adapter_offset = boundary_idx + 1;
149        match self.memory_inventory.boundary.fields {
150            BoundaryFields::Volatile(_) => {
151                let boundary_height = self.memory_inventory.boundary.num_records.unwrap_or(0);
152                heights[boundary_idx] = boundary_height;
153            }
154            BoundaryFields::Persistent(ref boundary) => {
155                let boundary_height = 2 * self.memory_inventory.boundary.num_records.unwrap_or(0);
156                heights[boundary_idx] = boundary_height;
157                heights[boundary_idx + 1] = self.memory_inventory.unpadded_merkle_height;
158                access_adapter_offset += 1;
159
160                // Poseidon2Periphery height also varies based on memory, so set it now even though
161                // it's not a system chip:
162                let poseidon_height = boundary
163                    .poseidon2_buffer
164                    .current_trace_height
165                    .load(std::sync::atomic::Ordering::Relaxed);
166                // We know the chip insertion index, which starts from *the end* of the the AIR
167                // ordering
168                const POSEIDON2_INSERTION_IDX: usize = 1;
169                let poseidon_idx = heights.len() - 1 - POSEIDON2_INSERTION_IDX;
170                heights[poseidon_idx] = poseidon_height;
171            }
172        }
173        let access_heights = &self.memory_inventory.access_adapters.unpadded_heights;
174        heights[access_adapter_offset..access_adapter_offset + access_heights.len()]
175            .copy_from_slice(access_heights);
176    }
177}