openvm_circuit/system/cuda/
mod.rs

1use std::sync::Arc;
2
3use connector::VmConnectorChipGPU;
4use memory::MemoryInventoryGPU;
5use openvm_circuit::{
6    arch::{DenseRecordArena, SystemConfig, PUBLIC_VALUES_AIR_ID},
7    system::{
8        connector::VmConnectorChip,
9        memory::{interface::MemoryInterfaceAirs, online::GuestMemory, MemoryAirInventory},
10        SystemChipComplex, SystemRecords,
11    },
12};
13use openvm_circuit_primitives::var_range::VariableRangeCheckerChipGPU;
14use openvm_cuda_backend::{prover_backend::GpuBackend, types::F};
15use openvm_stark_backend::{
16    prover::types::{AirProvingContext, CommittedTraceData},
17    Chip,
18};
19use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Config;
20use poseidon2::Poseidon2PeripheryChipGPU;
21use program::ProgramChipGPU;
22use public_values::PublicValuesChipGPU;
23
24use crate::system::memory::CHUNK;
25
/// Crate-visible digest width constant.
///
/// NOTE(review): not referenced in the visible part of this file; presumably the number of
/// elements in a hash digest used by the submodules declared below — confirm against its users.
pub(crate) const DIGEST_WIDTH: usize = 8;
27
// Public submodules of the CUDA system layer. Several of the chip types wired together in this
// file (`connector`, `memory`, `poseidon2`, `program`, `public_values`) are imported from them.
pub mod access_adapters;
pub mod boundary;
pub mod connector;
pub mod extensions;
pub mod memory;
pub mod merkle_tree;
pub mod phantom;
pub mod poseidon2;
pub mod program;
pub mod public_values;
38
/// Bundle of the GPU chips that make up the system part of the VM.
///
/// Built by [`SystemChipInventoryGPU::new`]; the proving contexts are emitted in the order
/// program, connector, public values (when present), then the memory chips.
pub struct SystemChipInventoryGPU {
    /// Program chip; its cached trace is supplied later through `load_program`.
    pub program: ProgramChipGPU,
    /// Connector chip wrapping a CPU-side `VmConnectorChip`.
    pub connector: VmConnectorChipGPU,
    /// Memory chips, constructed in either persistent or volatile mode.
    pub memory_inventory: MemoryInventoryGPU,
    /// Public values chip; `None` when the config reports that no such chip is needed.
    pub public_values: Option<PublicValuesChipGPU>,
}
45
46impl SystemChipInventoryGPU {
47    pub fn new(
48        config: &SystemConfig,
49        mem_inventory: &MemoryAirInventory<BabyBearPoseidon2Config>,
50        range_checker: Arc<VariableRangeCheckerChipGPU>,
51        hasher_chip: Option<Arc<Poseidon2PeripheryChipGPU>>,
52    ) -> Self {
53        let cpu_range_checker = range_checker.cpu_chip.clone().unwrap();
54
55        // We create an empty program chip: the program should be loaded later (and can be swapped
56        // out). The execution frequencies are supplied only after execution.
57        let program_chip = ProgramChipGPU::new();
58        let connector_chip = VmConnectorChipGPU::new(VmConnectorChip::new(
59            cpu_range_checker.clone(),
60            config.memory_config.timestamp_max_bits,
61        ));
62
63        let memory_inventory = match &mem_inventory.interface {
64            MemoryInterfaceAirs::Persistent { .. } => {
65                assert!(config.continuation_enabled);
66                MemoryInventoryGPU::persistent(
67                    config.memory_config.clone(),
68                    range_checker.clone(),
69                    hasher_chip.unwrap(),
70                )
71            }
72            MemoryInterfaceAirs::Volatile { .. } => {
73                assert!(!config.continuation_enabled);
74                MemoryInventoryGPU::volatile(config.memory_config.clone(), range_checker.clone())
75            }
76        };
77
78        let public_values_chip = config.has_public_values_chip().then(|| {
79            PublicValuesChipGPU::new(
80                range_checker,
81                config.num_public_values,
82                config.max_constraint_degree as u32 - 1,
83                config.memory_config.timestamp_max_bits as u32,
84            )
85        });
86
87        Self {
88            program: program_chip,
89            connector: connector_chip,
90            memory_inventory,
91            public_values: public_values_chip,
92        }
93    }
94}
95
96impl SystemChipComplex<DenseRecordArena, GpuBackend> for SystemChipInventoryGPU {
97    fn load_program(&mut self, cached_program_trace: CommittedTraceData<GpuBackend>) {
98        self.program.cached.replace(cached_program_trace);
99    }
100
101    fn transport_init_memory_to_device(&mut self, memory: &GuestMemory) {
102        if self.memory_inventory.persistent.is_some() {
103            self.memory_inventory.set_initial_memory(&memory.memory);
104        }
105    }
106
107    fn generate_proving_ctx(
108        &mut self,
109        system_records: SystemRecords<F>,
110        mut record_arenas: Vec<DenseRecordArena>,
111    ) -> Vec<AirProvingContext<GpuBackend>> {
112        let SystemRecords {
113            from_state,
114            to_state,
115            exit_code,
116            filtered_exec_frequencies,
117            access_adapter_records,
118            touched_memory,
119            public_values,
120        } = system_records;
121
122        let program_ctx = self.program.generate_proving_ctx(filtered_exec_frequencies);
123
124        self.connector.cpu_chip.begin(from_state);
125        self.connector.cpu_chip.end(to_state, exit_code);
126        let connector_ctx = self.connector.generate_proving_ctx(());
127
128        let pv_ctx = self.public_values.as_mut().map(|chip| {
129            chip.public_values = public_values;
130            let arena = record_arenas.remove(PUBLIC_VALUES_AIR_ID);
131            chip.generate_proving_ctx(arena)
132        });
133
134        let memory_ctxs = self
135            .memory_inventory
136            .generate_proving_ctxs(access_adapter_records, touched_memory);
137
138        [program_ctx, connector_ctx]
139            .into_iter()
140            .chain(pv_ctx)
141            .chain(memory_ctxs)
142            .collect()
143    }
144
145    fn memory_top_tree(&self) -> Option<&[[F; CHUNK]]> {
146        self.memory_inventory
147            .persistent
148            .as_ref()
149            .and_then(|persistent| {
150                let top_tree = &persistent.merkle_tree.top_roots_host;
151                (!top_tree.is_empty()).then_some(top_tree.as_slice())
152            })
153    }
154
155    #[cfg(feature = "metrics")]
156    fn finalize_trace_heights(&self, heights: &mut [usize]) {
157        use crate::system::cuda::boundary::BoundaryFields;
158
159        let boundary_idx = PUBLIC_VALUES_AIR_ID + usize::from(self.public_values.is_some());
160        let mut access_adapter_offset = boundary_idx + 1;
161        match self.memory_inventory.boundary.fields {
162            BoundaryFields::Volatile(_) => {
163                let boundary_height = self.memory_inventory.boundary.num_records.unwrap_or(0);
164                heights[boundary_idx] = boundary_height;
165            }
166            BoundaryFields::Persistent(ref boundary) => {
167                let boundary_height = 2 * self.memory_inventory.boundary.num_records.unwrap_or(0);
168                heights[boundary_idx] = boundary_height;
169                heights[boundary_idx + 1] = self.memory_inventory.unpadded_merkle_height;
170                access_adapter_offset += 1;
171
172                // Poseidon2Periphery height also varies based on memory, so set it now even though
173                // it's not a system chip:
174                let poseidon_height = boundary
175                    .poseidon2_buffer
176                    .current_trace_height
177                    .load(std::sync::atomic::Ordering::Relaxed);
178                // We know the chip insertion index, which starts from *the end* of the the AIR
179                // ordering
180                const POSEIDON2_INSERTION_IDX: usize = 1;
181                let poseidon_idx = heights.len() - 1 - POSEIDON2_INSERTION_IDX;
182                heights[poseidon_idx] = poseidon_height;
183            }
184        }
185        let access_heights = &self.memory_inventory.access_adapters.unpadded_heights;
186        heights[access_adapter_offset..access_adapter_offset + access_heights.len()]
187            .copy_from_slice(access_heights);
188    }
189}