openvm_circuit/system/cuda/
memory.rs

1use std::sync::Arc;
2
3use openvm_circuit::{
4    arch::{AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET},
5    system::{
6        memory::{online::LinearMemory, AddressMap, TimestampedValues},
7        TouchedMemory,
8    },
9};
10use openvm_circuit_primitives::var_range::VariableRangeCheckerChipGPU;
11use openvm_cuda_backend::{prover_backend::GpuBackend, types::F};
12use openvm_cuda_common::{
13    copy::{cuda_memcpy, MemCopyD2D, MemCopyH2D},
14    d_buffer::DeviceBuffer,
15    memory_manager::MemTracker,
16};
17use openvm_stark_backend::{
18    p3_field::PrimeCharacteristicRing, p3_util::log2_ceil_usize, prover::types::AirProvingContext,
19    Chip,
20};
21
22use super::{
23    access_adapters::AccessAdapterInventoryGPU,
24    boundary::{BoundaryChipGPU, BoundaryFields},
25    merkle_tree::{MemoryMerkleTree, TIMESTAMPED_BLOCK_WIDTH},
26    Poseidon2PeripheryChipGPU, DIGEST_WIDTH,
27};
28
29pub struct MemoryInventoryGPU {
30    pub boundary: BoundaryChipGPU,
31    pub access_adapters: AccessAdapterInventoryGPU,
32    pub persistent: Option<PersistentMemoryInventoryGPU>,
33    #[cfg(feature = "metrics")]
34    pub(super) unpadded_merkle_height: usize,
35}
36
37pub struct PersistentMemoryInventoryGPU {
38    pub merkle_tree: MemoryMerkleTree,
39    pub initial_memory: Vec<DeviceBuffer<u8>>,
40}
41
42impl MemoryInventoryGPU {
43    pub fn volatile(config: MemoryConfig, range_checker: Arc<VariableRangeCheckerChipGPU>) -> Self {
44        let addr_space_max_bits = log2_ceil_usize(
45            (ADDR_SPACE_OFFSET + 2u32.pow(config.addr_space_height as u32)) as usize,
46        );
47        Self {
48            boundary: BoundaryChipGPU::volatile(
49                range_checker.clone(),
50                addr_space_max_bits,
51                config.pointer_max_bits,
52            ),
53            access_adapters: AccessAdapterInventoryGPU::new(
54                range_checker,
55                config.max_access_adapter_n,
56                config.timestamp_max_bits,
57            ),
58            persistent: None,
59            #[cfg(feature = "metrics")]
60            unpadded_merkle_height: 0,
61        }
62    }
63
64    pub fn persistent(
65        config: MemoryConfig,
66        range_checker: Arc<VariableRangeCheckerChipGPU>,
67        hasher_chip: Arc<Poseidon2PeripheryChipGPU>,
68    ) -> Self {
69        Self {
70            boundary: BoundaryChipGPU::persistent(hasher_chip.shared_buffer()),
71            access_adapters: AccessAdapterInventoryGPU::new(
72                range_checker,
73                config.max_access_adapter_n,
74                config.timestamp_max_bits,
75            ),
76            persistent: Some(PersistentMemoryInventoryGPU {
77                merkle_tree: MemoryMerkleTree::new(config.clone(), hasher_chip.clone()),
78                initial_memory: Vec::new(),
79            }),
80            #[cfg(feature = "metrics")]
81            unpadded_merkle_height: 0,
82        }
83    }
84
85    pub fn continuation_enabled(&self) -> bool {
86        self.persistent.is_some()
87    }
88
89    pub fn set_initial_memory(&mut self, initial_memory: &AddressMap) {
90        let _mem = MemTracker::start("set initial memory");
91        let persistent = self
92            .persistent
93            .as_mut()
94            .expect("`set_initial_memory` requires persistent memory");
95        for (addr_sp, raw_mem) in initial_memory
96            .get_memory()
97            .iter()
98            .map(|mem| mem.as_slice())
99            .enumerate()
100        {
101            tracing::debug!(
102                "Setting initial memory for address space {}: {} bytes",
103                addr_sp,
104                raw_mem.len()
105            );
106            persistent.initial_memory.push(if raw_mem.is_empty() {
107                DeviceBuffer::new()
108            } else {
109                raw_mem
110                    .to_device()
111                    .expect("failed to copy memory to device")
112            });
113            persistent
114                .merkle_tree
115                .build_async(&persistent.initial_memory[addr_sp], addr_sp);
116        }
117        match &mut self.boundary.fields {
118            BoundaryFields::Volatile(_) => {
119                panic!("`set_initial_memory` requires persistent memory")
120            }
121            BoundaryFields::Persistent(fields) => {
122                fields.initial_leaves = persistent
123                    .initial_memory
124                    .iter()
125                    .skip(1)
126                    .map(|per_as| per_as.as_raw_ptr())
127                    .collect();
128            }
129        }
130    }
131
132    pub fn generate_proving_ctxs(
133        &mut self,
134        access_adapter_arena: DenseRecordArena,
135        touched_memory: TouchedMemory<F>,
136    ) -> Vec<AirProvingContext<GpuBackend>> {
137        let mem = MemTracker::start("generate mem proving ctxs");
138        let merkle_proof_ctx = match touched_memory {
139            TouchedMemory::Persistent(partition) => {
140                let persistent = self
141                    .persistent
142                    .as_mut()
143                    .expect("persistent touched memory requires persistent memory interface");
144
145                let unpadded_merkle_height =
146                    persistent.merkle_tree.calculate_unpadded_height(&partition);
147                #[cfg(feature = "metrics")]
148                {
149                    self.unpadded_merkle_height = unpadded_merkle_height;
150                }
151
152                mem.tracing_info("boundary finalize");
153                let (touched_memory, empty) = if partition.is_empty() {
154                    let leftmost_values = 'left: {
155                        let mut res = [F::ZERO; DIGEST_WIDTH];
156                        if persistent.initial_memory[ADDR_SPACE_OFFSET as usize].is_empty() {
157                            break 'left res;
158                        }
159                        let layout = &persistent.merkle_tree.mem_config().addr_spaces
160                            [ADDR_SPACE_OFFSET as usize]
161                            .layout;
162                        let one_cell_size = layout.size();
163                        let values = vec![0u8; one_cell_size * DIGEST_WIDTH];
164                        unsafe {
165                            cuda_memcpy::<true, false>(
166                                values.as_ptr() as *mut std::ffi::c_void,
167                                persistent.initial_memory[ADDR_SPACE_OFFSET as usize].as_ptr()
168                                    as *const std::ffi::c_void,
169                                values.len(),
170                            )
171                            .unwrap();
172                            for i in 0..DIGEST_WIDTH {
173                                res[i] = layout.to_field::<F>(&values[i * one_cell_size..]);
174                            }
175                        }
176                        res
177                    };
178
179                    (
180                        vec![(
181                            (1, 0),
182                            TimestampedValues {
183                                timestamp: 0,
184                                values: leftmost_values,
185                            },
186                        )],
187                        true,
188                    )
189                } else {
190                    (partition, false)
191                };
192                debug_assert_eq!(
193                    size_of_val(&touched_memory[0]),
194                    TIMESTAMPED_BLOCK_WIDTH * size_of::<u32>()
195                );
196                let d_touched_memory = touched_memory.to_device().unwrap().as_buffer::<u32>();
197                if empty {
198                    self.boundary
199                        .finalize_records_persistent::<DIGEST_WIDTH>(DeviceBuffer::new());
200                } else {
201                    self.boundary.finalize_records_persistent::<DIGEST_WIDTH>(
202                        d_touched_memory.device_copy().unwrap().as_buffer::<u32>(),
203                    ); // TODO do not copy
204                }
205                mem.tracing_info("merkle update");
206                persistent.merkle_tree.finalize();
207                let merkle_tree_ctx = persistent.merkle_tree.update_with_touched_blocks(
208                    unpadded_merkle_height,
209                    &d_touched_memory,
210                    empty,
211                );
212                Some(merkle_tree_ctx)
213            }
214            TouchedMemory::Volatile(partition) => {
215                assert!(self.persistent.is_none(), "TouchedMemory enum mismatch");
216                self.boundary.finalize_records_volatile(partition);
217                None
218            }
219        };
220        mem.tracing_info("boundary tracegen");
221        let mut ret = vec![self.boundary.generate_proving_ctx(())];
222        if let Some(merkle_proof_ctx) = merkle_proof_ctx {
223            ret.push(merkle_proof_ctx);
224            mem.tracing_info("dropping merkle tree");
225            let persistent = self.persistent.as_mut().unwrap();
226            persistent.merkle_tree.drop_subtrees();
227            persistent.initial_memory = Vec::new();
228        }
229        ret.extend(
230            self.access_adapters
231                .generate_air_proving_ctxs(access_adapter_arena),
232        );
233        ret
234    }
235}
236
237impl Drop for PersistentMemoryInventoryGPU {
238    fn drop(&mut self) {
239        // WARNING: The merkle subtree events must be completed before dropping the initial memory
240        // buffers. This prevents buffers from dropping before build_async completes.
241        self.merkle_tree.drop_subtrees();
242        self.initial_memory.clear();
243    }
244}