openvm_circuit/system/cuda/
memory.rs1use std::sync::Arc;
2
3use openvm_circuit::{
4 arch::{AddressSpaceHostLayout, DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET},
5 system::{
6 memory::{online::LinearMemory, AddressMap, TimestampedValues},
7 TouchedMemory,
8 },
9};
10use openvm_circuit_primitives::var_range::VariableRangeCheckerChipGPU;
11use openvm_cuda_backend::{prover_backend::GpuBackend, types::F};
12use openvm_cuda_common::{
13 copy::{cuda_memcpy, MemCopyD2D, MemCopyH2D},
14 d_buffer::DeviceBuffer,
15 memory_manager::MemTracker,
16};
17use openvm_stark_backend::{
18 p3_field::PrimeCharacteristicRing, p3_util::log2_ceil_usize, prover::types::AirProvingContext,
19 Chip,
20};
21
22use super::{
23 access_adapters::AccessAdapterInventoryGPU,
24 boundary::{BoundaryChipGPU, BoundaryFields},
25 merkle_tree::{MemoryMerkleTree, TIMESTAMPED_BLOCK_WIDTH},
26 Poseidon2PeripheryChipGPU, DIGEST_WIDTH,
27};
28
29pub struct MemoryInventoryGPU {
30 pub boundary: BoundaryChipGPU,
31 pub access_adapters: AccessAdapterInventoryGPU,
32 pub persistent: Option<PersistentMemoryInventoryGPU>,
33 #[cfg(feature = "metrics")]
34 pub(super) unpadded_merkle_height: usize,
35}
36
37pub struct PersistentMemoryInventoryGPU {
38 pub merkle_tree: MemoryMerkleTree,
39 pub initial_memory: Vec<DeviceBuffer<u8>>,
40}
41
42impl MemoryInventoryGPU {
43 pub fn volatile(config: MemoryConfig, range_checker: Arc<VariableRangeCheckerChipGPU>) -> Self {
44 let addr_space_max_bits = log2_ceil_usize(
45 (ADDR_SPACE_OFFSET + 2u32.pow(config.addr_space_height as u32)) as usize,
46 );
47 Self {
48 boundary: BoundaryChipGPU::volatile(
49 range_checker.clone(),
50 addr_space_max_bits,
51 config.pointer_max_bits,
52 ),
53 access_adapters: AccessAdapterInventoryGPU::new(
54 range_checker,
55 config.max_access_adapter_n,
56 config.timestamp_max_bits,
57 ),
58 persistent: None,
59 #[cfg(feature = "metrics")]
60 unpadded_merkle_height: 0,
61 }
62 }
63
64 pub fn persistent(
65 config: MemoryConfig,
66 range_checker: Arc<VariableRangeCheckerChipGPU>,
67 hasher_chip: Arc<Poseidon2PeripheryChipGPU>,
68 ) -> Self {
69 Self {
70 boundary: BoundaryChipGPU::persistent(hasher_chip.shared_buffer()),
71 access_adapters: AccessAdapterInventoryGPU::new(
72 range_checker,
73 config.max_access_adapter_n,
74 config.timestamp_max_bits,
75 ),
76 persistent: Some(PersistentMemoryInventoryGPU {
77 merkle_tree: MemoryMerkleTree::new(config.clone(), hasher_chip.clone()),
78 initial_memory: Vec::new(),
79 }),
80 #[cfg(feature = "metrics")]
81 unpadded_merkle_height: 0,
82 }
83 }
84
85 pub fn continuation_enabled(&self) -> bool {
86 self.persistent.is_some()
87 }
88
89 pub fn set_initial_memory(&mut self, initial_memory: &AddressMap) {
90 let _mem = MemTracker::start("set initial memory");
91 let persistent = self
92 .persistent
93 .as_mut()
94 .expect("`set_initial_memory` requires persistent memory");
95 for (addr_sp, raw_mem) in initial_memory
96 .get_memory()
97 .iter()
98 .map(|mem| mem.as_slice())
99 .enumerate()
100 {
101 tracing::debug!(
102 "Setting initial memory for address space {}: {} bytes",
103 addr_sp,
104 raw_mem.len()
105 );
106 persistent.initial_memory.push(if raw_mem.is_empty() {
107 DeviceBuffer::new()
108 } else {
109 raw_mem
110 .to_device()
111 .expect("failed to copy memory to device")
112 });
113 persistent
114 .merkle_tree
115 .build_async(&persistent.initial_memory[addr_sp], addr_sp);
116 }
117 match &mut self.boundary.fields {
118 BoundaryFields::Volatile(_) => {
119 panic!("`set_initial_memory` requires persistent memory")
120 }
121 BoundaryFields::Persistent(fields) => {
122 fields.initial_leaves = persistent
123 .initial_memory
124 .iter()
125 .skip(1)
126 .map(|per_as| per_as.as_raw_ptr())
127 .collect();
128 }
129 }
130 }
131
132 pub fn generate_proving_ctxs(
133 &mut self,
134 access_adapter_arena: DenseRecordArena,
135 touched_memory: TouchedMemory<F>,
136 ) -> Vec<AirProvingContext<GpuBackend>> {
137 let mem = MemTracker::start("generate mem proving ctxs");
138 let merkle_proof_ctx = match touched_memory {
139 TouchedMemory::Persistent(partition) => {
140 let persistent = self
141 .persistent
142 .as_mut()
143 .expect("persistent touched memory requires persistent memory interface");
144
145 let unpadded_merkle_height =
146 persistent.merkle_tree.calculate_unpadded_height(&partition);
147 #[cfg(feature = "metrics")]
148 {
149 self.unpadded_merkle_height = unpadded_merkle_height;
150 }
151
152 mem.tracing_info("boundary finalize");
153 let (touched_memory, empty) = if partition.is_empty() {
154 let leftmost_values = 'left: {
155 let mut res = [F::ZERO; DIGEST_WIDTH];
156 if persistent.initial_memory[ADDR_SPACE_OFFSET as usize].is_empty() {
157 break 'left res;
158 }
159 let layout = &persistent.merkle_tree.mem_config().addr_spaces
160 [ADDR_SPACE_OFFSET as usize]
161 .layout;
162 let one_cell_size = layout.size();
163 let values = vec![0u8; one_cell_size * DIGEST_WIDTH];
164 unsafe {
165 cuda_memcpy::<true, false>(
166 values.as_ptr() as *mut std::ffi::c_void,
167 persistent.initial_memory[ADDR_SPACE_OFFSET as usize].as_ptr()
168 as *const std::ffi::c_void,
169 values.len(),
170 )
171 .unwrap();
172 for i in 0..DIGEST_WIDTH {
173 res[i] = layout.to_field::<F>(&values[i * one_cell_size..]);
174 }
175 }
176 res
177 };
178
179 (
180 vec![(
181 (1, 0),
182 TimestampedValues {
183 timestamp: 0,
184 values: leftmost_values,
185 },
186 )],
187 true,
188 )
189 } else {
190 (partition, false)
191 };
192 debug_assert_eq!(
193 size_of_val(&touched_memory[0]),
194 TIMESTAMPED_BLOCK_WIDTH * size_of::<u32>()
195 );
196 let d_touched_memory = touched_memory.to_device().unwrap().as_buffer::<u32>();
197 if empty {
198 self.boundary
199 .finalize_records_persistent::<DIGEST_WIDTH>(DeviceBuffer::new());
200 } else {
201 self.boundary.finalize_records_persistent::<DIGEST_WIDTH>(
202 d_touched_memory.device_copy().unwrap().as_buffer::<u32>(),
203 ); }
205 mem.tracing_info("merkle update");
206 persistent.merkle_tree.finalize();
207 let merkle_tree_ctx = persistent.merkle_tree.update_with_touched_blocks(
208 unpadded_merkle_height,
209 &d_touched_memory,
210 empty,
211 );
212 Some(merkle_tree_ctx)
213 }
214 TouchedMemory::Volatile(partition) => {
215 assert!(self.persistent.is_none(), "TouchedMemory enum mismatch");
216 self.boundary.finalize_records_volatile(partition);
217 None
218 }
219 };
220 mem.tracing_info("boundary tracegen");
221 let mut ret = vec![self.boundary.generate_proving_ctx(())];
222 if let Some(merkle_proof_ctx) = merkle_proof_ctx {
223 ret.push(merkle_proof_ctx);
224 mem.tracing_info("dropping merkle tree");
225 let persistent = self.persistent.as_mut().unwrap();
226 persistent.merkle_tree.drop_subtrees();
227 persistent.initial_memory = Vec::new();
228 }
229 ret.extend(
230 self.access_adapters
231 .generate_air_proving_ctxs(access_adapter_arena),
232 );
233 ret
234 }
235}
236
237impl Drop for PersistentMemoryInventoryGPU {
238 fn drop(&mut self) {
239 self.merkle_tree.drop_subtrees();
242 self.initial_memory.clear();
243 }
244}