openvm_rv32im_circuit/auipc/
cuda.rs1use std::{mem::size_of, sync::Arc};
2
3use derive_new::new;
4use openvm_circuit::{arch::DenseRecordArena, utils::next_power_of_two_or_zero};
5use openvm_circuit_primitives::{
6 bitwise_op_lookup::BitwiseOperationLookupChipGPU, var_range::VariableRangeCheckerChipGPU,
7};
8use openvm_cuda_backend::{
9 base::DeviceMatrix, chip::get_empty_air_proving_ctx, prover_backend::GpuBackend, types::F,
10};
11use openvm_cuda_common::copy::MemCopyH2D;
12use openvm_stark_backend::{prover::types::AirProvingContext, Chip};
13
14use crate::{
15 adapters::{Rv32RdWriteAdapterCols, Rv32RdWriteAdapterRecord, RV32_CELL_BITS},
16 cuda_abi::auipc_cuda::tracegen,
17 Rv32AuipcCoreCols, Rv32AuipcCoreRecord,
18};
19
20#[derive(new)]
21pub struct Rv32AuipcChipGpu {
22 pub range_checker: Arc<VariableRangeCheckerChipGPU>,
23 pub bitwise_lookup: Arc<BitwiseOperationLookupChipGPU<RV32_CELL_BITS>>,
24 pub timestamp_max_bits: usize,
25}
26
27impl Chip<DenseRecordArena, GpuBackend> for Rv32AuipcChipGpu {
28 fn generate_proving_ctx(&self, arena: DenseRecordArena) -> AirProvingContext<GpuBackend> {
29 const RECORD_SIZE: usize = size_of::<(Rv32RdWriteAdapterRecord, Rv32AuipcCoreRecord)>();
30 let records = arena.allocated();
31 if records.is_empty() {
32 return get_empty_air_proving_ctx::<GpuBackend>();
33 }
34 debug_assert_eq!(records.len() % RECORD_SIZE, 0);
35
36 let trace_width = Rv32AuipcCoreCols::<F>::width() + Rv32RdWriteAdapterCols::<F>::width();
37 let trace_height = next_power_of_two_or_zero(records.len() / RECORD_SIZE);
38
39 let d_records = records.to_device().unwrap();
40 let d_trace = DeviceMatrix::<F>::with_capacity(trace_height, trace_width);
41
42 unsafe {
43 tracegen(
44 d_trace.buffer(),
45 trace_height,
46 &d_records,
47 &self.range_checker.count,
48 &self.bitwise_lookup.count,
49 RV32_CELL_BITS,
50 self.timestamp_max_bits as u32,
51 )
52 .unwrap();
53 }
54 AirProvingContext::simple_no_pis(d_trace)
55 }
56}