openvm_circuit/arch/
interpreter_preflight.rs

1use std::{iter::repeat_n, sync::Arc};
2
3#[cfg(not(feature = "parallel"))]
4use itertools::Itertools;
5use openvm_instructions::{instruction::Instruction, program::Program, LocalOpcode, SystemOpcode};
6use openvm_stark_backend::{
7    p3_field::{Field, PrimeField32},
8    p3_maybe_rayon::prelude::*,
9};
10
11use crate::{
12    arch::{
13        execution_mode::PreflightCtx, interpreter::get_pc_index, Arena, ExecutionError, ExecutorId,
14        ExecutorInventory, PreflightExecutor, StaticProgramError, VmExecState,
15    },
16    system::memory::online::TracingMemory,
17};
18
19/// VM preflight executor (E3 executor) for use with trace generation.
20/// Note: This executor doesn't hold any VM state and can be used for multiple execution.
21pub struct PreflightInterpretedInstance<F, E> {
22    // NOTE[jpw]: we use an Arc so that VmInstance can hold both VirtualMachine and
23    // PreflightInterpretedInstance. All we really need is to borrow `executors: &'a [E]`.
24    inventory: Arc<ExecutorInventory<E>>,
25
26    /// This is a map from (pc - pc_base) / pc_step -> [PcEntry].
27    /// We will set `executor_idx` to `u32::MAX` in the [PcEntry] if the program has no instruction
28    /// at that pc.
29    // PERF[jpw/ayush]: We could map directly to the raw pointer(u64) for executor, but storing the
30    // u32 may be better for cache efficiency.
31    pc_handler: Vec<PcEntry<F>>,
32    // pc_handler, execution_frequencies will all have the same length, which equals
33    // `Program::len()`
34    execution_frequencies: Vec<u32>,
35    pc_base: u32,
36
37    pub(super) executor_idx_to_air_idx: Vec<usize>,
38}
39
40#[repr(C)]
41#[derive(Clone)]
42pub struct PcEntry<F> {
43    // NOTE[jpw]: revisit storing only smaller `precompute` for better cache locality. Currently
44    // VmOpcode is usize so align=8 and there are 7 u32 operands so we store ExecutorId(u32) after
45    // to avoid padding. This means PcEntry has align=8 and size=40 bytes, which is too big
46    pub insn: Instruction<F>,
47    pub executor_idx: ExecutorId,
48}
49
50impl<F: Field, E> PreflightInterpretedInstance<F, E> {
51    /// Creates a new interpreter instance for preflight execution.
52    /// Rewrites the program into an internal table specialized for enum dispatch.
53    ///
54    /// ## Assumption
55    /// There are less than `u32::MAX` total AIRs.
56    pub fn new(
57        program: &Program<F>,
58        inventory: Arc<ExecutorInventory<E>>,
59        executor_idx_to_air_idx: Vec<usize>,
60    ) -> Result<Self, StaticProgramError> {
61        if inventory.executors().len() > u32::MAX as usize {
62            // This would mean we cannot use u32::MAX as an "undefined" executor index
63            return Err(StaticProgramError::TooManyExecutors);
64        }
65        let len = program.instructions_and_debug_infos.len();
66        let pc_base = program.pc_base;
67        let base_idx = get_pc_index(pc_base);
68        let mut pc_handler = Vec::with_capacity(base_idx + len);
69        pc_handler.extend(repeat_n(PcEntry::undefined(), base_idx));
70        for insn_and_debug_info in &program.instructions_and_debug_infos {
71            if let Some((insn, _)) = insn_and_debug_info {
72                let insn = insn.clone();
73                let executor_idx = if insn.opcode == SystemOpcode::TERMINATE.global_opcode() {
74                    // The execution loop will always branch to terminate before using this executor
75                    0
76                } else {
77                    *inventory.instruction_lookup.get(&insn.opcode).ok_or(
78                        StaticProgramError::ExecutorNotFound {
79                            opcode: insn.opcode,
80                        },
81                    )?
82                };
83                assert!(
84                    (executor_idx as usize) < inventory.executors.len(),
85                    "ExecutorInventory ensures executor_idx is in bounds"
86                );
87                let pc_entry = PcEntry { insn, executor_idx };
88                pc_handler.push(pc_entry);
89            } else {
90                pc_handler.push(PcEntry::undefined());
91            }
92        }
93        Ok(Self {
94            inventory,
95            execution_frequencies: vec![0u32; base_idx + len],
96            pc_base,
97            pc_handler,
98            executor_idx_to_air_idx,
99        })
100    }
101
102    pub fn executors(&self) -> &[E] {
103        &self.inventory.executors
104    }
105
106    pub fn filtered_execution_frequencies(&self) -> Vec<u32> {
107        let base_idx = get_pc_index(self.pc_base);
108        self.pc_handler
109            .par_iter()
110            .zip_eq(&self.execution_frequencies)
111            .skip(base_idx)
112            .filter_map(|(entry, freq)| entry.is_some().then_some(*freq))
113            .collect()
114    }
115
116    pub fn reset_execution_frequencies(&mut self) {
117        self.execution_frequencies.fill(0);
118    }
119}
120
121impl<F: PrimeField32, E> PreflightInterpretedInstance<F, E> {
122    /// Stopping is triggered by should_stop() or if VM is terminated
123    pub fn execute_from_state<RA>(
124        &mut self,
125        state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
126    ) -> Result<(), ExecutionError>
127    where
128        RA: Arena,
129        E: PreflightExecutor<F, RA>,
130    {
131        loop {
132            if let Ok(Some(_)) = state.exit_code {
133                // should terminate
134                break;
135            }
136            if state
137                .ctx
138                .instret_end
139                .is_some_and(|instret_end| state.instret() >= instret_end)
140            {
141                // should suspend
142                break;
143            }
144
145            // Fetch, decode and execute single instruction
146            self.execute_instruction(state)?;
147            *state.instret_mut() += 1;
148        }
149
150        Ok(())
151    }
152
153    /// Executes a single instruction and updates VM state
154    #[inline(always)]
155    fn execute_instruction<RA>(
156        &mut self,
157        state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
158    ) -> Result<(), ExecutionError>
159    where
160        RA: Arena,
161        E: PreflightExecutor<F, RA>,
162    {
163        let pc = state.pc();
164        let pc_idx = get_pc_index(pc);
165        let pc_entry = self
166            .pc_handler
167            .get(pc_idx)
168            .ok_or_else(|| ExecutionError::PcOutOfBounds(pc))?;
169        // SAFETY: `execution_frequencies` has the same length as `pc_handler` so `get_pc_entry`
170        // already does the bounds check
171        unsafe {
172            *self.execution_frequencies.get_unchecked_mut(pc_idx) += 1;
173        };
174        // SAFETY: the `executor_idx` comes from ExecutorInventory, which ensures that
175        // `executor_idx` is within bounds
176        let executor = unsafe {
177            self.inventory
178                .executors
179                .get_unchecked(pc_entry.executor_idx as usize)
180        };
181        tracing::trace!("pc: {pc:#x} | {:?}", pc_entry.insn);
182
183        let opcode = pc_entry.insn.opcode;
184        let c = pc_entry.insn.c;
185        // Handle termination instruction
186        if opcode.as_usize() == SystemOpcode::CLASS_OFFSET + SystemOpcode::TERMINATE as usize {
187            state.exit_code = Ok(Some(c.as_canonical_u32()));
188            return Ok(());
189        }
190
191        // Execute the instruction using the control implementation
192        tracing::trace!(
193            "opcode: {} | timestamp: {}",
194            executor.get_opcode_name(pc_entry.insn.opcode.as_usize()),
195            state.memory.timestamp()
196        );
197        let arena = unsafe {
198            // SAFETY: executor_idx is guarantee to be within bounds by ProgramHandler constructor
199            let air_idx = *self
200                .executor_idx_to_air_idx
201                .get_unchecked(pc_entry.executor_idx as usize);
202            // SAFETY: air_idx is a valid AIR index in the vkey, and always construct arenas with
203            // length equal to num_airs
204            state.ctx.arenas.get_unchecked_mut(air_idx)
205        };
206        let vm_state_mut = state.vm_state.into_mut(arena);
207        executor.execute(vm_state_mut, &pc_entry.insn)?;
208
209        #[cfg(feature = "metrics")]
210        {
211            crate::metrics::update_instruction_metrics(state, executor, pc, pc_entry);
212        }
213
214        Ok(())
215    }
216}
217
218impl<F> PcEntry<F> {
219    pub fn is_some(&self) -> bool {
220        self.executor_idx != u32::MAX
221    }
222}
223
224impl<F: Default> PcEntry<F> {
225    fn undefined() -> Self {
226        Self {
227            insn: Instruction::default(),
228            executor_idx: u32::MAX,
229        }
230    }
231}
232
/// Runs `$executor.execute_from_state($state)` and evaluates to its result.
///
/// With the `metrics` feature enabled it additionally logs the number of instructions executed,
/// records that number in the `<name>_insns` counter and sets the `<name>_insn_mi/s` gauge to
/// instructions per microsecond of elapsed time. No tracing span is created.
#[macro_export]
macro_rules! execute_spanned {
    ($name:literal, $executor:expr, $state:expr) => {{
        #[cfg(feature = "metrics")]
        let timer = std::time::Instant::now();
        #[cfg(feature = "metrics")]
        let instret_before = $state.instret();

        let outcome = $executor.execute_from_state($state);

        #[cfg(feature = "metrics")]
        {
            let duration = timer.elapsed();
            let insns = $state.instret() - instret_before;
            let micros = duration.as_micros() as f64;
            tracing::info!("instructions_executed={insns}");
            metrics::counter!(concat!($name, "_insns")).absolute(insns);
            metrics::gauge!(concat!($name, "_insn_mi/s")).set(insns as f64 / micros);
        }
        outcome
    }};
}