openvm_circuit/arch/
interpreter_preflight.rs

1use std::{iter::repeat_n, sync::Arc};
2
3use openvm_instructions::{instruction::Instruction, program::Program, LocalOpcode, SystemOpcode};
4use openvm_stark_backend::{
5    p3_field::{Field, PrimeField32},
6    p3_maybe_rayon::prelude::*,
7};
8
9use crate::{
10    arch::{
11        execution_mode::PreflightCtx, interpreter::get_pc_index, Arena, ExecutionError, ExecutorId,
12        ExecutorInventory, PreflightExecutor, StaticProgramError, VmExecState, VmStateMut,
13    },
14    system::memory::online::TracingMemory,
15};
16
/// VM preflight executor (E3 executor) for use with trace generation.
///
/// Note: This executor doesn't hold any VM state and can be used for multiple executions. It does,
/// however, accumulate per-pc execution counts in `execution_frequencies` across executions until
/// `reset_execution_frequencies` is called.
pub struct PreflightInterpretedInstance<F, E> {
    // NOTE[jpw]: we use an Arc so that VmInstance can hold both VirtualMachine and
    // PreflightInterpretedInstance. All we really need is to borrow `executors: &'a [E]`.
    inventory: Arc<ExecutorInventory<E>>,

    /// Table of [PcEntry] indexed by `get_pc_index(pc)`. The first `get_pc_index(pc_base)`
    /// entries are undefined padding; they are followed by one entry per program slot.
    /// We will set `executor_idx` to `u32::MAX` in the [PcEntry] if the program has no instruction
    /// at that pc.
    // PERF[jpw/ayush]: We could map directly to the raw pointer(u64) for executor, but storing the
    // u32 may be better for cache efficiency.
    pc_handler: Vec<PcEntry<F>>,
    /// Number of times the instruction at each table index has been fetched for execution.
    // `pc_handler` and `execution_frequencies` always have the same length, which equals
    // `get_pc_index(pc_base)` plus the number of program slots.
    execution_frequencies: Vec<u32>,
    /// `pc_base` of the program this instance was built from.
    pc_base: u32,

    /// Maps an executor index to the index of the arena (one per AIR) that the executor's records
    /// are written to during execution.
    pub(super) executor_idx_to_air_idx: Vec<usize>,
}
37
/// One slot of the pc-indexed dispatch table: an instruction together with the index of the
/// executor that handles it.
///
/// A slot with no instruction is represented by `executor_idx == u32::MAX`; use
/// [PcEntry::is_some] to tell the two cases apart.
#[repr(C)]
#[derive(Clone)]
pub struct PcEntry<F> {
    // NOTE[jpw]: revisit storing only smaller `precompute` for better cache locality. Currently
    // VmOpcode is usize so align=8 and there are 7 u32 operands so we store ExecutorId(u32) after
    // to avoid padding. This means PcEntry has align=8 and size=40 bytes, which is too big
    /// The instruction stored at this pc (a default instruction for undefined slots).
    pub insn: Instruction<F>,
    /// Index into the inventory's executors, or `u32::MAX` if the slot is undefined.
    pub executor_idx: ExecutorId,
}
47
48impl<F: Field, E> PreflightInterpretedInstance<F, E> {
49    /// Creates a new interpreter instance for preflight execution.
50    /// Rewrites the program into an internal table specialized for enum dispatch.
51    ///
52    /// ## Assumption
53    /// There are less than `u32::MAX` total AIRs.
54    pub fn new(
55        program: &Program<F>,
56        inventory: Arc<ExecutorInventory<E>>,
57        executor_idx_to_air_idx: Vec<usize>,
58    ) -> Result<Self, StaticProgramError> {
59        if inventory.executors().len() > u32::MAX as usize {
60            // This would mean we cannot use u32::MAX as an "undefined" executor index
61            return Err(StaticProgramError::TooManyExecutors);
62        }
63        let len = program.instructions_and_debug_infos.len();
64        let pc_base = program.pc_base;
65        let base_idx = get_pc_index(pc_base);
66        let mut pc_handler = Vec::with_capacity(base_idx + len);
67        pc_handler.extend(repeat_n(PcEntry::undefined(), base_idx));
68        for insn_and_debug_info in &program.instructions_and_debug_infos {
69            if let Some((insn, _)) = insn_and_debug_info {
70                let insn = insn.clone();
71                let executor_idx = if insn.opcode == SystemOpcode::TERMINATE.global_opcode() {
72                    // The execution loop will always branch to terminate before using this executor
73                    0
74                } else {
75                    *inventory.instruction_lookup.get(&insn.opcode).ok_or(
76                        StaticProgramError::ExecutorNotFound {
77                            opcode: insn.opcode,
78                        },
79                    )?
80                };
81                assert!(
82                    (executor_idx as usize) < inventory.executors.len(),
83                    "ExecutorInventory ensures executor_idx is in bounds"
84                );
85                let pc_entry = PcEntry { insn, executor_idx };
86                pc_handler.push(pc_entry);
87            } else {
88                pc_handler.push(PcEntry::undefined());
89            }
90        }
91        Ok(Self {
92            inventory,
93            execution_frequencies: vec![0u32; base_idx + len],
94            pc_base,
95            pc_handler,
96            executor_idx_to_air_idx,
97        })
98    }
99
100    pub fn executors(&self) -> &[E] {
101        &self.inventory.executors
102    }
103
104    pub fn filtered_execution_frequencies(&self) -> Vec<u32>
105    where
106        E: Send + Sync,
107    {
108        let base_idx = get_pc_index(self.pc_base);
109        self.pc_handler
110            .par_iter()
111            .enumerate()
112            .skip(base_idx)
113            .filter(|(_, entry)| entry.is_some())
114            .map(|(i, _)| self.execution_frequencies[i])
115            .collect()
116    }
117
118    pub fn reset_execution_frequencies(&mut self) {
119        self.execution_frequencies.fill(0);
120    }
121}
122
123impl<F: PrimeField32, E> PreflightInterpretedInstance<F, E> {
124    /// Stopping is triggered by should_stop() or if VM is terminated
125    pub fn execute_from_state<RA>(
126        &mut self,
127        state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
128    ) -> Result<(), ExecutionError>
129    where
130        RA: Arena,
131        E: PreflightExecutor<F, RA>,
132    {
133        loop {
134            if let Ok(Some(_)) = state.exit_code {
135                // should terminate
136                break;
137            }
138            if state
139                .ctx
140                .instret_end
141                .is_some_and(|instret_end| state.instret >= instret_end)
142            {
143                // should suspend
144                break;
145            }
146
147            // Fetch, decode and execute single instruction
148            self.execute_instruction(state)?;
149            state.instret += 1;
150        }
151
152        Ok(())
153    }
154
155    /// Executes a single instruction and updates VM state
156    #[inline(always)]
157    fn execute_instruction<RA>(
158        &mut self,
159        state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
160    ) -> Result<(), ExecutionError>
161    where
162        RA: Arena,
163        E: PreflightExecutor<F, RA>,
164    {
165        let pc = state.pc;
166        let pc_idx = get_pc_index(pc);
167        let pc_entry = self
168            .pc_handler
169            .get(pc_idx)
170            .ok_or_else(|| ExecutionError::PcOutOfBounds(pc))?;
171        // SAFETY: `execution_frequencies` has the same length as `pc_handler` so `get_pc_entry`
172        // already does the bounds check
173        unsafe {
174            *self.execution_frequencies.get_unchecked_mut(pc_idx) += 1;
175        };
176        // SAFETY: the `executor_idx` comes from ExecutorInventory, which ensures that
177        // `executor_idx` is within bounds
178        let executor = unsafe {
179            self.inventory
180                .executors
181                .get_unchecked(pc_entry.executor_idx as usize)
182        };
183        tracing::trace!("pc: {pc:#x} | {:?}", pc_entry.insn);
184
185        let opcode = pc_entry.insn.opcode;
186        let c = pc_entry.insn.c;
187        // Handle termination instruction
188        if opcode.as_usize() == SystemOpcode::CLASS_OFFSET + SystemOpcode::TERMINATE as usize {
189            state.exit_code = Ok(Some(c.as_canonical_u32()));
190            return Ok(());
191        }
192
193        // Execute the instruction using the control implementation
194        tracing::trace!(
195            "opcode: {} | timestamp: {}",
196            executor.get_opcode_name(pc_entry.insn.opcode.as_usize()),
197            state.memory.timestamp()
198        );
199        let arena = unsafe {
200            // SAFETY: executor_idx is guarantee to be within bounds by ProgramHandler constructor
201            let air_idx = *self
202                .executor_idx_to_air_idx
203                .get_unchecked(pc_entry.executor_idx as usize);
204            // SAFETY: air_idx is a valid AIR index in the vkey, and always construct arenas with
205            // length equal to num_airs
206            state.ctx.arenas.get_unchecked_mut(air_idx)
207        };
208        let state_mut = VmStateMut {
209            pc: &mut state.vm_state.pc,
210            memory: &mut state.vm_state.memory,
211            streams: &mut state.vm_state.streams,
212            rng: &mut state.vm_state.rng,
213            custom_pvs: &mut state.vm_state.custom_pvs,
214            ctx: arena,
215            #[cfg(feature = "metrics")]
216            metrics: &mut state.vm_state.metrics,
217        };
218        executor.execute(state_mut, &pc_entry.insn)?;
219
220        #[cfg(feature = "metrics")]
221        {
222            crate::metrics::update_instruction_metrics(state, executor, pc, pc_entry);
223        }
224
225        Ok(())
226    }
227}
228
229impl<F> PcEntry<F> {
230    pub fn is_some(&self) -> bool {
231        self.executor_idx != u32::MAX
232    }
233}
234
235impl<F: Default> PcEntry<F> {
236    fn undefined() -> Self {
237        Self {
238            insn: Instruction::default(),
239            executor_idx: u32::MAX,
240        }
241    }
242}
243
/// Runs `$executor.execute_from_state($state)` and evaluates to its result.
///
/// When the `metrics` feature is enabled, the call is additionally timed and the number of
/// executed instructions (the change in `$state.instret`) is logged, recorded in the
/// `<$name>_insns` counter, and reported as instructions per microsecond in the
/// `<$name>_insn_mi/s` gauge. Despite the name, no tracing span is created.
///
/// Note that `#[cfg(feature = "metrics")]` is evaluated in the crate that invokes the macro.
#[macro_export]
macro_rules! execute_spanned {
    ($name:literal, $executor:expr, $state:expr) => {{
        #[cfg(feature = "metrics")]
        let (timer, instret_before) = (std::time::Instant::now(), $state.instret);

        let outcome = $executor.execute_from_state($state);

        #[cfg(feature = "metrics")]
        {
            let wall_time = timer.elapsed();
            let insns = $state.instret - instret_before;
            tracing::info!("instructions_executed={insns}");
            metrics::counter!(concat!($name, "_insns")).absolute(insns);
            let insns_per_micro = insns as f64 / wall_time.as_micros() as f64;
            metrics::gauge!(concat!($name, "_insn_mi/s")).set(insns_per_micro);
        }
        outcome
    }};
}