openvm_instructions/
program.rs

1use std::{
2    fmt::{self, Display},
3    ops::Deref,
4    sync::Arc,
5};
6
7use itertools::Itertools;
8use openvm_stark_backend::p3_field::Field;
9use serde::{de::Deserializer, Deserialize, Serialize, Serializer};
10
11use crate::instruction::{DebugInfo, Instruction};
12
/// Bit width used to bound program counter values; [`MAX_ALLOWED_PC`] is derived from it.
pub const PC_BITS: usize = 30;
/// We use default PC step of 4 whenever possible for consistency with RISC-V, where 4 comes
/// from the fact that each standard RISC-V instruction is 32-bits = 4 bytes.
pub const DEFAULT_PC_STEP: u32 = 4;
/// Largest value representable in [`PC_BITS`] bits, i.e. `2^PC_BITS - 1`.
pub const MAX_ALLOWED_PC: u32 = (1 << PC_BITS) - 1;
18
/// A list of instruction slots together with the program counter of the first slot.
///
/// Serialization routes `instructions_and_debug_infos` through
/// `serialize_instructions_and_debug_infos` / `deserialize_instructions_and_debug_infos`,
/// which keep the instructions and their slot positions but drop all debug info.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
#[serde(bound(serialize = "F: Serialize", deserialize = "F: Deserialize<'de>"))]
pub struct Program<F> {
    /// Instruction slots in program order. The slot at index `i` corresponds to
    /// `pc = pc_base + DEFAULT_PC_STEP * i` (see `enumerate_by_pc`), so with the default step
    /// the program counters are enumerated as `pc_base`, `pc_base + 4`, `pc_base + 8`, etc.
    /// A `None` slot means no instruction is defined at that position.
    ///
    /// Maybe at some point we will replace this with a struct that would have a `Vec` under the
    /// hood and divide the incoming `pc` by whatever step is given.
    #[serde(
        serialize_with = "serialize_instructions_and_debug_infos",
        deserialize_with = "deserialize_instructions_and_debug_infos"
    )]
    pub instructions_and_debug_infos: Vec<Option<(Instruction<F>, Option<DebugInfo>)>>,
    /// Program counter associated with slot 0 of `instructions_and_debug_infos`.
    pub pc_base: u32,
}
33
/// Per-slot debug information extracted from a [`Program`] (see `Program::debug_infos`).
///
/// The entries live behind an `Arc`, so cloning this value does not copy them.
#[derive(Clone, Debug, Default)]
pub struct ProgramDebugInfo {
    /// One entry per program slot, in slot order; `None` where the slot is empty or the
    /// instruction carries no debug info.
    inner: Arc<Vec<Option<DebugInfo>>>,
    /// Program counter associated with index 0 of `inner`.
    pc_base: u32,
}
39
40impl<F: Field> Program<F> {
41    pub fn new_empty(pc_base: u32) -> Self {
42        Self {
43            instructions_and_debug_infos: vec![],
44            pc_base,
45        }
46    }
47
48    pub fn new_without_debug_infos(instructions: &[Instruction<F>], pc_base: u32) -> Self {
49        Self {
50            instructions_and_debug_infos: instructions
51                .iter()
52                .map(|instruction| Some((instruction.clone(), None)))
53                .collect(),
54            pc_base,
55        }
56    }
57
58    pub fn new_without_debug_infos_with_option(
59        instructions: &[Option<Instruction<F>>],
60        pc_base: u32,
61    ) -> Self {
62        Self {
63            instructions_and_debug_infos: instructions
64                .iter()
65                .map(|instruction| instruction.clone().map(|instruction| (instruction, None)))
66                .collect(),
67            pc_base,
68        }
69    }
70
71    /// We assume that pc_start = pc_base = 0 everywhere except the RISC-V programs, until we need
72    /// otherwise We use [DEFAULT_PC_STEP] for consistency with RISC-V
73    pub fn from_instructions_and_debug_infos(
74        instructions: &[Instruction<F>],
75        debug_infos: &[Option<DebugInfo>],
76    ) -> Self {
77        Self {
78            instructions_and_debug_infos: instructions
79                .iter()
80                .zip_eq(debug_infos.iter())
81                .map(|(instruction, debug_info)| Some((instruction.clone(), debug_info.clone())))
82                .collect(),
83            pc_base: 0,
84        }
85    }
86
87    pub fn strip_debug_infos(self) -> Self {
88        Self {
89            instructions_and_debug_infos: self
90                .instructions_and_debug_infos
91                .into_iter()
92                .map(|opt| opt.map(|(ins, _)| (ins, None)))
93                .collect(),
94            ..self
95        }
96    }
97
98    pub fn from_instructions(instructions: &[Instruction<F>]) -> Self {
99        Self::new_without_debug_infos(instructions, 0)
100    }
101
102    pub fn len(&self) -> usize {
103        self.instructions_and_debug_infos.len()
104    }
105
106    pub fn is_empty(&self) -> bool {
107        self.instructions_and_debug_infos.is_empty()
108    }
109
110    pub fn defined_instructions(&self) -> Vec<Instruction<F>> {
111        self.instructions_and_debug_infos
112            .iter()
113            .flatten()
114            .map(|(instruction, _)| instruction.clone())
115            .collect()
116    }
117
118    // if this is being called a lot, we may want to optimize this later
119    pub fn num_defined_instructions(&self) -> usize {
120        self.defined_instructions().len()
121    }
122
123    pub fn enumerate_by_pc(&self) -> Vec<(u32, Instruction<F>, Option<DebugInfo>)> {
124        self.instructions_and_debug_infos
125            .iter()
126            .enumerate()
127            .flat_map(|(index, option)| {
128                option.clone().map(|(instruction, debug_info)| {
129                    (
130                        self.pc_base + (DEFAULT_PC_STEP * (index as u32)),
131                        instruction,
132                        debug_info,
133                    )
134                })
135            })
136            .collect()
137    }
138
139    // such that pc = pc_base + (step * index)
140    pub fn get_instruction_and_debug_info(
141        &self,
142        index: usize,
143    ) -> Option<&(Instruction<F>, Option<DebugInfo>)> {
144        self.instructions_and_debug_infos
145            .get(index)
146            .and_then(|x| x.as_ref())
147    }
148
149    pub fn push_instruction_and_debug_info(
150        &mut self,
151        instruction: Instruction<F>,
152        debug_info: Option<DebugInfo>,
153    ) {
154        self.instructions_and_debug_infos
155            .push(Some((instruction, debug_info)));
156    }
157
158    pub fn push_instruction(&mut self, instruction: Instruction<F>) {
159        self.push_instruction_and_debug_info(instruction, None);
160    }
161
162    pub fn append(&mut self, other: Program<F>) {
163        self.instructions_and_debug_infos
164            .extend(other.instructions_and_debug_infos);
165    }
166}
167
168impl<F> Program<F> {
169    pub fn debug_infos(&self) -> ProgramDebugInfo {
170        let debug_infos = self
171            .instructions_and_debug_infos
172            .iter()
173            .map(|opt| opt.as_ref().and_then(|(_, debug_info)| debug_info.clone()))
174            .collect();
175        ProgramDebugInfo {
176            inner: Arc::new(debug_infos),
177            pc_base: self.pc_base,
178        }
179    }
180}
181
182impl<F: Field> Display for Program<F> {
183    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
184        for instruction in self.defined_instructions().iter() {
185            let Instruction {
186                opcode,
187                a,
188                b,
189                c,
190                d,
191                e,
192                f,
193                g,
194            } = instruction;
195            writeln!(
196                formatter,
197                "{:?} {} {} {} {} {} {} {}",
198                opcode, a, b, c, d, e, f, g,
199            )?;
200        }
201        Ok(())
202    }
203}
204
205impl ProgramDebugInfo {
206    /// ## Panics
207    /// If `pc` is out of bounds.
208    pub fn get(&self, pc: u32) -> &Option<DebugInfo> {
209        let pc_base = self.pc_base;
210        let pc_idx = ((pc - pc_base) / DEFAULT_PC_STEP) as usize;
211        &self.inner[pc_idx]
212    }
213}
214
215impl Deref for ProgramDebugInfo {
216    type Target = [Option<DebugInfo>];
217
218    fn deref(&self) -> &Self::Target {
219        &self.inner
220    }
221}
222
223pub fn display_program_with_pc<F: Field>(program: &Program<F>) {
224    for (pc, instruction) in program.defined_instructions().iter().enumerate() {
225        let Instruction {
226            opcode,
227            a,
228            b,
229            c,
230            d,
231            e,
232            f,
233            g,
234        } = instruction;
235        println!(
236            "{} | {:?} {} {} {} {} {} {} {}",
237            pc, opcode, a, b, c, d, e, f, g
238        );
239    }
240}
241
242// `debug_info` is based on the symbol table of the binary. Usually serializing `debug_info` is not
243// meaningful because the program is executed by another binary. So here we only serialize
244// instructions.
245fn serialize_instructions_and_debug_infos<F: Serialize, S: Serializer>(
246    data: &[Option<(Instruction<F>, Option<DebugInfo>)>],
247    serializer: S,
248) -> Result<S::Ok, S::Error> {
249    let mut ins_data = Vec::with_capacity(data.len());
250    let total_len = data.len() as u32;
251    for (i, o) in data.iter().enumerate() {
252        if let Some(o) = o {
253            ins_data.push((&o.0, i as u32));
254        }
255    }
256    (ins_data, total_len).serialize(serializer)
257}
258
259#[allow(clippy::type_complexity)]
260fn deserialize_instructions_and_debug_infos<'de, F: Deserialize<'de>, D: Deserializer<'de>>(
261    deserializer: D,
262) -> Result<Vec<Option<(Instruction<F>, Option<DebugInfo>)>>, D::Error> {
263    let (inst_data, total_len): (Vec<(Instruction<F>, u32)>, u32) =
264        Deserialize::deserialize(deserializer)?;
265    let mut ret: Vec<Option<(Instruction<F>, Option<DebugInfo>)>> = Vec::new();
266    ret.resize_with(total_len as usize, || None);
267    for (inst, i) in inst_data {
268        ret[i as usize] = Some((inst, None));
269    }
270    Ok(ret)
271}
272
#[cfg(test)]
mod tests {
    use itertools::izip;
    use p3_baby_bear::BabyBear;

    use super::*;
    use crate::VmOpcode;

    type F = BabyBear;

    /// Round-trips a program containing leading, interior and trailing slots (some empty)
    /// through `bitcode` and checks that the slot layout and instructions survive.
    #[test]
    fn test_program_serde() {
        let mut program = Program::<F>::new_empty(0);
        program.instructions_and_debug_infos.push(Some((
            Instruction::from_isize(VmOpcode::from_usize(113), 1, 2, 3, 4, 5),
            None,
        )));
        program.instructions_and_debug_infos.push(None);
        program.instructions_and_debug_infos.push(None);
        program.instructions_and_debug_infos.push(Some((
            Instruction::from_isize(VmOpcode::from_usize(145), 10, 20, 30, 40, 50),
            None,
        )));
        program.instructions_and_debug_infos.push(Some((
            Instruction::from_isize(VmOpcode::from_usize(145), 10, 20, 30, 40, 50),
            None,
        )));
        // Trailing empty slot: only the serialized total length can preserve it.
        program.instructions_and_debug_infos.push(None);
        let bytes = bitcode::serialize(&program).unwrap();
        let de_program: Program<F> = bitcode::deserialize(&bytes).unwrap();
        assert_eq!(program.pc_base, de_program.pc_base);
        // `izip!` stops at the shorter sequence, so the lengths must be compared explicitly
        // or a lost trailing slot would go unnoticed.
        assert_eq!(
            program.instructions_and_debug_infos.len(),
            de_program.instructions_and_debug_infos.len()
        );
        for (expected_ins, ins) in izip!(
            &program.instructions_and_debug_infos,
            &de_program.instructions_and_debug_infos
        ) {
            match (expected_ins, ins) {
                (Some(expected_ins), Some(ins)) => {
                    assert_eq!(expected_ins.0, ins.0);
                }
                (None, None) => {}
                _ => {
                    panic!("Different instructions after serialization");
                }
            }
        }
    }
}