1// Initial version taken from https://github.com/succinctlabs/sp1/blob/v2.0.0/crates/core/executor/src/disassembler/elf.rs under MIT License
2// and https://github.com/risc0/risc0/blob/f61379bf69b24d56e49d6af96a3b284961dcc498/risc0/binfmt/src/elf.rs#L34 under Apache License
3use std::{cmp::min, collections::BTreeMap, fmt::Debug};
45use elf::{
6 abi::{EM_RISCV, ET_EXEC, PF_X, PT_LOAD},
7 endian::LittleEndian,
8 file::Class,
9 ElfBytes,
10};
11use eyre::{self, bail, ContextCompat};
12#[cfg(feature = "function-span")]
13use openvm_instructions::exe::FnBound;
14use openvm_instructions::{exe::FnBounds, program::MAX_ALLOWED_PC};
15use openvm_platform::WORD_SIZE;
/// Default upper bound on the number of public values a program may publish.
///
/// [`Elf::new`] stores this value in `max_num_public_values`.
pub const ELF_DEFAULT_MAX_NUM_PUBLIC_VALUES: usize = 32;
1819/// RISC-V 32IM ELF (Executable and Linkable Format) File.
20///
21/// This file represents a binary in the ELF format, specifically the RISC-V 32IM architecture
22/// with the following extensions:
23///
24/// - Base Integer Instruction Set (I)
25/// - Integer Multiplication and Division (M)
26///
27/// This format is commonly used in embedded systems and is supported by many compilers.
28#[derive(Debug, Clone)]
29pub struct Elf {
30/// The instructions of the program encoded as 32-bits.
31pub instructions: Vec<u32>,
32/// The start address of the program.
33pub(crate) pc_start: u32,
34/// The base address of the program.
35pub(crate) pc_base: u32,
36/// The initial memory image, useful for global constants.
37pub(crate) memory_image: BTreeMap<u32, u32>,
38/// The upper bound of the number of public values the program would publish.
39 /// TODO: read from project config.
40pub(crate) max_num_public_values: usize,
41/// Debug info for spanning benchmark metrics by function.
42pub(crate) fn_bounds: FnBounds,
43}
4445impl Elf {
46/// Create a new [Elf].
47pub(crate) const fn new(
48 instructions: Vec<u32>,
49 pc_start: u32,
50 pc_base: u32,
51 memory_image: BTreeMap<u32, u32>,
52 fn_bounds: FnBounds,
53 ) -> Self {
54Self {
55 instructions,
56 pc_start,
57 pc_base,
58 memory_image,
59 max_num_public_values: ELF_DEFAULT_MAX_NUM_PUBLIC_VALUES,
60 fn_bounds,
61 }
62 }
6364/// Parse the ELF file into a vector of 32-bit encoded instructions and the first memory
65 /// address.
66 ///
67 /// # Errors
68 ///
69 /// This function may return an error if the ELF is not valid.
70 ///
71 /// Reference: [Executable and Linkable Format](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format)
72pub fn decode(input: &[u8], max_mem: u32) -> eyre::Result<Self> {
73let mut image: BTreeMap<u32, u32> = BTreeMap::new();
7475// Parse the ELF file assuming that it is little-endian..
76let elf = ElfBytes::<LittleEndian>::minimal_parse(input)
77 .map_err(|err| eyre::eyre!("Elf parse error: {err}"))?;
7879// Some sanity checks to make sure that the ELF file is valid.
80if elf.ehdr.class != Class::ELF32 {
81bail!("Not a 32-bit ELF");
82 } else if elf.ehdr.e_machine != EM_RISCV {
83bail!("Invalid machine type, must be RISC-V");
84 } else if elf.ehdr.e_type != ET_EXEC {
85bail!("Invalid ELF type, must be executable");
86 }
8788#[cfg(not(feature = "function-span"))]
89let fn_bounds = Default::default();
9091#[cfg(feature = "function-span")]
92let mut fn_bounds = FnBounds::new();
93#[cfg(feature = "function-span")]
94{
95if let Some((symtab, stringtab)) = elf.symbol_table()? {
96for symbol in symtab.iter() {
97if symbol.st_symtype() == elf::abi::STT_FUNC {
98 fn_bounds.insert(
99 symbol.st_value as u32,
100 FnBound {
101 start: symbol.st_value as u32,
102 end: (symbol.st_value + symbol.st_size - (WORD_SIZE as u64)) as u32,
103 name: stringtab.get(symbol.st_name as usize).unwrap().to_string(),
104 },
105 );
106 }
107 }
108 } else {
109println!("No symbol table found");
110 }
111 }
112113// Get the entrypoint of the ELF file as an u32.
114let entry: u32 = elf
115 .ehdr
116 .e_entry
117 .try_into()
118 .map_err(|err| eyre::eyre!("e_entry was larger than 32 bits. {err}"))?;
119120// Make sure the entrypoint is valid.
121if entry >= max_mem || entry % WORD_SIZE as u32 != 0 {
122bail!("Invalid entrypoint");
123 }
124125// Get the segments of the ELF file.
126let segments = elf
127 .segments()
128 .ok_or_else(|| eyre::eyre!("Missing segment table"))?;
129if segments.len() > 256 {
130bail!("Too many program headers");
131 }
132133let mut instructions: Vec<u32> = Vec::new();
134let mut base_address = u32::MAX;
135136// Only read segments that are executable instructions that are also PT_LOAD.
137for segment in segments.iter().filter(|x| x.p_type == PT_LOAD) {
138// Get the file size of the segment as an u32.
139let file_size: u32 = segment.p_filesz.try_into()?;
140if file_size >= max_mem {
141bail!("invalid segment file_size");
142 }
143144// Get the memory size of the segment as an u32.
145let mem_size: u32 = segment.p_memsz.try_into()?;
146if mem_size >= max_mem {
147bail!("Invalid segment mem_size");
148 }
149150// Get the virtual address of the segment as an u32.
151let vaddr: u32 = segment.p_vaddr.try_into()?;
152if vaddr % WORD_SIZE as u32 != 0 {
153bail!("vaddr {vaddr:08x} is unaligned");
154 }
155156// If the virtual address is less than the first memory address, then update the first
157 // memory address.
158if (segment.p_flags & PF_X) != 0 && base_address > vaddr {
159 base_address = vaddr;
160 }
161162// Get the offset to the segment.
163let offset: u32 = segment.p_offset.try_into()?;
164165// Read the segment and decode each word as an instruction.
166for i in (0..mem_size).step_by(WORD_SIZE) {
167let addr = vaddr
168 .checked_add(i)
169 .ok_or_else(|| eyre::eyre!("vaddr overflow"))?;
170if addr >= max_mem {
171bail!(
172"address [0x{addr:08x}] exceeds maximum address for guest programs [0x{max_mem:08x}]"
173);
174 } else if addr > MAX_ALLOWED_PC && (segment.p_flags & PF_X) != 0 {
175bail!("instruction address [0x{addr:08x}] exceeds maximum PC [0x{MAX_ALLOWED_PC:08x}]");
176 }
177178// If we are reading past the end of the file, then break.
179if i >= file_size {
180 image.insert(addr, 0);
181continue;
182 }
183184// Get the word as an u32 but make sure we don't read pass the end of the file.
185let mut word = 0;
186let len = min(file_size - i, WORD_SIZE as u32);
187for j in 0..len {
188let offset = (offset + i + j) as usize;
189let byte = input.get(offset).context("Invalid segment offset")?;
190 word |= u32::from(*byte) << (j * 8);
191 }
192 image.insert(addr, word);
193if (segment.p_flags & PF_X) != 0 {
194 instructions.push(word);
195 }
196 }
197 }
198199Ok(Elf::new(
200 instructions,
201 entry,
202 base_address,
203 image,
204 fn_bounds,
205 ))
206 }
207}