openvm_transpiler/
elf.rs

// Initial version taken from https://github.com/succinctlabs/sp1/blob/v2.0.0/crates/core/executor/src/disassembler/elf.rs under MIT License
// and https://github.com/risc0/risc0/blob/f61379bf69b24d56e49d6af96a3b284961dcc498/risc0/binfmt/src/elf.rs#L34 under Apache License
3use std::{cmp::min, collections::BTreeMap, fmt::Debug};
4
5use elf::{
6    abi::{EM_RISCV, ET_EXEC, PF_X, PT_LOAD},
7    endian::LittleEndian,
8    file::Class,
9    ElfBytes,
10};
11use eyre::{self, bail, ContextCompat};
12#[cfg(feature = "function-span")]
13use openvm_instructions::exe::FnBound;
14use openvm_instructions::{exe::FnBounds, program::MAX_ALLOWED_PC};
15use openvm_platform::WORD_SIZE;
16
/// Default value stored in [`Elf::max_num_public_values`] when an [`Elf`] is constructed.
pub const ELF_DEFAULT_MAX_NUM_PUBLIC_VALUES: usize = 32;
18
19/// RISC-V 32IM ELF (Executable and Linkable Format) File.
20///
21/// This file represents a binary in the ELF format, specifically the RISC-V 32IM architecture
22/// with the following extensions:
23///
24/// - Base Integer Instruction Set (I)
25/// - Integer Multiplication and Division (M)
26///
27/// This format is commonly used in embedded systems and is supported by many compilers.
28#[derive(Debug, Clone)]
29pub struct Elf {
30    /// The instructions of the program encoded as 32-bits.
31    pub instructions: Vec<u32>,
32    /// The start address of the program.
33    pub(crate) pc_start: u32,
34    /// The base address of the program.
35    pub(crate) pc_base: u32,
36    /// The initial memory image, useful for global constants.
37    pub(crate) memory_image: BTreeMap<u32, u32>,
38    /// The upper bound of the number of public values the program would publish.
39    /// TODO: read from project config.
40    pub(crate) max_num_public_values: usize,
41    /// Debug info for spanning benchmark metrics by function.
42    pub(crate) fn_bounds: FnBounds,
43}
44
45impl Elf {
46    /// Create a new [Elf].
47    pub(crate) const fn new(
48        instructions: Vec<u32>,
49        pc_start: u32,
50        pc_base: u32,
51        memory_image: BTreeMap<u32, u32>,
52        fn_bounds: FnBounds,
53    ) -> Self {
54        Self {
55            instructions,
56            pc_start,
57            pc_base,
58            memory_image,
59            max_num_public_values: ELF_DEFAULT_MAX_NUM_PUBLIC_VALUES,
60            fn_bounds,
61        }
62    }
63
64    /// Parse the ELF file into a vector of 32-bit encoded instructions and the first memory
65    /// address.
66    ///
67    /// # Errors
68    ///
69    /// This function may return an error if the ELF is not valid.
70    ///
71    /// Reference: [Executable and Linkable Format](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format)
72    pub fn decode(input: &[u8], max_mem: u32) -> eyre::Result<Self> {
73        let mut image: BTreeMap<u32, u32> = BTreeMap::new();
74
75        // Parse the ELF file assuming that it is little-endian..
76        let elf = ElfBytes::<LittleEndian>::minimal_parse(input)
77            .map_err(|err| eyre::eyre!("Elf parse error: {err}"))?;
78
79        // Some sanity checks to make sure that the ELF file is valid.
80        if elf.ehdr.class != Class::ELF32 {
81            bail!("Not a 32-bit ELF");
82        } else if elf.ehdr.e_machine != EM_RISCV {
83            bail!("Invalid machine type, must be RISC-V");
84        } else if elf.ehdr.e_type != ET_EXEC {
85            bail!("Invalid ELF type, must be executable");
86        }
87
88        #[cfg(not(feature = "function-span"))]
89        let fn_bounds = Default::default();
90
91        #[cfg(feature = "function-span")]
92        let mut fn_bounds = FnBounds::new();
93        #[cfg(feature = "function-span")]
94        {
95            if let Some((symtab, stringtab)) = elf.symbol_table()? {
96                for symbol in symtab.iter() {
97                    if symbol.st_symtype() == elf::abi::STT_FUNC {
98                        fn_bounds.insert(
99                            symbol.st_value as u32,
100                            FnBound {
101                                start: symbol.st_value as u32,
102                                end: (symbol.st_value + symbol.st_size - (WORD_SIZE as u64)) as u32,
103                                name: stringtab.get(symbol.st_name as usize).unwrap().to_string(),
104                            },
105                        );
106                    }
107                }
108            } else {
109                println!("No symbol table found");
110            }
111        }
112
113        // Get the entrypoint of the ELF file as an u32.
114        let entry: u32 = elf
115            .ehdr
116            .e_entry
117            .try_into()
118            .map_err(|err| eyre::eyre!("e_entry was larger than 32 bits. {err}"))?;
119
120        // Make sure the entrypoint is valid.
121        if entry >= max_mem || entry % WORD_SIZE as u32 != 0 {
122            bail!("Invalid entrypoint");
123        }
124
125        // Get the segments of the ELF file.
126        let segments = elf
127            .segments()
128            .ok_or_else(|| eyre::eyre!("Missing segment table"))?;
129        if segments.len() > 256 {
130            bail!("Too many program headers");
131        }
132
133        let mut instructions: Vec<u32> = Vec::new();
134        let mut base_address = u32::MAX;
135
136        // Only read segments that are executable instructions that are also PT_LOAD.
137        for segment in segments.iter().filter(|x| x.p_type == PT_LOAD) {
138            // Get the file size of the segment as an u32.
139            let file_size: u32 = segment.p_filesz.try_into()?;
140            if file_size >= max_mem {
141                bail!("invalid segment file_size");
142            }
143
144            // Get the memory size of the segment as an u32.
145            let mem_size: u32 = segment.p_memsz.try_into()?;
146            if mem_size >= max_mem {
147                bail!("Invalid segment mem_size");
148            }
149
150            // Get the virtual address of the segment as an u32.
151            let vaddr: u32 = segment.p_vaddr.try_into()?;
152            if vaddr % WORD_SIZE as u32 != 0 {
153                bail!("vaddr {vaddr:08x} is unaligned");
154            }
155
156            // If the virtual address is less than the first memory address, then update the first
157            // memory address.
158            if (segment.p_flags & PF_X) != 0 && base_address > vaddr {
159                base_address = vaddr;
160            }
161
162            // Get the offset to the segment.
163            let offset: u32 = segment.p_offset.try_into()?;
164
165            // Read the segment and decode each word as an instruction.
166            for i in (0..mem_size).step_by(WORD_SIZE) {
167                let addr = vaddr
168                    .checked_add(i)
169                    .ok_or_else(|| eyre::eyre!("vaddr overflow"))?;
170                if addr >= max_mem {
171                    bail!(
172                        "address [0x{addr:08x}] exceeds maximum address for guest programs [0x{max_mem:08x}]"
173                    );
174                } else if addr > MAX_ALLOWED_PC && (segment.p_flags & PF_X) != 0 {
175                    bail!("instruction address [0x{addr:08x}] exceeds maximum PC [0x{MAX_ALLOWED_PC:08x}]");
176                }
177
178                // If we are reading past the end of the file, then break.
179                if i >= file_size {
180                    image.insert(addr, 0);
181                    continue;
182                }
183
184                // Get the word as an u32 but make sure we don't read pass the end of the file.
185                let mut word = 0;
186                let len = min(file_size - i, WORD_SIZE as u32);
187                for j in 0..len {
188                    let offset = (offset + i + j) as usize;
189                    let byte = input.get(offset).context("Invalid segment offset")?;
190                    word |= u32::from(*byte) << (j * 8);
191                }
192                image.insert(addr, word);
193                if (segment.p_flags & PF_X) != 0 {
194                    instructions.push(word);
195                }
196            }
197        }
198
199        Ok(Elf::new(
200            instructions,
201            entry,
202            base_address,
203            image,
204            fn_bounds,
205        ))
206    }
207}