openvm_transpiler/
elf.rs

1// Initial version taken from https://github.com/succinctlabs/sp1/blob/v2.0.0/crates/core/executor/src/disassembler/elf.rs under MIT License
2// and https://github.com/risc0/risc0/blob/f61379bf69b24d56e49d6af96a3b284961dcc498/risc0/binfmt/src/elf.rs#L34 under Apache License
3use std::{cmp::min, collections::BTreeMap, fmt::Debug};
4#[cfg(feature = "function-span")]
5use std::{
6    collections::{hash_map::Entry, HashMap},
7    io::Write,
8};
9
10use elf::{
11    abi::{EM_RISCV, ET_EXEC, PF_X, PT_LOAD},
12    endian::LittleEndian,
13    file::Class,
14    ElfBytes,
15};
16use eyre::{self, bail, ContextCompat};
17#[cfg(feature = "function-span")]
18use openvm_instructions::exe::FnBound;
19use openvm_instructions::{exe::FnBounds, program::MAX_ALLOWED_PC};
20use openvm_platform::WORD_SIZE;
21
22/// RISC-V 32IM ELF (Executable and Linkable Format) File.
23///
24/// This file represents a binary in the ELF format, specifically the RISC-V 32IM architecture
25/// with the following extensions:
26///
27/// - Base Integer Instruction Set (I)
28/// - Integer Multiplication and Division (M)
29///
30/// This format is commonly used in embedded systems and is supported by many compilers.
31#[derive(Debug, Clone)]
32pub struct Elf {
33    /// The instructions of the program encoded as 32-bits.
34    pub instructions: Vec<u32>,
35    /// The start address of the program.
36    pub(crate) pc_start: u32,
37    /// The base address of the program.
38    pub(crate) pc_base: u32,
39    /// The initial memory image, useful for global constants.
40    pub(crate) memory_image: BTreeMap<u32, u32>,
41    /// Debug info for spanning benchmark metrics by function.
42    pub(crate) fn_bounds: FnBounds,
43}
44
45impl Elf {
46    /// Create a new [Elf].
47    pub(crate) const fn new(
48        instructions: Vec<u32>,
49        pc_start: u32,
50        pc_base: u32,
51        memory_image: BTreeMap<u32, u32>,
52        fn_bounds: FnBounds,
53    ) -> Self {
54        Self {
55            instructions,
56            pc_start,
57            pc_base,
58            memory_image,
59            fn_bounds,
60        }
61    }
62
63    /// Parse the ELF file into a vector of 32-bit encoded instructions and the first memory
64    /// address.
65    ///
66    /// # Errors
67    ///
68    /// This function may return an error if the ELF is not valid.
69    ///
70    /// Reference: [Executable and Linkable Format](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format)
71    pub fn decode(input: &[u8], max_mem: u32) -> eyre::Result<Self> {
72        let mut image: BTreeMap<u32, u32> = BTreeMap::new();
73
74        // Parse the ELF file assuming that it is little-endian..
75        let elf = ElfBytes::<LittleEndian>::minimal_parse(input)
76            .map_err(|err| eyre::eyre!("Elf parse error: {err}"))?;
77
78        // Some sanity checks to make sure that the ELF file is valid.
79        if elf.ehdr.class != Class::ELF32 {
80            bail!("Not a 32-bit ELF");
81        } else if elf.ehdr.e_machine != EM_RISCV {
82            bail!("Invalid machine type, must be RISC-V");
83        } else if elf.ehdr.e_type != ET_EXEC {
84            bail!("Invalid ELF type, must be executable");
85        }
86
87        #[cfg(not(feature = "function-span"))]
88        let fn_bounds = Default::default();
89
90        #[cfg(feature = "function-span")]
91        let mut fn_bounds = FnBounds::new();
92        #[cfg(feature = "function-span")]
93        {
94            if let Some((symtab, stringtab)) = elf.symbol_table()? {
95                let mut fn_names = Vec::new();
96                for symbol in symtab.iter() {
97                    if symbol.st_symtype() == elf::abi::STT_FUNC {
98                        let raw_name = stringtab.get(symbol.st_name as usize).unwrap().to_string();
99                        let demangled_name = rustc_demangle::demangle(&raw_name).to_string();
100                        fn_names.push((demangled_name, symbol.st_name));
101                    }
102                }
103
104                let mut buf = Vec::new();
105                let mut offsets = HashMap::new();
106                buf.push(0);
107                for (name, st_name) in fn_names {
108                    if let Entry::Vacant(e) = offsets.entry(st_name) {
109                        let offset = buf.len();
110                        e.insert(offset);
111                        buf.extend_from_slice(name.as_bytes());
112                        buf.push(0);
113                    }
114                }
115
116                for symbol in symtab.iter() {
117                    if symbol.st_symtype() == elf::abi::STT_FUNC {
118                        fn_bounds.insert(
119                            symbol.st_value as u32,
120                            FnBound {
121                                start: symbol.st_value as u32,
122                                end: (symbol.st_value + symbol.st_size - (WORD_SIZE as u64)) as u32,
123                                name: offsets[&symbol.st_name].to_string(),
124                            },
125                        );
126                    }
127                }
128
129                let guest_symbols_path = std::env::var("GUEST_SYMBOLS_PATH")
130                    .map_err(|e| eyre::eyre!("{e}: GUEST_SYMBOLS_PATH"))?;
131                let mut guest_symbols_file =
132                    std::fs::File::create(&guest_symbols_path).map_err(|e| {
133                        eyre::eyre!(
134                            "Failed to create guest symbols file at {guest_symbols_path}: {e}"
135                        )
136                    })?;
137                guest_symbols_file.write_all(buf.as_slice())?;
138            } else {
139                println!("No symbol table found");
140            }
141        }
142
143        // Get the entrypoint of the ELF file as an u32.
144        let entry: u32 = elf
145            .ehdr
146            .e_entry
147            .try_into()
148            .map_err(|err| eyre::eyre!("e_entry was larger than 32 bits. {err}"))?;
149
150        // Make sure the entrypoint is valid.
151        if entry >= max_mem || !entry.is_multiple_of(WORD_SIZE as u32) {
152            bail!("Invalid entrypoint");
153        }
154
155        // Get the segments of the ELF file.
156        let segments = elf
157            .segments()
158            .ok_or_else(|| eyre::eyre!("Missing segment table"))?;
159        if segments.len() > 256 {
160            bail!("Too many program headers");
161        }
162
163        let mut instructions: Vec<u32> = Vec::new();
164        let mut base_address = u32::MAX;
165        // Track the end of the last executable segment to detect non-contiguous executable
166        // segments.
167        let mut last_exec_end: Option<u32> = None;
168
169        // Collect and sort PT_LOAD segments by virtual address to ensure executable
170        // segment contiguity checks are correct regardless of ELF header ordering.
171        let mut load_segments: Vec<_> = segments.iter().filter(|x| x.p_type == PT_LOAD).collect();
172        load_segments.sort_by_key(|s| s.p_vaddr);
173
174        for segment in load_segments {
175            // Get the file size of the segment as an u32.
176            let file_size: u32 = segment.p_filesz.try_into()?;
177            if file_size >= max_mem {
178                bail!("invalid segment file_size");
179            }
180
181            // Get the memory size of the segment as an u32.
182            let mem_size: u32 = segment.p_memsz.try_into()?;
183            if mem_size >= max_mem {
184                bail!("Invalid segment mem_size");
185            }
186
187            // Get the virtual address of the segment as an u32.
188            let vaddr: u32 = segment.p_vaddr.try_into()?;
189            if !vaddr.is_multiple_of(WORD_SIZE as u32) {
190                bail!("vaddr {vaddr:08x} is unaligned");
191            }
192
193            // Track executable segments and reject non-contiguous ones.
194            if (segment.p_flags & PF_X) != 0 {
195                if let Some(prev_end) = last_exec_end {
196                    if vaddr != prev_end {
197                        bail!(
198                            "Non-contiguous executable segments are not supported: \
199                             previous segment ended at 0x{prev_end:08x}, \
200                             next segment starts at 0x{vaddr:08x}"
201                        );
202                    }
203                }
204                if base_address > vaddr {
205                    base_address = vaddr;
206                }
207                last_exec_end = Some(
208                    vaddr
209                        .checked_add(mem_size)
210                        .ok_or_else(|| eyre::eyre!("executable segment end address overflow"))?,
211                );
212            }
213
214            // Get the offset to the segment.
215            let offset: u32 = segment.p_offset.try_into()?;
216
217            // Read the segment and decode each word as an instruction.
218            for i in (0..mem_size).step_by(WORD_SIZE) {
219                let addr = vaddr
220                    .checked_add(i)
221                    .ok_or_else(|| eyre::eyre!("vaddr overflow"))?;
222                if addr >= max_mem {
223                    bail!(
224                        "address [0x{addr:08x}] exceeds maximum address for guest programs [0x{max_mem:08x}]"
225                    );
226                } else if addr > MAX_ALLOWED_PC && (segment.p_flags & PF_X) != 0 {
227                    bail!("instruction address [0x{addr:08x}] exceeds maximum PC [0x{MAX_ALLOWED_PC:08x}]");
228                }
229
230                // If we are reading past the end of the file, then break.
231                if i >= file_size {
232                    image.insert(addr, 0);
233                    continue;
234                }
235
236                // Get the word as an u32 but make sure we don't read pass the end of the file.
237                let mut word = 0;
238                let len = min(file_size - i, WORD_SIZE as u32);
239                for j in 0..len {
240                    let offset = (offset + i + j) as usize;
241                    let byte = input.get(offset).context("Invalid segment offset")?;
242                    word |= u32::from(*byte) << (j * 8);
243                }
244                image.insert(addr, word);
245                if (segment.p_flags & PF_X) != 0 {
246                    instructions.push(word);
247                }
248            }
249        }
250
251        Ok(Elf::new(
252            instructions,
253            entry,
254            base_address,
255            image,
256            fn_bounds,
257        ))
258    }
259}