openvm_transpiler/
elf.rs

1// Initial version taken from https://github.com/succinctlabs/sp1/blob/v2.0.0/crates/core/executor/src/disassembler/elf.rs under MIT License
2// and https://github.com/risc0/risc0/blob/f61379bf69b24d56e49d6af96a3b284961dcc498/risc0/binfmt/src/elf.rs#L34 under Apache License
3use std::{cmp::min, collections::BTreeMap, fmt::Debug};
4#[cfg(feature = "function-span")]
5use std::{
6    collections::{hash_map::Entry, HashMap},
7    io::Write,
8};
9
10use elf::{
11    abi::{EM_RISCV, ET_EXEC, PF_X, PT_LOAD},
12    endian::LittleEndian,
13    file::Class,
14    ElfBytes,
15};
16use eyre::{self, bail, ContextCompat};
17#[cfg(feature = "function-span")]
18use openvm_instructions::exe::FnBound;
19use openvm_instructions::{exe::FnBounds, program::MAX_ALLOWED_PC};
20use openvm_platform::WORD_SIZE;
21
22/// RISC-V 32IM ELF (Executable and Linkable Format) File.
23///
24/// This file represents a binary in the ELF format, specifically the RISC-V 32IM architecture
25/// with the following extensions:
26///
27/// - Base Integer Instruction Set (I)
28/// - Integer Multiplication and Division (M)
29///
30/// This format is commonly used in embedded systems and is supported by many compilers.
31#[derive(Debug, Clone)]
32pub struct Elf {
33    /// The instructions of the program encoded as 32-bits.
34    pub instructions: Vec<u32>,
35    /// The start address of the program.
36    pub(crate) pc_start: u32,
37    /// The base address of the program.
38    pub(crate) pc_base: u32,
39    /// The initial memory image, useful for global constants.
40    pub(crate) memory_image: BTreeMap<u32, u32>,
41    /// Debug info for spanning benchmark metrics by function.
42    pub(crate) fn_bounds: FnBounds,
43}
44
45impl Elf {
46    /// Create a new [Elf].
47    pub(crate) const fn new(
48        instructions: Vec<u32>,
49        pc_start: u32,
50        pc_base: u32,
51        memory_image: BTreeMap<u32, u32>,
52        fn_bounds: FnBounds,
53    ) -> Self {
54        Self {
55            instructions,
56            pc_start,
57            pc_base,
58            memory_image,
59            fn_bounds,
60        }
61    }
62
63    /// Parse the ELF file into a vector of 32-bit encoded instructions and the first memory
64    /// address.
65    ///
66    /// # Errors
67    ///
68    /// This function may return an error if the ELF is not valid.
69    ///
70    /// Reference: [Executable and Linkable Format](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format)
71    pub fn decode(input: &[u8], max_mem: u32) -> eyre::Result<Self> {
72        let mut image: BTreeMap<u32, u32> = BTreeMap::new();
73
74        // Parse the ELF file assuming that it is little-endian..
75        let elf = ElfBytes::<LittleEndian>::minimal_parse(input)
76            .map_err(|err| eyre::eyre!("Elf parse error: {err}"))?;
77
78        // Some sanity checks to make sure that the ELF file is valid.
79        if elf.ehdr.class != Class::ELF32 {
80            bail!("Not a 32-bit ELF");
81        } else if elf.ehdr.e_machine != EM_RISCV {
82            bail!("Invalid machine type, must be RISC-V");
83        } else if elf.ehdr.e_type != ET_EXEC {
84            bail!("Invalid ELF type, must be executable");
85        }
86
87        #[cfg(not(feature = "function-span"))]
88        let fn_bounds = Default::default();
89
90        #[cfg(feature = "function-span")]
91        let mut fn_bounds = FnBounds::new();
92        #[cfg(feature = "function-span")]
93        {
94            if let Some((symtab, stringtab)) = elf.symbol_table()? {
95                let mut fn_names = Vec::new();
96                for symbol in symtab.iter() {
97                    if symbol.st_symtype() == elf::abi::STT_FUNC {
98                        let raw_name = stringtab.get(symbol.st_name as usize).unwrap().to_string();
99                        let demangled_name = rustc_demangle::demangle(&raw_name).to_string();
100                        fn_names.push((demangled_name, symbol.st_name));
101                    }
102                }
103
104                let mut buf = Vec::new();
105                let mut offsets = HashMap::new();
106                buf.push(0);
107                for (name, st_name) in fn_names {
108                    if let Entry::Vacant(e) = offsets.entry(st_name) {
109                        let offset = buf.len();
110                        e.insert(offset);
111                        buf.extend_from_slice(name.as_bytes());
112                        buf.push(0);
113                    }
114                }
115
116                for symbol in symtab.iter() {
117                    if symbol.st_symtype() == elf::abi::STT_FUNC {
118                        fn_bounds.insert(
119                            symbol.st_value as u32,
120                            FnBound {
121                                start: symbol.st_value as u32,
122                                end: (symbol.st_value + symbol.st_size - (WORD_SIZE as u64)) as u32,
123                                name: offsets[&symbol.st_name].to_string(),
124                            },
125                        );
126                    }
127                }
128
129                let guest_symbols_path = std::env::var("GUEST_SYMBOLS_PATH")
130                    .map_err(|e| eyre::eyre!("{e}: GUEST_SYMBOLS_PATH"))?;
131                let mut guest_symbols_file =
132                    std::fs::File::create(&guest_symbols_path).map_err(|e| {
133                        eyre::eyre!(
134                            "Failed to create guest symbols file at {guest_symbols_path}: {e}"
135                        )
136                    })?;
137                guest_symbols_file.write_all(buf.as_slice())?;
138            } else {
139                println!("No symbol table found");
140            }
141        }
142
143        // Get the entrypoint of the ELF file as an u32.
144        let entry: u32 = elf
145            .ehdr
146            .e_entry
147            .try_into()
148            .map_err(|err| eyre::eyre!("e_entry was larger than 32 bits. {err}"))?;
149
150        // Make sure the entrypoint is valid.
151        if entry >= max_mem || entry % WORD_SIZE as u32 != 0 {
152            bail!("Invalid entrypoint");
153        }
154
155        // Get the segments of the ELF file.
156        let segments = elf
157            .segments()
158            .ok_or_else(|| eyre::eyre!("Missing segment table"))?;
159        if segments.len() > 256 {
160            bail!("Too many program headers");
161        }
162
163        let mut instructions: Vec<u32> = Vec::new();
164        let mut base_address = u32::MAX;
165
166        // Only read segments that are executable instructions that are also PT_LOAD.
167        for segment in segments.iter().filter(|x| x.p_type == PT_LOAD) {
168            // Get the file size of the segment as an u32.
169            let file_size: u32 = segment.p_filesz.try_into()?;
170            if file_size >= max_mem {
171                bail!("invalid segment file_size");
172            }
173
174            // Get the memory size of the segment as an u32.
175            let mem_size: u32 = segment.p_memsz.try_into()?;
176            if mem_size >= max_mem {
177                bail!("Invalid segment mem_size");
178            }
179
180            // Get the virtual address of the segment as an u32.
181            let vaddr: u32 = segment.p_vaddr.try_into()?;
182            if vaddr % WORD_SIZE as u32 != 0 {
183                bail!("vaddr {vaddr:08x} is unaligned");
184            }
185
186            // If the virtual address is less than the first memory address, then update the first
187            // memory address.
188            if (segment.p_flags & PF_X) != 0 && base_address > vaddr {
189                base_address = vaddr;
190            }
191
192            // Get the offset to the segment.
193            let offset: u32 = segment.p_offset.try_into()?;
194
195            // Read the segment and decode each word as an instruction.
196            for i in (0..mem_size).step_by(WORD_SIZE) {
197                let addr = vaddr
198                    .checked_add(i)
199                    .ok_or_else(|| eyre::eyre!("vaddr overflow"))?;
200                if addr >= max_mem {
201                    bail!(
202                        "address [0x{addr:08x}] exceeds maximum address for guest programs [0x{max_mem:08x}]"
203                    );
204                } else if addr > MAX_ALLOWED_PC && (segment.p_flags & PF_X) != 0 {
205                    bail!("instruction address [0x{addr:08x}] exceeds maximum PC [0x{MAX_ALLOWED_PC:08x}]");
206                }
207
208                // If we are reading past the end of the file, then break.
209                if i >= file_size {
210                    image.insert(addr, 0);
211                    continue;
212                }
213
214                // Get the word as an u32 but make sure we don't read pass the end of the file.
215                let mut word = 0;
216                let len = min(file_size - i, WORD_SIZE as u32);
217                for j in 0..len {
218                    let offset = (offset + i + j) as usize;
219                    let byte = input.get(offset).context("Invalid segment offset")?;
220                    word |= u32::from(*byte) << (j * 8);
221                }
222                image.insert(addr, word);
223                if (segment.p_flags & PF_X) != 0 {
224                    instructions.push(word);
225                }
226            }
227        }
228
229        Ok(Elf::new(
230            instructions,
231            entry,
232            base_address,
233            image,
234            fn_bounds,
235        ))
236    }
237}