elf/
endian.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
//! An all-safe-code endian-aware integer parsing implementation via the
//! [EndianParse] trait.
//!
//! This module provides four endian parsing implementations optimized to support the different
//! common use-cases for an ELF parsing library.  Each trait impl represents a
//! specification that encapsulates an interface for parsing integers from some
//! set of allowed byte orderings.
//!
//! * [AnyEndian]: Dynamically parsing either byte order at runtime based on the type of ELF object being parsed.
//! * [BigEndian]/[LittleEndian]: For tools that know they only want to parse a single given byte order known at compile time.
//! * [type@NativeEndian]: For tools that know they want to parse the same byte order as the target's byte order.
//
// Note:
//   I'd love to see this get replaced with safe transmutes, if that RFC ever gets formalized.
//   Until then, this crate serves as an example implementation for what's possible with purely safe rust.
use crate::abi;
use crate::parse::ParseError;

/// Expands to safe code that reads one `$typ` integer out of the byte slice `$data`,
/// starting at the position stored in `$off`.
///
/// The bytes are fetched as a `[u8; size_of::<$typ>()]` and decoded with
/// `from_le_bytes` when `$self.is_little()` is true, otherwise with `from_be_bytes`.
/// On success `*$off` is moved to just past the bytes that were read; on any
/// failure `*$off` keeps its previous value.
///
/// All offset math is checked: the expansion yields `ParseError::IntegerOverflow`
/// when the end position does not fit in a `usize`, and
/// `ParseError::SliceReadError((start, end))` when `$data` has too few bytes.
macro_rules! safe_from {
    ( $self:ident, $typ:ty, $off:ident, $data:ident) => {{
        const WIDTH: usize = core::mem::size_of::<$typ>();

        let start: usize = *$off;
        let stop = start
            .checked_add(WIDTH)
            .ok_or(ParseError::IntegerOverflow)?;

        let window = $data
            .get(start..stop)
            .ok_or(ParseError::SliceReadError((start, stop)))?;
        let raw: [u8; WIDTH] = window.try_into()?;

        // Only commit the new position once every fallible step has succeeded.
        *$off = stop;

        // Note: for the "fixed" specs (LittleEndian, BigEndian, NativeEndian) this
        // condition is a constant, so the compiler should fold the branch away.
        Ok(if $self.is_little() {
            <$typ>::from_le_bytes(raw)
        } else {
            <$typ>::from_be_bytes(raw)
        })
    }};
}

/// An all-safe-code endian-aware integer parsing trait.
///
/// Each `parse_*_at` method uses safe code to get a subslice from the byte slice `data`
/// at the given `offset` as a `[u8; size_of::<T>()]`, then calls the corresponding safe
/// endian-aware conversion on it (`from_le_bytes` when [is_little](EndianParse::is_little)
/// returns true, `from_be_bytes` otherwise). On success `offset` is advanced past the
/// bytes that were read; on failure `offset` is left unchanged.
///
/// These use checked integer math and return a ParseError on overflow or if `data` did
/// not contain enough bytes at `offset` to perform the conversion.
pub trait EndianParse: Clone + Copy + Default + PartialEq + Eq {
    /// Parse a `u8` from `data` at `offset`, advancing `offset` by 1 on success.
    fn parse_u8_at(self, offset: &mut usize, data: &[u8]) -> Result<u8, ParseError> {
        safe_from!(self, u8, offset, data)
    }

    /// Parse a `u16` from `data` at `offset`, advancing `offset` by 2 on success.
    fn parse_u16_at(self, offset: &mut usize, data: &[u8]) -> Result<u16, ParseError> {
        safe_from!(self, u16, offset, data)
    }

    /// Parse a `u32` from `data` at `offset`, advancing `offset` by 4 on success.
    fn parse_u32_at(self, offset: &mut usize, data: &[u8]) -> Result<u32, ParseError> {
        safe_from!(self, u32, offset, data)
    }

    /// Parse a `u64` from `data` at `offset`, advancing `offset` by 8 on success.
    fn parse_u64_at(self, offset: &mut usize, data: &[u8]) -> Result<u64, ParseError> {
        safe_from!(self, u64, offset, data)
    }

    /// Parse an `i32` from `data` at `offset`, advancing `offset` by 4 on success.
    fn parse_i32_at(self, offset: &mut usize, data: &[u8]) -> Result<i32, ParseError> {
        safe_from!(self, i32, offset, data)
    }

    /// Parse an `i64` from `data` at `offset`, advancing `offset` by 8 on success.
    fn parse_i64_at(self, offset: &mut usize, data: &[u8]) -> Result<i64, ParseError> {
        safe_from!(self, i64, offset, data)
    }

    /// Get an endian-aware integer parsing spec for an ELF [FileHeader](crate::file::FileHeader)'s
    /// `ident[EI_DATA]` byte.
    ///
    /// Returns an [UnsupportedElfEndianness](ParseError::UnsupportedElfEndianness) if this spec
    /// doesn't support parsing the byte-order represented by ei_data. If you're
    /// seeing this error, are you trying to read files of any endianness? i.e.
    /// did you want to use AnyEndian?
    fn from_ei_data(ei_data: u8) -> Result<Self, ParseError>;

    /// Returns true if this spec parses integers as little-endian.
    fn is_little(self) -> bool;

    /// Returns true if this spec parses integers as big-endian.
    ///
    /// The default implementation is the negation of [is_little](EndianParse::is_little).
    #[inline(always)]
    fn is_big(self) -> bool {
        !self.is_little()
    }
}

/// An endian parsing type that can choose at runtime which byte order to parse integers as.
/// This is useful for scenarios where a single compiled binary wants to dynamically
/// interpret ELF files of any byte order.
///
/// The `Default` value is [AnyEndian::Little].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum AnyEndian {
    /// Used for little-endian ELF structures that have been parsed with AnyEndian
    #[default]
    Little,
    /// Used for big-endian ELF structures that have been parsed with AnyEndian
    Big,
}

/// A zero-sized type that always parses integers as if they're in little-endian order.
/// This is useful for scenarios where a compiled binary knows it only wants to interpret
/// little-endian ELF files and doesn't want the performance penalty of evaluating a match
/// each time it parses an integer.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct LittleEndian;

/// A zero-sized type that always parses integers as if they're in big-endian order.
/// This is useful for scenarios where a compiled binary knows it only wants to interpret
/// big-endian ELF files and doesn't want the performance penalty of evaluating a match
/// each time it parses an integer.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct BigEndian;

/// A zero-sized type that always parses integers as if they're in the compilation target's native-endian order.
/// This is useful for toolchain scenarios where a compiled binary knows it only wants to interpret
/// ELF files compiled for the same target and doesn't want the performance penalty of evaluating a match
/// each time it parses an integer.
///
/// On little-endian targets this is an alias for [LittleEndian].
#[cfg(target_endian = "little")]
pub type NativeEndian = LittleEndian;

// A constant that shares the alias's name and holds the `LittleEndian` unit value.
// NOTE(review): presumably this exists so `NativeEndian` can also be written in value
// position, like the unit structs (a type alias alone cannot be used as a value
// expression); hence the non-upper-case name. Confirm intent.
#[cfg(target_endian = "little")]
#[allow(non_upper_case_globals)]
#[doc(hidden)]
pub const NativeEndian: LittleEndian = LittleEndian;

/// A zero-sized type that always parses integers as if they're in the compilation target's native-endian order.
/// This is useful for toolchain scenarios where a compiled binary knows it only wants to interpret
/// ELF files compiled for the same target and doesn't want the performance penalty of evaluating a match
/// each time it parses an integer.
///
/// On big-endian targets this is an alias for [BigEndian].
#[cfg(target_endian = "big")]
pub type NativeEndian = BigEndian;

// A constant that shares the alias's name and holds the `BigEndian` unit value.
// NOTE(review): presumably this exists so `NativeEndian` can also be written in value
// position, like the unit structs (a type alias alone cannot be used as a value
// expression); hence the non-upper-case name. Confirm intent.
#[cfg(target_endian = "big")]
#[allow(non_upper_case_globals)]
#[doc(hidden)]
pub const NativeEndian: BigEndian = BigEndian;

/// The compile-time little-endian spec: it accepts only `ELFDATA2LSB` files.
impl EndianParse for LittleEndian {
    /// Yields the [LittleEndian] spec when `ei_data` equals `abi::ELFDATA2LSB`; every
    /// other value is rejected with [ParseError::UnsupportedElfEndianness].
    fn from_ei_data(ei_data: u8) -> Result<Self, ParseError> {
        if ei_data == abi::ELFDATA2LSB {
            Ok(Self)
        } else {
            Err(ParseError::UnsupportedElfEndianness(ei_data))
        }
    }

    /// Always `true`: this spec is fixed to little-endian.
    #[inline(always)]
    fn is_little(self) -> bool {
        true
    }
}

/// The compile-time big-endian spec: it accepts only `ELFDATA2MSB` files.
impl EndianParse for BigEndian {
    /// Yields the [BigEndian] spec when `ei_data` equals `abi::ELFDATA2MSB`; every
    /// other value is rejected with [ParseError::UnsupportedElfEndianness].
    fn from_ei_data(ei_data: u8) -> Result<Self, ParseError> {
        if ei_data != abi::ELFDATA2MSB {
            return Err(ParseError::UnsupportedElfEndianness(ei_data));
        }
        Ok(Self)
    }

    /// Always `false`: this spec is fixed to big-endian.
    #[inline(always)]
    fn is_little(self) -> bool {
        false
    }
}

/// The runtime-selected spec: it accepts both `ELFDATA2LSB` and `ELFDATA2MSB` files.
impl EndianParse for AnyEndian {
    /// Maps `abi::ELFDATA2LSB` to [AnyEndian::Little] and `abi::ELFDATA2MSB` to
    /// [AnyEndian::Big]; every other value is rejected with
    /// [ParseError::UnsupportedElfEndianness].
    fn from_ei_data(ei_data: u8) -> Result<Self, ParseError> {
        if ei_data == abi::ELFDATA2LSB {
            Ok(Self::Little)
        } else if ei_data == abi::ELFDATA2MSB {
            Ok(Self::Big)
        } else {
            Err(ParseError::UnsupportedElfEndianness(ei_data))
        }
    }

    /// `true` for [AnyEndian::Little], `false` for [AnyEndian::Big].
    #[inline(always)]
    fn is_little(self) -> bool {
        matches!(self, Self::Little)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The eight input bytes shared by every test in this module.
    const FIXTURE: [u8; 8] = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];

    /// Parses one `$res_typ` from the start of `FIXTURE` with `$endian.$method` and
    /// checks both the decoded value and that the offset advanced by the type's size.
    macro_rules! parse_test {
        ( $endian:expr, $res_typ:ty, $method:ident, $expect:expr) => {{
            let mut cursor = 0;
            let parsed = $endian.$method(&mut cursor, &FIXTURE).unwrap();
            assert_eq!(parsed, $expect);
            assert_eq!(cursor, core::mem::size_of::<$res_typ>());
        }};
    }

    /// Feeds `$endian.$method` every prefix of `FIXTURE` that is too short to hold a
    /// `$res_typ` and checks that each attempt fails with a `SliceReadError`.
    macro_rules! fuzz_too_short_test {
        ( $endian:expr, $res_typ:ty, $method:ident) => {{
            for len in 0..core::mem::size_of::<$res_typ>() {
                let truncated = &FIXTURE[..len];
                let mut cursor: usize = 0;
                let outcome = $endian.$method(&mut cursor, truncated);
                let error = outcome.expect_err("Expected an error, but parsed: ");
                assert!(
                    matches!(error, ParseError::SliceReadError(_)),
                    "Unexpected Error type found: {error}"
                );
            }
        }};
    }

    /// Runs `parse_test!` against all four specs, expecting `$le` from the
    /// little-endian ones and `$be` from the big-endian ones.
    macro_rules! parse_with_every_endian {
        ( $res_typ:ty, $method:ident, $le:expr, $be:expr) => {{
            parse_test!(LittleEndian, $res_typ, $method, $le);
            parse_test!(BigEndian, $res_typ, $method, $be);
            parse_test!(AnyEndian::Little, $res_typ, $method, $le);
            parse_test!(AnyEndian::Big, $res_typ, $method, $be);
        }};
    }

    /// Runs `fuzz_too_short_test!` against all four specs.
    macro_rules! fuzz_with_every_endian {
        ( $res_typ:ty, $method:ident) => {{
            fuzz_too_short_test!(LittleEndian, $res_typ, $method);
            fuzz_too_short_test!(BigEndian, $res_typ, $method);
            fuzz_too_short_test!(AnyEndian::Little, $res_typ, $method);
            fuzz_too_short_test!(AnyEndian::Big, $res_typ, $method);
        }};
    }

    #[test]
    fn parse_u8_at() {
        // A single byte reads the same in either byte order.
        parse_with_every_endian!(u8, parse_u8_at, 0x01u8, 0x01u8);
    }

    #[test]
    fn parse_u16_at() {
        parse_with_every_endian!(u16, parse_u16_at, 0x0201u16, 0x0102u16);
    }

    #[test]
    fn parse_u32_at() {
        parse_with_every_endian!(u32, parse_u32_at, 0x04030201u32, 0x01020304u32);
    }

    #[test]
    fn parse_u64_at() {
        parse_with_every_endian!(
            u64,
            parse_u64_at,
            0x0807060504030201u64,
            0x0102030405060708u64
        );
    }

    #[test]
    fn parse_i32_at() {
        parse_with_every_endian!(i32, parse_i32_at, 0x04030201i32, 0x01020304i32);
    }

    #[test]
    fn parse_i64_at() {
        parse_with_every_endian!(
            i64,
            parse_i64_at,
            0x0807060504030201i64,
            0x0102030405060708i64
        );
    }

    #[test]
    fn fuzz_u8_too_short() {
        fuzz_with_every_endian!(u8, parse_u8_at);
    }

    #[test]
    fn fuzz_u16_too_short() {
        fuzz_with_every_endian!(u16, parse_u16_at);
    }

    #[test]
    fn fuzz_u32_too_short() {
        fuzz_with_every_endian!(u32, parse_u32_at);
    }

    #[test]
    fn fuzz_i32_too_short() {
        fuzz_with_every_endian!(i32, parse_i32_at);
    }

    #[test]
    fn fuzz_u64_too_short() {
        fuzz_with_every_endian!(u64, parse_u64_at);
    }

    #[test]
    fn fuzz_i64_too_short() {
        fuzz_with_every_endian!(i64, parse_i64_at);
    }
}