// goblin/mach/imports.rs
//! Dynamically linked symbolic imports

// table of tuples:
// <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend>
// symbol flags are undocumented

use alloc::vec::Vec;
use core::fmt::{self, Debug};
use core::ops::Range;
use scroll::{Pread, Sleb128, Uleb128};

use crate::container;
use crate::error;
use crate::mach::bind_opcodes;
use crate::mach::load_command;
use crate::mach::segment;

#[derive(Debug)]
/// Import binding information generated by running the Finite State Automaton programmed via `bind_opcodes`
///
/// This is the mutable state the bind interpreter carries from opcode to opcode;
/// every "do bind" opcode snapshots it into an `Import`.
struct BindInformation<'a> {
    /// Index into the segment list, taken from the immediate of
    /// `BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB`
    seg_index: u8,
    /// Offset inside the segment selected by `seg_index`; it is advanced after every bind
    seg_offset: u64,
    /// Bind type, taken from the immediate of `BIND_OPCODE_SET_TYPE_IMM`
    /// (preset to `BIND_TYPE_POINTER` for lazy bindings)
    bind_type: u8,
    /// Index into the list of library names used to resolve the import's `dylib`
    symbol_library_ordinal: u8,
    /// Symbol name read by `BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM`
    symbol_name: &'a str,
    /// Flags from the immediate of `BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM`;
    /// tested against `BIND_SYMBOL_FLAGS_WEAK_IMPORT` by `is_weak`
    symbol_flags: u8,
    /// Addend read by `BIND_OPCODE_SET_ADDEND_SLEB`
    addend: i64,
    /// Immediate of `BIND_OPCODE_SET_DYLIB_SPECIAL_IMM` (defaults to 1).
    /// NOTE(review): a value of 0 ("self") has been observed and presumably means the symbol
    /// is imported from the image itself (seen with libSystem.B.dylib) -- unconfirmed
    special_dylib: u8,
    /// Whether this state belongs to the lazy binding stream
    is_lazy: bool,
}

impl<'a> BindInformation<'a> {
    /// Builds the initial interpreter state for one pass over a bind stream.
    ///
    /// A non-lazy pass starts from the plain defaults. A lazy pass starts from the
    /// defaults with `is_lazy` set and the bind type preset to `BIND_TYPE_POINTER`.
    pub fn new(is_lazy: bool) -> Self {
        if !is_lazy {
            return Self::default();
        }
        Self {
            is_lazy: true,
            bind_type: bind_opcodes::BIND_TYPE_POINTER,
            ..Self::default()
        }
    }
    /// Returns `true` when the weak-import bit is set in the symbol flags.
    pub fn is_weak(&self) -> bool {
        let weak_bits = self.symbol_flags & bind_opcodes::BIND_SYMBOL_FLAGS_WEAK_IMPORT;
        weak_bits != 0
    }
}

impl<'a> Default for BindInformation<'a> {
    fn default() -> Self {
        BindInformation {
            seg_index: 0,
            seg_offset: 0x0,
            bind_type: 0x0,
            special_dylib: 1,
            symbol_library_ordinal: 0,
            symbol_name: "",
            symbol_flags: 0,
            addend: 0,
            is_lazy: false,
        }
    }
}

#[derive(Debug)]
/// A dynamically linked symbolic import
pub struct Import<'a> {
    /// The symbol name dyld uses to resolve this import
    pub name: &'a str,
    /// The library this symbol belongs to (thanks to two-level namespaces)
    pub dylib: &'a str,
    /// Whether the symbol is lazily resolved or not
    pub is_lazy: bool,
    /// The offset in the binary at which this import is found
    /// (the containing segment's file offset plus the binding's offset within that segment)
    pub offset: u64,
    /// The size of this import: 8 for lazy imports, 0 otherwise
    pub size: usize,
    /// The virtual memory address at which this import is found
    /// (the containing segment's vmaddr plus the binding's offset within that segment)
    pub address: u64,
    /// The addend of this import
    pub addend: i64,
    /// Whether this import is weak
    pub is_weak: bool,
    /// The offset in the stream of bind opcodes that caused this import,
    /// measured from the start of that stream
    pub start_of_sequence_offset: u64,
}

impl<'a> Import<'a> {
    /// Create a new import from the import binding information in `bi`.
    ///
    /// * `bi` - interpreter state at the moment a "do bind" opcode fired
    /// * `libs` - library names, indexed by `bi.symbol_library_ordinal`
    /// * `segments` - segments of the binary, indexed by `bi.seg_index`
    /// * `start_of_sequence_offset` - where, relative to the start of the bind stream,
    ///   the opcode sequence producing this import began
    ///
    /// The file offset and virtual address are the segment's `fileoff` / `vmaddr`
    /// plus `bi.seg_offset`, computed with wrapping arithmetic.
    ///
    /// # Panics
    ///
    /// Panics if `bi.seg_index` is not a valid index into `segments`, or if
    /// `bi.symbol_library_ordinal` is not a valid index into `libs`.
    fn new(
        bi: &BindInformation<'a>,
        libs: &[&'a str],
        segments: &[segment::Segment],
        start_of_sequence_offset: usize,
    ) -> Import<'a> {
        let segment = &segments[bi.seg_index as usize];
        // `seg_offset` is taken from the opcode stream and is itself accumulated with
        // `wrapping_add` by the interpreter; wrap here as well so a hostile offset cannot
        // trigger an arithmetic-overflow panic in overflow-checked builds.
        let offset = segment.fileoff.wrapping_add(bi.seg_offset);
        let address = segment.vmaddr.wrapping_add(bi.seg_offset);
        // only lazy imports are given a size (8); non-lazy imports report 0
        let size = if bi.is_lazy { 8 } else { 0 };
        Import {
            name: bi.symbol_name,
            dylib: libs[bi.symbol_library_ordinal as usize],
            is_lazy: bi.is_lazy,
            offset,
            size,
            address,
            addend: bi.addend,
            is_weak: bi.is_weak(),
            start_of_sequence_offset: start_of_sequence_offset as u64,
        }
    }
}

/// An interpreter for mach BIND opcodes.
/// Runs on prebound (non lazy) symbols (usually dylib extern consts and extern variables),
/// and lazy symbols (usually dylib functions)
pub struct BindInterpreter<'a> {
    /// The bytes the bind opcodes are read from; both ranges below index into this slice
    data: &'a [u8],
    /// Byte range of the non-lazy bind opcode stream (`bind_off` .. `bind_off + bind_size`)
    location: Range<usize>,
    /// Byte range of the lazy bind opcode stream (`lazy_bind_off` .. `lazy_bind_off + lazy_bind_size`)
    lazy_location: Range<usize>,
}

impl<'a> Debug for BindInterpreter<'a> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("BindInterpreter")
            .field("data", &"<... redacted ...>")
            .field(
                "location",
                &format_args!("{:#x}..{:#x}", self.location.start, self.location.end),
            )
            .field(
                "lazy_location",
                &format_args!(
                    "{:#x}..{:#x}",
                    self.lazy_location.start, self.lazy_location.end
                ),
            )
            .finish()
    }
}

impl<'a> BindInterpreter<'a> {
    /// Construct a new import binding interpreter from `bytes` and the load `command`.
    ///
    /// Only the two opcode ranges are computed here: `bind_off`/`bind_size` for the
    /// non-lazy stream and `lazy_bind_off`/`lazy_bind_size` for the lazy stream.
    /// The end of each range saturates instead of overflowing. No bytes are read
    /// until [`imports`](Self::imports) is called.
    pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self {
        let get_pos = |off: u32, size: u32| -> Range<usize> {
            let start = off as usize;
            start..start.saturating_add(size as usize)
        };
        let location = get_pos(command.bind_off, command.bind_size);
        let lazy_location = get_pos(command.lazy_bind_off, command.lazy_bind_size);
        BindInterpreter {
            data: bytes,
            location,
            lazy_location,
        }
    }
    /// Return the imports in this binary.
    ///
    /// The non-lazy bind stream is interpreted first, then the lazy one; the imports
    /// of both are returned in that order in a single vector.
    ///
    /// * `libs` - library names, indexed by the library ordinal of each binding
    /// * `segments` - segments of the binary, indexed by the segment index of each binding
    /// * `ctx` - container context; `ctx.size()` is used as the pointer size when
    ///   advancing the binding address
    ///
    /// # Errors
    ///
    /// Returns an error if reading an opcode or one of its operands from the data fails.
    ///
    /// # Panics
    ///
    /// `Import::new` indexes `segments` and `libs` directly, so a binding that names a
    /// segment index or library ordinal outside those slices panics.
    pub fn imports(
        &self,
        libs: &[&'a str],
        segments: &[segment::Segment],
        ctx: container::Ctx,
    ) -> error::Result<Vec<Import<'a>>> {
        let mut imports = Vec::new();
        self.run(false, libs, segments, ctx, &mut imports)?;
        self.run(true, libs, segments, ctx, &mut imports)?;
        Ok(imports)
    }
    /// Interpret one bind opcode stream (lazy or non-lazy) and append every binding it
    /// performs to `imports`.
    ///
    /// The stream is walked from the start of the selected range until the read offset
    /// reaches its end. Opcodes either update the running `BindInformation` or snapshot
    /// it into an `Import` and advance the binding address.
    fn run(
        &self,
        is_lazy: bool,
        libs: &[&'a str],
        segments: &[segment::Segment],
        ctx: container::Ctx,
        imports: &mut Vec<Import<'a>>,
    ) -> error::Result<()> {
        use crate::mach::bind_opcodes::*;
        let location = if is_lazy {
            &self.lazy_location
        } else {
            &self.location
        };
        let mut bind_info = BindInformation::new(is_lazy);
        let mut offset = location.start;
        // offset, relative to the start of the stream, at which the current opcode sequence began
        let mut start_of_sequence: usize = 0;
        while offset < location.end {
            let opcode = self.data.gread::<i8>(&mut offset)? as bind_opcodes::Opcode;
            match opcode & BIND_OPCODE_MASK {
                // we do nothing, don't update our records, and add a new, fresh record
                BIND_OPCODE_DONE => {
                    bind_info = BindInformation::new(is_lazy);
                    start_of_sequence = offset - location.start;
                }
                BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
                    let symbol_library_ordinal = opcode & BIND_IMMEDIATE_MASK;
                    bind_info.symbol_library_ordinal = symbol_library_ordinal;
                }
                BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
                    let symbol_library_ordinal = Uleb128::read(&self.data, &mut offset)?;
                    // NOTE(review): the ordinal is stored in a u8, so values above 255 are
                    // silently truncated here.
                    bind_info.symbol_library_ordinal = symbol_library_ordinal as u8;
                }
                BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => {
                    // dyld puts the immediate into the symbol_library_ordinal field...
                    let special_dylib = opcode & BIND_IMMEDIATE_MASK;
                    bind_info.special_dylib = special_dylib;
                }
                BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
                    let symbol_flags = opcode & BIND_IMMEDIATE_MASK;
                    let symbol_name = self.data.pread::<&str>(offset)?;
                    // step over the name and its terminating \0
                    offset += symbol_name.len() + 1;
                    bind_info.symbol_name = symbol_name;
                    bind_info.symbol_flags = symbol_flags;
                }
                BIND_OPCODE_SET_TYPE_IMM => {
                    let bind_type = opcode & BIND_IMMEDIATE_MASK;
                    bind_info.bind_type = bind_type;
                }
                BIND_OPCODE_SET_ADDEND_SLEB => {
                    let addend = Sleb128::read(&self.data, &mut offset)?;
                    bind_info.addend = addend;
                }
                BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
                    let seg_index = opcode & BIND_IMMEDIATE_MASK;
                    // dyld sets the address to the segActualLoadAddress(segIndex) + uleb128
                    // address = segActualLoadAddress(segmentIndex) + read_uleb128(p, end);
                    let seg_offset = Uleb128::read(&self.data, &mut offset)?;
                    bind_info.seg_index = seg_index;
                    bind_info.seg_offset = seg_offset;
                }
                BIND_OPCODE_ADD_ADDR_ULEB => {
                    let addr = Uleb128::read(&self.data, &mut offset)?;
                    let seg_offset = bind_info.seg_offset.wrapping_add(addr);
                    bind_info.seg_offset = seg_offset;
                }
                // record the record by placing its value into our list
                BIND_OPCODE_DO_BIND => {
                    // from dyld:
                    //      if ( address >= segmentEndAddress )
                    // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p);
                    // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last);
                    // address += sizeof(intptr_t);
                    imports.push(Import::new(&bind_info, libs, segments, start_of_sequence));
                    let seg_offset = bind_info.seg_offset.wrapping_add(ctx.size() as u64);
                    bind_info.seg_offset = seg_offset;
                }
                BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => {
                    // dyld:
                    // if ( address >= segmentEndAddress )
                    // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p);
                    // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last);
                    // address += read_uleb128(p, end) + sizeof(intptr_t);
                    // we bind the old record, then increment bind info address for the next guy, plus the ptr offset
                    imports.push(Import::new(&bind_info, libs, segments, start_of_sequence));
                    let addr = Uleb128::read(&self.data, &mut offset)?;
                    let seg_offset = bind_info
                        .seg_offset
                        .wrapping_add(addr)
                        .wrapping_add(ctx.size() as u64);
                    bind_info.seg_offset = seg_offset;
                }
                BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => {
                    // dyld:
                    // if ( address >= segmentEndAddress )
                    // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p);
                    // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last);
                    // address += immediate*sizeof(intptr_t) + sizeof(intptr_t);
                    // break;
                    // similarly, we bind the old record, then perform address manipulation for the next record
                    imports.push(Import::new(&bind_info, libs, segments, start_of_sequence));
                    let scale = opcode & BIND_IMMEDIATE_MASK;
                    let size = ctx.size() as u64;
                    let seg_offset = bind_info
                        .seg_offset
                        .wrapping_add(u64::from(scale) * size)
                        .wrapping_add(size);
                    bind_info.seg_offset = seg_offset;
                }
                BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => {
                    // dyld:
                    // count = read_uleb128(p, end);
                    // skip = read_uleb128(p, end);
                    // for (uint32_t i=0; i < count; ++i) {
                    // if ( address >= segmentEndAddress )
                    // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p);
                    // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last);
                    // address += skip + sizeof(intptr_t);
                    // }
                    // break;
                    let count = Uleb128::read(&self.data, &mut offset)?;
                    let skip = Uleb128::read(&self.data, &mut offset)?;
                    // `skip` comes straight from the stream; wrap like every other address
                    // computation here so a huge value cannot cause an overflow panic.
                    let skip_plus_size = skip.wrapping_add(ctx.size() as u64);
                    // NOTE(review): `count` is also taken from the stream and is not bounded,
                    // so a hostile value makes this loop push that many imports.
                    for _ in 0..count {
                        imports.push(Import::new(&bind_info, libs, segments, start_of_sequence));
                        let seg_offset = bind_info.seg_offset.wrapping_add(skip_plus_size);
                        bind_info.seg_offset = seg_offset;
                    }
                }
                // any other opcode is skipped without touching the state
                _ => {}
            }
        }
        Ok(())
    }
}