goblin/pe/symbol.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
use crate::error;
use crate::strtab;
use alloc::vec::Vec;
use core::fmt::{self, Debug};
use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith};
/// Size of a single symbol in the COFF Symbol Table.
pub const COFF_SYMBOL_SIZE: usize = 18;
// Values for `Symbol::section_number`.
/// The symbol record is not yet assigned a section. A `value` of zero
/// indicates that a reference to an external symbol is defined elsewhere.
/// A `value` of non-zero is a common symbol with a size that is specified by the `value`.
pub const IMAGE_SYM_UNDEFINED: i16 = 0;
/// The symbol has an absolute (non-relocatable) `value` and is not an address.
pub const IMAGE_SYM_ABSOLUTE: i16 = -1;
/// The symbol provides general type or debugging information but does not
/// correspond to a section.
pub const IMAGE_SYM_DEBUG: i16 = -2;
// Base types for `Symbol::typ`.
/// No type information or unknown base type. Microsoft tools use this setting
pub const IMAGE_SYM_TYPE_NULL: u16 = 0;
/// No valid type; used with void pointers and functions
pub const IMAGE_SYM_TYPE_VOID: u16 = 1;
/// A character (signed byte)
pub const IMAGE_SYM_TYPE_CHAR: u16 = 2;
/// A 2-byte signed integer
pub const IMAGE_SYM_TYPE_SHORT: u16 = 3;
/// A natural integer type (normally 4 bytes in Windows)
pub const IMAGE_SYM_TYPE_INT: u16 = 4;
/// A 4-byte signed integer
pub const IMAGE_SYM_TYPE_LONG: u16 = 5;
/// A 4-byte floating-point number
pub const IMAGE_SYM_TYPE_FLOAT: u16 = 6;
/// An 8-byte floating-point number
pub const IMAGE_SYM_TYPE_DOUBLE: u16 = 7;
/// A structure
pub const IMAGE_SYM_TYPE_STRUCT: u16 = 8;
/// A union
pub const IMAGE_SYM_TYPE_UNION: u16 = 9;
/// An enumerated type
pub const IMAGE_SYM_TYPE_ENUM: u16 = 10;
/// A member of enumeration (a specific value)
pub const IMAGE_SYM_TYPE_MOE: u16 = 11;
/// A byte; unsigned 1-byte integer
pub const IMAGE_SYM_TYPE_BYTE: u16 = 12;
/// A word; unsigned 2-byte integer
pub const IMAGE_SYM_TYPE_WORD: u16 = 13;
/// An unsigned integer of natural size (normally, 4 bytes)
pub const IMAGE_SYM_TYPE_UINT: u16 = 14;
/// An unsigned 4-byte integer
pub const IMAGE_SYM_TYPE_DWORD: u16 = 15;
// Derived types for `Symbol::typ`.
/// No derived type; the symbol is a simple scalar variable.
pub const IMAGE_SYM_DTYPE_NULL: u16 = 0;
/// The symbol is a pointer to base type.
pub const IMAGE_SYM_DTYPE_POINTER: u16 = 1;
/// The symbol is a function that returns a base type.
pub const IMAGE_SYM_DTYPE_FUNCTION: u16 = 2;
/// The symbol is an array of base type.
pub const IMAGE_SYM_DTYPE_ARRAY: u16 = 3;
pub const IMAGE_SYM_TYPE_MASK: u16 = 0xf;
pub const IMAGE_SYM_DTYPE_SHIFT: usize = 4;
// Values for `Symbol::storage_class`.
/// A special symbol that represents the end of function, for debugging purposes.
pub const IMAGE_SYM_CLASS_END_OF_FUNCTION: u8 = 0xff;
/// No assigned storage class.
pub const IMAGE_SYM_CLASS_NULL: u8 = 0;
/// The automatic (stack) variable.
///
/// The `value` field specifies the stack frame offset.
pub const IMAGE_SYM_CLASS_AUTOMATIC: u8 = 1;
/// A value that Microsoft tools use for external symbols.
///
/// The `value` field indicates the size if the section number is
/// `IMAGE_SYM_UNDEFINED` (0). If the section number is not zero,
/// then the `value` field specifies the offset within the section.
pub const IMAGE_SYM_CLASS_EXTERNAL: u8 = 2;
/// A static symbol.
///
/// The 'value' field specifies the offset of the symbol within the section.
/// If the `value` field is zero, then the symbol represents a section name.
pub const IMAGE_SYM_CLASS_STATIC: u8 = 3;
/// A register variable.
///
/// The `value` field specifies the register number.
pub const IMAGE_SYM_CLASS_REGISTER: u8 = 4;
/// A symbol that is defined externally.
pub const IMAGE_SYM_CLASS_EXTERNAL_DEF: u8 = 5;
/// A code label that is defined within the module.
///
/// The `value` field specifies the offset of the symbol within the section.
pub const IMAGE_SYM_CLASS_LABEL: u8 = 6;
/// A reference to a code label that is not defined.
pub const IMAGE_SYM_CLASS_UNDEFINED_LABEL: u8 = 7;
/// The structure member.
///
/// The `value` field specifies the n th member.
pub const IMAGE_SYM_CLASS_MEMBER_OF_STRUCT: u8 = 8;
/// A formal argument (parameter) of a function.
///
/// The `value` field specifies the n th argument.
pub const IMAGE_SYM_CLASS_ARGUMENT: u8 = 9;
/// The structure tag-name entry.
pub const IMAGE_SYM_CLASS_STRUCT_TAG: u8 = 10;
/// A union member.
///
/// The `value` field specifies the n th member.
pub const IMAGE_SYM_CLASS_MEMBER_OF_UNION: u8 = 11;
/// The Union tag-name entry.
pub const IMAGE_SYM_CLASS_UNION_TAG: u8 = 12;
/// A Typedef entry.
pub const IMAGE_SYM_CLASS_TYPE_DEFINITION: u8 = 13;
/// A static data declaration.
pub const IMAGE_SYM_CLASS_UNDEFINED_STATIC: u8 = 14;
/// An enumerated type tagname entry.
pub const IMAGE_SYM_CLASS_ENUM_TAG: u8 = 15;
/// A member of an enumeration.
///
/// The `value` field specifies the n th member.
pub const IMAGE_SYM_CLASS_MEMBER_OF_ENUM: u8 = 16;
/// A register parameter.
pub const IMAGE_SYM_CLASS_REGISTER_PARAM: u8 = 17;
/// A bit-field reference.
///
/// The `value` field specifies the n th bit in the bit field.
pub const IMAGE_SYM_CLASS_BIT_FIELD: u8 = 18;
/// A .bb (beginning of block) or .eb (end of block) record.
///
/// The `value` field is the relocatable address of the code location.
pub const IMAGE_SYM_CLASS_BLOCK: u8 = 100;
/// A value that Microsoft tools use for symbol records that define the extent of a function.
///
/// Records may be begin function (.bf ), end function ( .ef ), and lines in function ( .lf ).
/// For .lf records, the `value` field gives the number of source lines in the function.
/// For .ef records, the `value` field gives the size of the function code.
pub const IMAGE_SYM_CLASS_FUNCTION: u8 = 101;
/// An end-of-structure entry.
pub const IMAGE_SYM_CLASS_END_OF_STRUCT: u8 = 102;
/// The source-file symbol record.
///
/// The symbol is followed by auxiliary records that name the file.
pub const IMAGE_SYM_CLASS_FILE: u8 = 103;
/// A definition of a section (Microsoft tools use STATIC storage class instead).
pub const IMAGE_SYM_CLASS_SECTION: u8 = 104;
/// A weak external.
pub const IMAGE_SYM_CLASS_WEAK_EXTERNAL: u8 = 105;
/// A CLR token symbol.
///
/// The name is an ASCII string that consists of the hexadecimal value of the token.
pub const IMAGE_SYM_CLASS_CLR_TOKEN: u8 = 107;
/// A COFF symbol.
///
/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`].
///
/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)]
pub struct Symbol {
/// The name of the symbol.
///
/// An array of 8 bytes is used if the name is not more than 8 bytes long.
/// This array is padded with nulls on the right if the name is less than 8 bytes long.
///
/// For longer names, the first 4 bytes are all zeros, and the second 4 bytes
/// are an offset into the string table.
pub name: [u8; 8],
/// The value that is associated with the symbol.
///
/// The interpretation of this field depends on `section_number` and
/// `storage_class`. A typical meaning is the relocatable address.
pub value: u32,
/// A one-based index into the section table. Zero and negative values have special meanings.
pub section_number: i16,
/// A number that represents type.
///
/// Microsoft tools set this field to 0x20 (function) or 0x0 (not a function).
pub typ: u16,
/// An enumerated value that represents storage class.
pub storage_class: u8,
/// The number of auxiliary symbol table entries that follow this record.
///
/// Each auxiliary record is the same size as a standard symbol-table record (18 bytes),
/// but rather than define a new symbol, the auxiliary record gives additional information
/// on the last symbol defined.
pub number_of_aux_symbols: u8,
}
impl Symbol {
/// Parse the symbol at the given offset.
///
/// If the symbol has an inline name, then also returns a reference to the name's
/// location in `bytes`.
pub fn parse<'a>(bytes: &'a [u8], offset: usize) -> error::Result<(Option<&'a str>, Symbol)> {
let symbol = bytes.pread::<Symbol>(offset)?;
let name = if symbol.name[0] != 0 {
bytes
.pread_with(offset, ctx::StrCtx::DelimiterUntil(0, 8))
.ok()
} else {
None
};
Ok((name, symbol))
}
/// Returns the symbol name.
///
/// This may be a reference to an inline name in the symbol, or to
/// a strtab entry.
pub fn name<'a>(&'a self, strtab: &'a strtab::Strtab) -> error::Result<&'a str> {
if let Some(offset) = self.name_offset() {
strtab.get_at(offset as usize).ok_or_else(|| {
error::Error::Malformed(format!("Invalid Symbol name offset {:#x}", offset))
})
} else {
Ok(self.name.pread(0)?)
}
}
/// Return the strtab offset of the symbol name.
///
/// Returns `None` if the name is inline.
pub fn name_offset(&self) -> Option<u32> {
// Symbol offset starts at the strtable's length, so let's adjust it
let length_field_size = core::mem::size_of::<u32>() as u32;
if self.name[0] == 0 {
self.name
.pread_with(4, scroll::LE)
.ok()
.map(|offset: u32| offset - length_field_size)
} else {
None
}
}
/// Set the strtab offset of the symbol name.
pub fn set_name_offset(&mut self, offset: u32) {
self.name[..4].copy_from_slice(&[0; 4]);
self.name.pwrite_with(offset, 4, scroll::LE).unwrap();
}
/// Return the base type of the symbol.
///
/// This type uses the `IMAGE_SYM_TYPE_*` definitions.
pub fn base_type(&self) -> u16 {
self.typ & IMAGE_SYM_TYPE_MASK
}
/// Return the derived type of the symbol.
///
/// This type uses the `IMAGE_SYM_DTYPE_*` definitions.
pub fn derived_type(&self) -> u16 {
self.typ >> IMAGE_SYM_DTYPE_SHIFT
}
/// Return true for function definitions.
///
/// These symbols use `AuxFunctionDefinition` for auxiliary symbol records.
pub fn is_function_definition(&self) -> bool {
self.storage_class == IMAGE_SYM_CLASS_EXTERNAL
&& self.derived_type() == IMAGE_SYM_DTYPE_FUNCTION
&& self.section_number > 0
}
/// Return true for weak external symbols.
///
/// These symbols use `AuxWeakExternal` for auxiliary symbol records.
pub fn is_weak_external(&self) -> bool {
self.storage_class == IMAGE_SYM_CLASS_WEAK_EXTERNAL
}
/// Return true for file symbol records.
///
/// The auxiliary records contain the name of the source code file.
pub fn is_file(&self) -> bool {
self.storage_class == IMAGE_SYM_CLASS_FILE
}
/// Return true for section definitions.
///
/// These symbols use `AuxSectionDefinition` for auxiliary symbol records.
pub fn is_section_definition(&self) -> bool {
self.storage_class == IMAGE_SYM_CLASS_STATIC && self.number_of_aux_symbols > 0
}
}
/// Auxiliary symbol record for function definitions.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)]
pub struct AuxFunctionDefinition {
/// The symbol-table index of the corresponding `.bf` (begin function) symbol record.
pub tag_index: u32,
/// The size of the executable code for the function itself.
///
/// If the function is in its own section, the `size_of_raw_data` in the section header
/// is greater or equal to this field, depending on alignment considerations.
pub total_size: u32,
/// The file offset of the first COFF line-number entry for the function,
/// or zero if none exists.
pub pointer_to_line_number: u32,
/// The symbol-table index of the record for the next function.
///
/// If the function is the last in the symbol table, this field is set to zero.
pub pointer_to_next_function: u32,
/// Unused padding.
pub unused: [u8; 2],
}
/// Auxiliary symbol record for symbols with storage class `IMAGE_SYM_CLASS_FUNCTION`.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)]
pub struct AuxBeginAndEndFunction {
/// Unused padding.
pub unused1: [u8; 4],
/// The actual ordinal line number within the source file, corresponding
/// to the `.bf` or `.ef` record.
pub line_number: u16,
/// Unused padding.
pub unused2: [u8; 6],
/// The symbol-table index of the next `.bf` symbol record.
///
/// If the function is the last in the symbol table, this field is set to zero.
/// It is not used for `.ef` records.
pub pointer_to_next_function: u32,
/// Unused padding.
pub unused3: [u8; 2],
}
// Values for the `characteristics` field of `AuxWeakExternal`.
/// Indicates that no library search for the symbol should be performed.
pub const IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY: u32 = 1;
/// Indicates that a library search for the symbol should be performed.
pub const IMAGE_WEAK_EXTERN_SEARCH_LIBRARY: u32 = 2;
/// Indicates that the symbol is an alias for the symbol given by the `tag_index` field.
pub const IMAGE_WEAK_EXTERN_SEARCH_ALIAS: u32 = 3;
/// Auxiliary symbol record for weak external symbols.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)]
pub struct AuxWeakExternal {
/// The symbol-table index of the symbol to be linked if an external definition is not found.
pub tag_index: u32,
/// Flags that control how the symbol should be linked.
pub characteristics: u32,
/// Unused padding.
pub unused: [u8; 10],
}
// Values for the `selection` field of `AuxSectionDefinition`.
/// If this symbol is already defined, the linker issues a "multiply defined symbol" error.
pub const IMAGE_COMDAT_SELECT_NODUPLICATES: u8 = 1;
/// Any section that defines the same COMDAT symbol can be linked; the rest are removed.
pub const IMAGE_COMDAT_SELECT_ANY: u8 = 2;
/// The linker chooses an arbitrary section among the definitions for this symbol.
///
/// If all definitions are not the same size, a "multiply defined symbol" error is issued.
pub const IMAGE_COMDAT_SELECT_SAME_SIZE: u8 = 3;
/// The linker chooses an arbitrary section among the definitions for this symbol.
///
/// If all definitions do not match exactly, a "multiply defined symbol" error is issued.
pub const IMAGE_COMDAT_SELECT_EXACT_MATCH: u8 = 4;
/// The section is linked if a certain other COMDAT section is linked.
///
/// This other section is indicated by the `number` field of the auxiliary symbol record
/// for the section definition. This setting is useful for definitions that have components
/// in multiple sections (for example, code in one and data in another), but where all must
/// be linked or discarded as a set. The other section with which this section is associated
/// must be a COMDAT section; it cannot be another associative COMDAT section (that is, the
/// other section cannot have `IMAGE_COMDAT_SELECT_ASSOCIATIVE` set).
pub const IMAGE_COMDAT_SELECT_ASSOCIATIVE: u8 = 5;
/// The linker chooses the largest definition from among all of the definitions for this symbol.
///
/// If multiple definitions have this size, the choice between them is arbitrary.
pub const IMAGE_COMDAT_SELECT_LARGEST: u8 = 6;
/// Auxiliary symbol record for section definitions.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)]
pub struct AuxSectionDefinition {
/// The size of section data; the same as `size_of_raw_data` in the section header.
pub length: u32,
/// The number of relocation entries for the section.
pub number_of_relocations: u16,
/// The number of line-number entries for the section.
pub number_of_line_numbers: u16,
/// The checksum for communal data.
///
/// It is applicable if the `IMAGE_SCN_LNK_COMDAT` flag is set in the section header.
pub checksum: u32,
/// One-based index into the section table for the associated section.
///
/// This is used when the `selection` field is `IMAGE_COMDAT_SELECT_ASSOCIATIVE`.
pub number: u16,
/// The COMDAT selection number.
///
/// This is applicable if the section is a COMDAT section.
pub selection: u8,
/// Unused padding.
pub unused: [u8; 3],
}
/// A COFF symbol table.
// TODO: #[derive(Pwrite)] produce unparseable tokens
pub struct SymbolTable<'a> {
symbols: &'a [u8],
}
impl<'a> SymbolTable<'a> {
/// Parse a COFF symbol table at the given offset.
///
/// The offset and number of symbols should be from the COFF header.
pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<SymbolTable<'a>> {
let symbols = bytes.pread_with(offset, Self::size(number))?;
Ok(SymbolTable { symbols })
}
/// Get the size in bytes of the symbol table.
pub fn size(number: usize) -> usize {
number * COFF_SYMBOL_SIZE
}
/// Get the symbol at the given index.
///
/// If the symbol has an inline name, then also returns a reference to the name's
/// location in `bytes`.
pub fn get(&self, index: usize) -> Option<(Option<&'a str>, Symbol)> {
let offset = index * COFF_SYMBOL_SIZE;
Symbol::parse(self.symbols, offset).ok()
}
/// Get the auxiliary symbol record for a function definition.
pub fn aux_function_definition(&self, index: usize) -> Option<AuxFunctionDefinition> {
let offset = index * COFF_SYMBOL_SIZE;
self.symbols.pread(offset).ok()
}
/// Get the auxiliary symbol record for a `.bf` or `.ef` symbol record.
pub fn aux_begin_and_end_function(&self, index: usize) -> Option<AuxBeginAndEndFunction> {
let offset = index * COFF_SYMBOL_SIZE;
self.symbols.pread(offset).ok()
}
/// Get the auxiliary symbol record for a weak external.
pub fn aux_weak_external(&self, index: usize) -> Option<AuxWeakExternal> {
let offset = index * COFF_SYMBOL_SIZE;
self.symbols.pread(offset).ok()
}
/// Get the file name from the auxiliary symbol record for a file symbol record.
pub fn aux_file(&self, index: usize, number: usize) -> Option<&'a str> {
let offset = index * COFF_SYMBOL_SIZE;
let length = number * COFF_SYMBOL_SIZE;
self.symbols
.pread_with(offset, ctx::StrCtx::DelimiterUntil(0, length))
.ok()
}
/// Get the auxiliary symbol record for a section definition.
pub fn aux_section_definition(&self, index: usize) -> Option<AuxSectionDefinition> {
let offset = index * COFF_SYMBOL_SIZE;
self.symbols.pread(offset).ok()
}
/// Return an iterator for the COFF symbols.
///
/// This iterator skips over auxiliary symbol records.
pub fn iter(&self) -> SymbolIterator<'a> {
SymbolIterator {
index: 0,
symbols: self.symbols,
}
}
}
impl<'a> ctx::TryIntoCtx<scroll::Endian> for SymbolTable<'a> {
type Error = error::Error;
fn try_into_ctx(self, bytes: &mut [u8], _ctx: scroll::Endian) -> Result<usize, Self::Error> {
bytes.pwrite(self.symbols, 0).map_err(|err| err.into())
}
}
impl<'a> Debug for SymbolTable<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("SymbolTable")
.field("symbols", &self.iter().collect::<Vec<_>>())
.finish()
}
}
/// An iterator for COFF symbols.
///
/// This iterator skips over auxiliary symbol records.
#[derive(Default)]
pub struct SymbolIterator<'a> {
index: usize,
symbols: &'a [u8],
}
impl<'a> Iterator for SymbolIterator<'a> {
type Item = (usize, Option<&'a str>, Symbol);
fn next(&mut self) -> Option<Self::Item> {
let offset = self.index * COFF_SYMBOL_SIZE;
if offset >= self.symbols.len() {
None
} else {
let index = self.index;
let (name, symbol) = Symbol::parse(self.symbols, offset).ok()?;
self.index += 1 + symbol.number_of_aux_symbols as usize;
Some((index, name, symbol))
}
}
}