elf/
hash.rs

1//! Parsing hash table sections for symbol tables: `.hash`, and `.gnu.hash`
2use core::mem::size_of;
3
4use crate::endian::EndianParse;
5use crate::file::Class;
6use crate::parse::{ParseAt, ParseError, ParsingTable, ReadBytesExt};
7use crate::string_table::StringTable;
8use crate::symbol::{Symbol, SymbolTable};
9
10impl ParseAt for u32 {
11    fn parse_at<E: EndianParse>(
12        endian: E,
13        _class: Class,
14        offset: &mut usize,
15        data: &[u8],
16    ) -> Result<Self, ParseError> {
17        endian.parse_u32_at(offset, data)
18    }
19
20    #[inline]
21    fn size_for(_class: Class) -> usize {
22        core::mem::size_of::<u32>()
23    }
24}
25
26type U32Table<'data, E> = ParsingTable<'data, E, u32>;
27
28/// Header at the start of SysV Hash Table sections of type [SHT_HASH](crate::abi::SHT_HASH).
29#[derive(Debug, Clone, PartialEq, Eq)]
30pub struct SysVHashHeader {
31    pub nbucket: u32,
32    pub nchain: u32,
33}
34
35impl ParseAt for SysVHashHeader {
36    fn parse_at<E: EndianParse>(
37        endian: E,
38        _class: Class,
39        offset: &mut usize,
40        data: &[u8],
41    ) -> Result<Self, ParseError> {
42        Ok(SysVHashHeader {
43            nbucket: endian.parse_u32_at(offset, data)?,
44            nchain: endian.parse_u32_at(offset, data)?,
45        })
46    }
47
48    #[inline]
49    fn size_for(_class: Class) -> usize {
50        size_of::<u32>() + size_of::<u32>()
51    }
52}
53
54/// Calculate the SysV hash value for a given symbol name.
55pub fn sysv_hash(name: &[u8]) -> u32 {
56    let mut hash = 0u32;
57    for byte in name {
58        hash = hash.wrapping_mul(16).wrapping_add(*byte as u32);
59        hash ^= (hash >> 24) & 0xf0;
60    }
61    hash & 0xfffffff
62}
63
64#[derive(Debug)]
65pub struct SysVHashTable<'data, E: EndianParse> {
66    buckets: U32Table<'data, E>,
67    chains: U32Table<'data, E>,
68}
69
70/// This constructs a lazy-parsing type that keeps a reference to the provided data
71/// bytes from which it lazily parses and interprets its contents.
72impl<'data, E: EndianParse> SysVHashTable<'data, E> {
73    /// Construct a SysVHashTable from given bytes. Keeps a reference to the data for lazy parsing.
74    pub fn new(endian: E, class: Class, data: &'data [u8]) -> Result<Self, ParseError> {
75        let mut offset = 0;
76        let hdr = SysVHashHeader::parse_at(endian, class, &mut offset, data)?;
77
78        let buckets_size = size_of::<u32>()
79            .checked_mul(hdr.nbucket.try_into()?)
80            .ok_or(ParseError::IntegerOverflow)?;
81        let buckets_end = offset
82            .checked_add(buckets_size)
83            .ok_or(ParseError::IntegerOverflow)?;
84        let buckets_buf = data.get_bytes(offset..buckets_end)?;
85        let buckets = U32Table::new(endian, class, buckets_buf);
86        offset = buckets_end;
87
88        let chains_size = size_of::<u32>()
89            .checked_mul(hdr.nchain.try_into()?)
90            .ok_or(ParseError::IntegerOverflow)?;
91        let chains_end = offset
92            .checked_add(chains_size)
93            .ok_or(ParseError::IntegerOverflow)?;
94        let chains_buf = data.get_bytes(offset..chains_end)?;
95        let chains = U32Table::new(endian, class, chains_buf);
96
97        Ok(SysVHashTable { buckets, chains })
98    }
99
100    /// Use the hash table to find the symbol table entry with the given name and hash.
101    pub fn find(
102        &self,
103        name: &[u8],
104        symtab: &SymbolTable<'data, E>,
105        strtab: &StringTable<'data>,
106    ) -> Result<Option<(usize, Symbol)>, ParseError> {
107        // empty hash tables don't have any entries. This avoids a divde by zero in the modulus calculation
108        if self.buckets.is_empty() {
109            return Ok(None);
110        }
111
112        let hash = sysv_hash(name);
113
114        let start = (hash as usize) % self.buckets.len();
115        let mut index = self.buckets.get(start)? as usize;
116
117        // Bound the number of chain lookups by the chain size so we don't loop forever
118        let mut i = 0;
119        while index != 0 && i < self.chains.len() {
120            let symbol = symtab.get(index)?;
121            if strtab.get_raw(symbol.st_name as usize)? == name {
122                return Ok(Some((index, symbol)));
123            }
124
125            index = self.chains.get(index)? as usize;
126            i += 1;
127        }
128        Ok(None)
129    }
130}
131
132/// Calculate the GNU hash for a given symbol name.
133pub fn gnu_hash(name: &[u8]) -> u32 {
134    let mut hash = 5381u32;
135    for byte in name {
136        hash = hash.wrapping_mul(33).wrapping_add(u32::from(*byte));
137    }
138    hash
139}
140
141/// Header at the start of a GNU extension Hash Table section of type [SHT_GNU_HASH](crate::abi::SHT_GNU_HASH).
142#[derive(Debug, Clone, PartialEq, Eq)]
143pub struct GnuHashHeader {
144    pub nbucket: u32,
145    /// The symbol table index of the first symbol in the hash table.
146    /// (GNU hash sections omit symbols at the start of the table that wont be looked up)
147    pub table_start_idx: u32,
148    /// The number of words in the bloom filter. (must be a non-zero power of 2)
149    pub nbloom: u32,
150    /// The bit shift count for the bloom filter.
151    pub nshift: u32,
152}
153
154impl ParseAt for GnuHashHeader {
155    fn parse_at<E: EndianParse>(
156        endian: E,
157        _class: Class,
158        offset: &mut usize,
159        data: &[u8],
160    ) -> Result<Self, ParseError> {
161        Ok(GnuHashHeader {
162            nbucket: endian.parse_u32_at(offset, data)?,
163            table_start_idx: endian.parse_u32_at(offset, data)?,
164            nbloom: endian.parse_u32_at(offset, data)?,
165            nshift: endian.parse_u32_at(offset, data)?,
166        })
167    }
168
169    #[inline]
170    fn size_for(_class: Class) -> usize {
171        size_of::<u32>() + size_of::<u32>() + size_of::<u32>() + size_of::<u32>()
172    }
173}
174
175type U64Table<'data, E> = ParsingTable<'data, E, u64>;
176
177impl ParseAt for u64 {
178    fn parse_at<E: EndianParse>(
179        endian: E,
180        _class: Class,
181        offset: &mut usize,
182        data: &[u8],
183    ) -> Result<Self, ParseError> {
184        endian.parse_u64_at(offset, data)
185    }
186
187    #[inline]
188    fn size_for(_class: Class) -> usize {
189        core::mem::size_of::<u64>()
190    }
191}
192
193#[derive(Debug)]
194pub struct GnuHashTable<'data, E: EndianParse> {
195    pub hdr: GnuHashHeader,
196
197    endian: E,
198    class: Class,
199    bloom: &'data [u8],
200    buckets: U32Table<'data, E>,
201    chains: U32Table<'data, E>,
202}
203
204impl<'data, E: EndianParse> GnuHashTable<'data, E> {
205    /// Construct a GnuHashTable from given bytes. Keeps a reference to the data for lazy parsing.
206    pub fn new(endian: E, class: Class, data: &'data [u8]) -> Result<Self, ParseError> {
207        let mut offset = 0;
208        let hdr = GnuHashHeader::parse_at(endian, class, &mut offset, data)?;
209
210        // length of the bloom filter in bytes. ELF32 is [u32; nbloom], ELF64 is [u64; nbloom].
211        let nbloom: usize = hdr.nbloom as usize;
212        let bloom_size = match class {
213            Class::ELF32 => nbloom
214                .checked_mul(size_of::<u32>())
215                .ok_or(ParseError::IntegerOverflow)?,
216            Class::ELF64 => nbloom
217                .checked_mul(size_of::<u64>())
218                .ok_or(ParseError::IntegerOverflow)?,
219        };
220        let bloom_end = offset
221            .checked_add(bloom_size)
222            .ok_or(ParseError::IntegerOverflow)?;
223        let bloom_buf = data.get_bytes(offset..bloom_end)?;
224        offset = bloom_end;
225
226        let buckets_size = size_of::<u32>()
227            .checked_mul(hdr.nbucket.try_into()?)
228            .ok_or(ParseError::IntegerOverflow)?;
229        let buckets_end = offset
230            .checked_add(buckets_size)
231            .ok_or(ParseError::IntegerOverflow)?;
232        let buckets_buf = data.get_bytes(offset..buckets_end)?;
233        let buckets = U32Table::new(endian, class, buckets_buf);
234        offset = buckets_end;
235
236        // the rest of the section is the chains
237        let chains_buf = data
238            .get(offset..)
239            .ok_or(ParseError::SliceReadError((offset, data.len())))?;
240        let chains = U32Table::new(endian, class, chains_buf);
241
242        Ok(GnuHashTable {
243            hdr,
244            endian,
245            class,
246            bloom: bloom_buf,
247            buckets,
248            chains,
249        })
250    }
251
252    /// Use the hash table to find the symbol table entry with the given name.
253    pub fn find(
254        &self,
255        name: &[u8],
256        symtab: &SymbolTable<'data, E>,
257        strtab: &StringTable<'data>,
258    ) -> Result<Option<(usize, Symbol)>, ParseError> {
259        // empty hash tables don't have any entries. This avoids a divde by zero in the modulus calculation,
260        // and also avoids a potential division by zero panic in the bloom filter index calculation.
261        if self.buckets.is_empty() || self.hdr.nbloom == 0 {
262            return Ok(None);
263        }
264
265        let hash = gnu_hash(name);
266
267        // Test against bloom filter.
268        let (bloom_width, filter) = match self.class {
269            Class::ELF32 => {
270                let bloom_width: u32 = 8 * size_of::<u32>() as u32; // 32
271                let bloom_idx = (hash / (bloom_width)) % self.hdr.nbloom;
272                let bloom_table = U32Table::new(self.endian, self.class, self.bloom);
273                (bloom_width, bloom_table.get(bloom_idx as usize)? as u64)
274            }
275            Class::ELF64 => {
276                let bloom_width: u32 = 8 * size_of::<u64>() as u32; // 64
277                let bloom_idx = (hash / (bloom_width)) % self.hdr.nbloom;
278                let bloom_table = U64Table::new(self.endian, self.class, self.bloom);
279                (bloom_width, bloom_table.get(bloom_idx as usize)?)
280            }
281        };
282
283        // Check bloom filter for both hashes - symbol is present in the hash table IFF both bits are set.
284        if filter & (1 << (hash % bloom_width)) == 0 {
285            return Ok(None);
286        }
287        let hash2 = hash
288            .checked_shr(self.hdr.nshift)
289            .ok_or(ParseError::IntegerOverflow)?;
290        if filter & (1 << (hash2 % bloom_width)) == 0 {
291            return Ok(None);
292        }
293
294        let table_start_idx = self.hdr.table_start_idx as usize;
295        let chain_start_idx = self.buckets.get((hash as usize) % self.buckets.len())? as usize;
296        if chain_start_idx < table_start_idx {
297            // All symbols before table_start_idx don't exist in the hash table
298            return Ok(None);
299        }
300
301        let chain_len = self.chains.len();
302        for chain_idx in (chain_start_idx - table_start_idx)..chain_len {
303            let chain_hash = self.chains.get(chain_idx)?;
304
305            // compare the hashes by or'ing the 1's bit back on
306            if hash | 1 == chain_hash | 1 {
307                // we have a hash match!
308                // let's see if this symtab[sym_idx].name is what we're looking for
309                let sym_idx = chain_idx
310                    .checked_add(table_start_idx)
311                    .ok_or(ParseError::IntegerOverflow)?;
312                let symbol = symtab.get(sym_idx)?;
313                let r_sym_name = strtab.get_raw(symbol.st_name as usize)?;
314
315                if r_sym_name == name {
316                    return Ok(Some((sym_idx, symbol)));
317                }
318            }
319
320            // the chain uses the 1's bit to signal chain comparison stoppage
321            if chain_hash & 1 != 0 {
322                break;
323            }
324        }
325
326        Ok(None)
327    }
328}
329
330#[cfg(test)]
331mod sysv_parse_tests {
332    use super::*;
333    use crate::endian::{BigEndian, LittleEndian};
334    use crate::parse::{test_parse_for, test_parse_fuzz_too_short};
335
336    #[test]
337    fn parse_sysvhdr32_lsb() {
338        test_parse_for(
339            LittleEndian,
340            Class::ELF32,
341            SysVHashHeader {
342                nbucket: 0x03020100,
343                nchain: 0x07060504,
344            },
345        );
346    }
347
348    #[test]
349    fn parse_sysvhdr32_msb() {
350        test_parse_for(
351            BigEndian,
352            Class::ELF32,
353            SysVHashHeader {
354                nbucket: 0x00010203,
355                nchain: 0x04050607,
356            },
357        );
358    }
359
360    #[test]
361    fn parse_sysvhdr64_lsb() {
362        test_parse_for(
363            LittleEndian,
364            Class::ELF64,
365            SysVHashHeader {
366                nbucket: 0x03020100,
367                nchain: 0x07060504,
368            },
369        );
370    }
371
372    #[test]
373    fn parse_sysvhdr64_msb() {
374        test_parse_for(
375            BigEndian,
376            Class::ELF64,
377            SysVHashHeader {
378                nbucket: 0x00010203,
379                nchain: 0x04050607,
380            },
381        );
382    }
383
384    #[test]
385    fn parse_sysvhdr32_lsb_fuzz_too_short() {
386        test_parse_fuzz_too_short::<_, SysVHashHeader>(LittleEndian, Class::ELF32);
387    }
388
389    #[test]
390    fn parse_sysvhdr32_msb_fuzz_too_short() {
391        test_parse_fuzz_too_short::<_, SysVHashHeader>(BigEndian, Class::ELF32);
392    }
393
394    #[test]
395    fn parse_sysvhdr64_lsb_fuzz_too_short() {
396        test_parse_fuzz_too_short::<_, SysVHashHeader>(LittleEndian, Class::ELF64);
397    }
398
399    #[test]
400    fn parse_sysvhdr64_msb_fuzz_too_short() {
401        test_parse_fuzz_too_short::<_, SysVHashHeader>(BigEndian, Class::ELF64);
402    }
403}
404
405#[cfg(test)]
406mod gnu_parse_tests {
407    use super::*;
408    use crate::endian::{BigEndian, LittleEndian};
409    use crate::parse::{test_parse_for, test_parse_fuzz_too_short};
410
411    #[test]
412    fn gnu_hash_tests() {
413        // some known example hash values
414        assert_eq!(gnu_hash(b""), 0x00001505);
415        assert_eq!(gnu_hash(b"printf"), 0x156b2bb8);
416        assert_eq!(gnu_hash(b"exit"), 0x7c967e3f);
417        assert_eq!(gnu_hash(b"syscall"), 0xbac212a0);
418    }
419
420    #[test]
421    fn parse_gnuhdr32_lsb() {
422        test_parse_for(
423            LittleEndian,
424            Class::ELF32,
425            GnuHashHeader {
426                nbucket: 0x03020100,
427                table_start_idx: 0x07060504,
428                nbloom: 0x0B0A0908,
429                nshift: 0x0F0E0D0C,
430            },
431        );
432    }
433
434    #[test]
435    fn parse_gnuhdr32_msb() {
436        test_parse_for(
437            BigEndian,
438            Class::ELF32,
439            GnuHashHeader {
440                nbucket: 0x00010203,
441                table_start_idx: 0x04050607,
442                nbloom: 0x008090A0B,
443                nshift: 0x0C0D0E0F,
444            },
445        );
446    }
447
448    #[test]
449    fn parse_gnuhdr64_lsb() {
450        test_parse_for(
451            LittleEndian,
452            Class::ELF64,
453            GnuHashHeader {
454                nbucket: 0x03020100,
455                table_start_idx: 0x07060504,
456                nbloom: 0x0B0A0908,
457                nshift: 0x0F0E0D0C,
458            },
459        );
460    }
461
462    #[test]
463    fn parse_gnuhdr64_msb() {
464        test_parse_for(
465            BigEndian,
466            Class::ELF64,
467            GnuHashHeader {
468                nbucket: 0x00010203,
469                table_start_idx: 0x04050607,
470                nbloom: 0x008090A0B,
471                nshift: 0x0C0D0E0F,
472            },
473        );
474    }
475
476    #[test]
477    fn parse_gnuhdr32_lsb_fuzz_too_short() {
478        test_parse_fuzz_too_short::<_, GnuHashHeader>(LittleEndian, Class::ELF32);
479    }
480
481    #[test]
482    fn parse_gnuhdr32_msb_fuzz_too_short() {
483        test_parse_fuzz_too_short::<_, GnuHashHeader>(BigEndian, Class::ELF32);
484    }
485
486    #[test]
487    fn parse_gnuhdr64_lsb_fuzz_too_short() {
488        test_parse_fuzz_too_short::<_, GnuHashHeader>(LittleEndian, Class::ELF64);
489    }
490
491    #[test]
492    fn parse_gnuhdr64_msb_fuzz_too_short() {
493        test_parse_fuzz_too_short::<_, GnuHashHeader>(BigEndian, Class::ELF64);
494    }
495}