summaryrefslogtreecommitdiffstats
path: root/third_party/rust/goblin/src/elf/gnu_hash.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/goblin/src/elf/gnu_hash.rs')
-rw-r--r--third_party/rust/goblin/src/elf/gnu_hash.rs220
1 files changed, 220 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/elf/gnu_hash.rs b/third_party/rust/goblin/src/elf/gnu_hash.rs
new file mode 100644
index 0000000000..d785382c26
--- /dev/null
+++ b/third_party/rust/goblin/src/elf/gnu_hash.rs
@@ -0,0 +1,220 @@
+//! A Gnu Hash table as 4 sections:
+//!
+//! 1. Header
+//! 2. Bloom Filter
+//! 3. Hash Buckets
+//! 4. Chains
+//!
+//! The header has is an array of four `u32`s:
+//!
+//! 1. nbuckets
+//! 2. symndx
+//! 3. maskwords
+//! 4. shift2
+//!
+//! See more:
+//! * http://www.linker-aliens.org/blogs/ali/entry/gnu_hash_elf_sections
+//! or https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
+//! * https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/
+
+/// GNU hash function: accepts a symbol name and returns a value that may be
+/// used to compute a bucket index.
+///
+/// Consequently, if the hashing function returns the value `x` for some name,
+/// `buckets[x % nbuckets]` gives an index, `y`, into both the symbol table
+/// and the chain table.
+pub fn hash(symbol: &str) -> u32 {
+ const HASH_SEED: u32 = 5381;
+ symbol.bytes().fold(HASH_SEED, |hash, b| {
+ hash.wrapping_mul(33).wrapping_add(u32::from(b))
+ })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::hash;
+ #[test]
+ fn test_hash() {
+ assert_eq!(hash(""), 0x0000_1505);
+ assert_eq!(hash("printf"), 0x156b_2bb8);
+ assert_eq!(hash("exit"), 0x7c96_7e3f);
+ assert_eq!(hash("syscall"), 0xbac2_12a0);
+ assert_eq!(hash("flapenguin.me"), 0x8ae9_f18e);
+ }
+}
+
+macro_rules! elf_gnu_hash_impl {
+ ($IntTy:ty) => {
+ use crate::elf::sym::Sym;
+ use crate::strtab::Strtab;
+ use core::fmt;
+ use core::mem;
+ use core::slice;
+
+ const INT_SIZE: usize = mem::size_of::<$IntTy>();
+ const U32_SIZE: usize = mem::size_of::<u32>();
+ /// Size of a bits mask in bloom filter
+ const ELFCLASS_BITS: u32 = INT_SIZE as u32 * 8;
+
+ /// A better hash table for the ELF used by GNU systems in GNU-compatible software.
+ pub struct GnuHash<'a> {
+ /// Index of the first symbol in the `.dynsym` table which is accessible with
+ /// the hash table
+ symindex: u32,
+ /// Shift count used in the bloom filter
+ shift2: u32,
+ /// 2 bit bloom filter on `chains`
+ // Either 32 or 64-bit depending on the class of object
+ bloom_filter: &'a [$IntTy],
+ /// GNU hash table bucket array; indexes start at 0. This array holds symbol
+ /// table indexes and contains the index of hashes in `chains`
+ buckets: &'a [u32],
+ /// Hash values; indexes start at 0. This array holds symbol table indexes.
+ chains: &'a [u32], // => chains[dynsyms.len() - symindex]
+ dynsyms: &'a [Sym],
+ }
+
+ impl fmt::Debug for GnuHash<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("GnuHash")
+ .field("nbuckets", &self.buckets.len())
+ .field("symindex", &self.symindex)
+ .field("maskwords", &(self.bloom_filter.len() - 1))
+ .field("shift2", &self.shift2)
+ .field("bloom_filter", &self.bloom_filter.as_ptr())
+ .field("bucket", &self.buckets.as_ptr())
+ .field("chains", &self.chains.as_ptr())
+ .finish()
+ }
+ }
+
+ impl<'a> GnuHash<'a> {
+ /// Initialize a GnuHash from a pointer to `.hash` (or `.gnu.hash`) section
+ /// and total number of dynamic symbols.
+ /// # Safety
+ ///
+ /// This function creates a `GnuHash` directly from a raw pointer
+ pub unsafe fn from_raw_table(
+ hashtab: &'a [u8],
+ dynsyms: &'a [Sym],
+ ) -> Result<Self, &'static str> {
+ if hashtab.as_ptr() as usize % INT_SIZE != 0 {
+ return Err("hashtab is not aligned with 64-bit");
+ }
+
+ if hashtab.len() <= 16 {
+ return Err("failed to read in number of buckets");
+ }
+
+ let [nbuckets, symindex, maskwords, shift2] =
+ (hashtab.as_ptr() as *const u32 as *const [u32; 4]).read();
+
+ if !maskwords.is_power_of_two() {
+ return Err("maskwords must be a power of two");
+ }
+
+ let hashtab = &hashtab[16..];
+ {
+ // SAFETY: Condition to check for an overflow
+ // size_of(chains) + size_of(buckets) + size_of(bloom_filter) == size_of(hashtab)
+
+ if dynsyms.len() <= symindex as usize {
+ return Err("symindex must be smaller than dynsyms.len()");
+ }
+ let chains_size = (dynsyms.len() - symindex as usize).checked_mul(U32_SIZE);
+ let buckets_size = (nbuckets as usize).checked_mul(U32_SIZE);
+ let bloom_size = (maskwords as usize).checked_mul(INT_SIZE);
+
+ let total_size = match (chains_size, buckets_size, bloom_size) {
+ (Some(a), Some(b), Some(c)) => {
+ a.checked_add(b).and_then(|t| t.checked_add(c))
+ }
+ _ => None,
+ };
+ match total_size {
+ Some(size) if size == hashtab.len() => {}
+ _ => return Err("index out of bound or non-complete hash section"),
+ }
+ }
+
+ let bloom_filter_ptr = hashtab.as_ptr() as *const $IntTy;
+ let buckets_ptr = bloom_filter_ptr.add(maskwords as usize) as *const u32;
+ let chains_ptr = buckets_ptr.add(nbuckets as usize);
+ let bloom_filter = slice::from_raw_parts(bloom_filter_ptr, maskwords as usize);
+ let buckets = slice::from_raw_parts(buckets_ptr, nbuckets as usize);
+ let chains = slice::from_raw_parts(chains_ptr, dynsyms.len() - symindex as usize);
+ Ok(Self {
+ symindex,
+ shift2,
+ bloom_filter,
+ buckets,
+ chains,
+ dynsyms,
+ })
+ }
+
+ /// Locate the hash chain, and corresponding hash value element.
+ #[cold]
+ fn lookup(&self, symbol: &str, hash: u32, dynstrtab: &Strtab) -> Option<&'a Sym> {
+ const MASK_LOWEST_BIT: u32 = 0xffff_fffe;
+ let bucket = self.buckets[hash as usize % self.buckets.len()];
+
+ // Empty hash chain, symbol not present
+ if bucket < self.symindex {
+ return None;
+ }
+ // Walk the chain until the symbol is found or the chain is exhausted.
+ let chain_idx = bucket - self.symindex;
+ let hash = hash & MASK_LOWEST_BIT;
+ let chains = &self.chains.get((chain_idx as usize)..)?;
+ let dynsyms = &self.dynsyms.get((bucket as usize)..)?;
+ for (hash2, symb) in chains.iter().zip(dynsyms.iter()) {
+ if (hash == (hash2 & MASK_LOWEST_BIT))
+ && (symbol == &dynstrtab[symb.st_name as usize])
+ {
+ return Some(symb);
+ }
+ // Chain ends with an element with the lowest bit set to 1.
+ if hash2 & 1 == 1 {
+ break;
+ }
+ }
+ None
+ }
+
+ /// Check if symbol maybe is in the hash table, or definitely not in it.
+ #[inline]
+ fn check_maybe_match(&self, hash: u32) -> bool {
+ const MASK: u32 = ELFCLASS_BITS - 1;
+ let hash2 = hash >> self.shift2;
+ // `x & (N - 1)` is equivalent to `x % N` iff `N = 2^y`.
+ let bitmask: $IntTy = 1 << (hash & (MASK)) | 1 << (hash2 & MASK);
+ let bloom_idx = (hash / ELFCLASS_BITS) & (self.bloom_filter.len() as u32 - 1);
+ let bitmask_word = self.bloom_filter[bloom_idx as usize];
+ (bitmask_word & bitmask) == bitmask
+ }
+
+ /// Given a symbol, a hash of that symbol, a dynamic string table and
+ /// a `dynstrtab` to cross-reference names, maybe returns a Sym.
+ pub fn find(&self, symbol: &str, dynstrtab: &Strtab) -> Option<&'a Sym> {
+ let hash = self::hash(symbol);
+ self.find_with_hash(symbol, hash, dynstrtab)
+ }
+
+ /// This function will not check if the passed `hash` is really
+ /// the hash of `symbol`
+ pub fn find_with_hash(
+ &self,
+ symbol: &str,
+ hash: u32,
+ dynstrtab: &Strtab,
+ ) -> Option<&'a Sym> {
+ if self.check_maybe_match(hash) {
+ self.lookup(symbol, hash, dynstrtab)
+ } else {
+ None
+ }
+ }
+ }
+ };
+}