summaryrefslogtreecommitdiffstats
path: root/third_party/rust/goblin/src/strtab.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/goblin/src/strtab.rs')
-rw-r--r--third_party/rust/goblin/src/strtab.rs264
1 files changed, 264 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/strtab.rs b/third_party/rust/goblin/src/strtab.rs
new file mode 100644
index 0000000000..dc7b8080f0
--- /dev/null
+++ b/third_party/rust/goblin/src/strtab.rs
@@ -0,0 +1,264 @@
+//! A byte-offset based string table.
+//! Commonly used in ELF binaries, Unix archives, and even PE binaries.
+
+use core::fmt;
+use core::ops::Index;
+use core::str;
+use scroll::{ctx, Pread};
+if_alloc! {
+ use crate::error;
+ use alloc::vec::Vec;
+}
+
+/// A common string table format which is indexed by byte offsets (and not
+/// member index). Constructed using [`parse`](#method.parse)
+/// with your choice of delimiter. Please be careful.
+pub struct Strtab<'a> {
+ delim: ctx::StrCtx,
+ bytes: &'a [u8],
+ #[cfg(feature = "alloc")]
+ strings: Vec<(usize, &'a str)>,
+}
+
+#[inline(always)]
+fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&str> {
+ bytes.pread_with::<&str>(offset, delim)
+}
+
+impl<'a> Strtab<'a> {
+ /// Creates a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
+ ///
+ /// NB: this does *not* preparse the string table, which can have non-optimal access patterns.
+ /// See https://github.com/m4b/goblin/pull/275#issue-660364025
+ pub fn new(bytes: &'a [u8], delim: u8) -> Self {
+ Self::from_slice_unparsed(bytes, 0, bytes.len(), delim)
+ }
+
+ /// Creates a `Strtab` directly without bounds check and without parsing it.
+ ///
+ /// This is potentially unsafe and should only be used if `feature = "alloc"` is disabled.
+ pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self {
+ Self {
+ delim: ctx::StrCtx::Delimiter(delim),
+ bytes: &bytes[offset..offset + len],
+ #[cfg(feature = "alloc")]
+ strings: Vec::new(),
+ }
+ }
+ /// Gets a str reference from the backing bytes starting at byte `offset`.
+ ///
+ /// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8.
+ /// Use this method if the `Strtab` was created using `from_slice_unparsed()`.
+ pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
+ if offset >= self.bytes.len() {
+ None
+ } else {
+ Some(get_str(offset, self.bytes, self.delim).unwrap())
+ }
+ }
+ #[cfg(feature = "alloc")]
+ /// Parses a `Strtab` from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter.
+ ///
+ /// Errors if bytes are invalid UTF-8.
+ /// Requires `feature = "alloc"`
+ pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result<Self> {
+ let (end, overflow) = offset.overflowing_add(len);
+ if overflow || end > bytes.len() {
+ return Err(error::Error::Malformed(format!(
+ "Strtable size ({}) + offset ({}) is out of bounds for {} #bytes. Overflowed: {}",
+ len,
+ offset,
+ bytes.len(),
+ overflow
+ )));
+ }
+ let mut result = Self::from_slice_unparsed(bytes, offset, len, delim);
+ let mut i = 0;
+ while i < result.bytes.len() {
+ let string = get_str(i, result.bytes, result.delim)?;
+ result.strings.push((i, string));
+ i += string.len() + 1;
+ }
+ Ok(result)
+ }
+ #[cfg(feature = "alloc")]
+ /// Parses a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
+ ///
+ /// Requires `feature = "alloc"`
+ pub fn new_preparsed(bytes: &'a [u8], delim: u8) -> error::Result<Self> {
+ Self::parse(bytes, 0, bytes.len(), delim)
+ }
+ #[cfg(feature = "alloc")]
+ /// Converts the string table to a vector of parsed strings.
+ ///
+ /// Note: This method is used to check the parsed contents of `strtab`.
+ /// If you want to get the correct contents of `strtab` as `Vec`, use the following example.
+ ///
+ /// # Examples
+ /// ```rust
+ /// use goblin::error::Error;
+ ///
+ /// pub fn show_shdr_strtab(bytes: &[u8]) -> Result<(), Error> {
+ /// let elf = goblin::elf::Elf::parse(&bytes)?;
+ ///
+ /// for section in elf.section_headers {
+ /// println!("{}", elf.shdr_strtab.get_at(section.sh_name).unwrap_or(""));
+ /// }
+ ///
+ /// Ok(())
+ /// }
+ /// ```
+ ///
+ /// Requires `feature = "alloc"`
+ pub fn to_vec(&self) -> error::Result<Vec<&'a str>> {
+ // Fallback in case `Strtab` was created using `from_slice_unparsed()`.
+ if self.strings.is_empty() {
+ let mut result = Vec::new();
+ let mut i = 0;
+ while i < self.bytes.len() {
+ let string = get_str(i, self.bytes, self.delim)?;
+ result.push(string);
+ i += string.len() + 1;
+ }
+ return Ok(result);
+ }
+ Ok(self.strings.iter().map(|&(_key, value)| value).collect())
+ }
+ #[cfg(feature = "alloc")]
+ /// Safely gets a str reference from the parsed table starting at byte `offset`.
+ ///
+ /// If the index is out of bounds, `None` is returned.
+ /// Requires `feature = "alloc"`
+ pub fn get_at(&self, offset: usize) -> Option<&'a str> {
+ match self
+ .strings
+ .binary_search_by_key(&offset, |&(key, _value)| key)
+ {
+ Ok(index) => Some(self.strings[index].1),
+ Err(index) => {
+ if index == 0 {
+ return None;
+ }
+ let (string_begin_offset, entire_string) = self.strings[index - 1];
+ entire_string.get(offset - string_begin_offset..)
+ }
+ }
+ }
+ #[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")]
+ /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter
+ ///
+ /// # Safety
+ /// This function creates a `Strtab` directly from a raw pointer and size
+ pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> {
+ Self::from_slice_unparsed(core::slice::from_raw_parts(ptr, len), 0, len, delim)
+ }
+ #[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")]
+ #[cfg(feature = "alloc")]
+ /// Parses a str reference from the parsed table starting at byte `offset`.
+ ///
+ /// If the index is out of bounds, `None` is returned.
+ /// Requires `feature = "alloc"`
+ pub fn get(&self, offset: usize) -> Option<error::Result<&'a str>> {
+ if offset >= self.bytes.len() {
+ None
+ } else {
+ Some(get_str(offset, self.bytes, self.delim).map_err(core::convert::Into::into))
+ }
+ }
+}
+
+impl<'a> fmt::Debug for Strtab<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("Strtab")
+ .field("delim", &self.delim)
+ .field("bytes", &str::from_utf8(self.bytes))
+ .finish()
+ }
+}
+
+impl<'a> Default for Strtab<'a> {
+ fn default() -> Self {
+ Self {
+ delim: ctx::StrCtx::default(),
+ bytes: &[],
+ #[cfg(feature = "alloc")]
+ strings: Vec::new(),
+ }
+ }
+}
+
+impl<'a> Index<usize> for Strtab<'a> {
+ type Output = str;
+ /// Gets str reference at starting at byte `offset`.
+ /// **NB**: this will panic if the underlying bytes are not valid utf8, or the offset is invalid
+ #[inline(always)]
+ fn index(&self, offset: usize) -> &Self::Output {
+ // This can't delegate to get() because get() requires #[cfg(features = "alloc")]
+ // It's also slightly less useful than get() because the lifetime -- specified by the Index
+ // trait -- matches &self, even though we could return &'a instead
+ get_str(offset, self.bytes, self.delim).unwrap()
+ }
+}
+
+#[test]
+fn as_vec_no_final_null() {
+ let strtab = Strtab::new_preparsed(b"\0printf\0memmove\0busta", 0x0).unwrap();
+ let vec = strtab.to_vec().unwrap();
+ assert_eq!(vec.len(), 4);
+ assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
+}
+
+#[test]
+fn as_vec_no_first_null_no_final_null() {
+ let strtab = Strtab::new_preparsed(b"printf\0memmove\0busta", 0x0).unwrap();
+ let vec = strtab.to_vec().unwrap();
+ assert_eq!(vec.len(), 3);
+ assert_eq!(vec, vec!["printf", "memmove", "busta"]);
+}
+
+#[test]
+fn to_vec_final_null() {
+ let strtab = Strtab::new_preparsed(b"\0printf\0memmove\0busta\0", 0x0).unwrap();
+ let vec = strtab.to_vec().unwrap();
+ assert_eq!(vec.len(), 4);
+ assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
+}
+
+#[test]
+fn to_vec_newline_delim() {
+ let strtab = Strtab::new_preparsed(b"\nprintf\nmemmove\nbusta\n", b'\n').unwrap();
+ let vec = strtab.to_vec().unwrap();
+ assert_eq!(vec.len(), 4);
+ assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
+}
+
+#[test]
+fn parse_utf8() {
+ assert!(match Strtab::new_preparsed(&[0x80, 0x80], b'\n') {
+ Err(error::Error::Scroll(scroll::Error::BadInput {
+ size: 2,
+ msg: "invalid utf8",
+ })) => true,
+ _ => false,
+ });
+ assert!(
+ match Strtab::new_preparsed(&[0xC6, 0x92, 0x6F, 0x6F], b'\n') {
+ Ok(_) => true,
+ _ => false,
+ }
+ );
+}
+
+#[test]
+fn get_at_utf8() {
+ let strtab = Strtab::new_preparsed("\nƒoo\nmemmove\n🅱️usta\n".as_bytes(), b'\n').unwrap();
+ assert_eq!(strtab.get_at(0), Some(""));
+ assert_eq!(strtab.get_at(5), Some(""));
+ assert_eq!(strtab.get_at(6), Some("memmove"));
+ assert_eq!(strtab.get_at(14), Some("\u{1f171}\u{fe0f}usta"));
+ assert_eq!(strtab.get_at(16), None);
+ assert_eq!(strtab.get_at(18), Some("\u{fe0f}usta"));
+ assert_eq!(strtab.get_at(21), Some("usta"));
+ assert_eq!(strtab.get_at(25), Some(""));
+ assert_eq!(strtab.get_at(26), None);
+}