summaryrefslogtreecommitdiffstats
path: root/third_party/rust/goblin/src/strtab.rs
blob: dc7b8080f0389156c9d59f211f98035c987efc6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
//! A byte-offset based string table.
//! Commonly used in ELF binaries, Unix archives, and even PE binaries.

use core::fmt;
use core::ops::Index;
use core::str;
use scroll::{ctx, Pread};
if_alloc! {
    use crate::error;
    use alloc::vec::Vec;
}

/// A common string table format which is indexed by byte offsets (and not
/// member index). Constructed using [`parse`](#method.parse)
/// with your choice of delimiter. Please be careful.
///
/// A table built with [`new`](#method.new) or
/// [`from_slice_unparsed`](#method.from_slice_unparsed) only wraps the bytes; the
/// offset index consulted by `get_at` is filled in by `parse` alone.
pub struct Strtab<'a> {
    /// String context handed to `scroll` whenever an entry is read;
    /// `from_slice_unparsed` sets it to `StrCtx::Delimiter(delim)`.
    delim: ctx::StrCtx,
    /// The raw bytes backing the table; every returned `&str` borrows from them.
    bytes: &'a [u8],
    /// `(byte offset, string)` pairs recorded by `parse` in ascending offset order,
    /// which is what lets `get_at` binary-search them. Empty for unparsed tables.
    #[cfg(feature = "alloc")]
    strings: Vec<(usize, &'a str)>,
}

#[inline(always)]
fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&str> {
    bytes.pread_with::<&str>(offset, delim)
}

impl<'a> Strtab<'a> {
    /// Creates a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
    ///
    /// NB: this does *not* preparse the string table, which can have non-optimal access patterns.
    /// See https://github.com/m4b/goblin/pull/275#issue-660364025
    pub fn new(bytes: &'a [u8], delim: u8) -> Self {
        Self::from_slice_unparsed(bytes, 0, bytes.len(), delim)
    }

    /// Builds a `Strtab` over `bytes[offset..offset + len]` without parsing any entry.
    ///
    /// No error is reported for a bad range and no entry is validated.
    /// This is potentially unsafe and should only be used if `feature = "alloc"` is disabled.
    ///
    /// # Panics
    /// Panics if `offset..offset + len` is not a valid range of `bytes`.
    pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self {
        let end = offset + len;
        let window = &bytes[offset..end];
        Self {
            bytes: window,
            delim: ctx::StrCtx::Delimiter(delim),
            #[cfg(feature = "alloc")]
            strings: Vec::new(),
        }
    }
    /// Reads the string beginning at byte `offset` directly from the backing bytes.
    ///
    /// Returns `None` when `offset` is at or past the end of the table.
    /// Use this method if the `Strtab` was created using `from_slice_unparsed()`.
    ///
    /// # Panics
    /// Panics if the string cannot be read, e.g. because the bytes are invalid UTF-8.
    pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
        if offset < self.bytes.len() {
            let entry = get_str(offset, self.bytes, self.delim).unwrap();
            Some(entry)
        } else {
            None
        }
    }
    /// Parses a `Strtab` out of the `len` bytes of `bytes` that start at `offset`, splitting
    /// entries on `delim` and recording every entry together with its byte offset.
    ///
    /// # Errors
    /// Returns `Error::Malformed` if `offset + len` overflows or reaches past the end of
    /// `bytes`, and forwards the `scroll` error if an entry cannot be read (for example
    /// because it is invalid UTF-8).
    ///
    /// Requires `feature = "alloc"`
    #[cfg(feature = "alloc")]
    pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result<Self> {
        match offset.checked_add(len) {
            Some(end) if end <= bytes.len() => {}
            // `None` here means the addition itself overflowed.
            sum => {
                return Err(error::Error::Malformed(format!(
                    "Strtable size ({}) + offset ({}) is out of bounds for {} #bytes. Overflowed: {}",
                    len,
                    offset,
                    bytes.len(),
                    sum.is_none()
                )));
            }
        }

        let mut table = Self::from_slice_unparsed(bytes, offset, len, delim);
        let mut cursor = 0;
        while cursor < table.bytes.len() {
            let entry = get_str(cursor, table.bytes, table.delim)?;
            table.strings.push((cursor, entry));
            // Step over the entry and the single delimiter byte that follows it.
            cursor += entry.len() + 1;
        }
        Ok(table)
    }
    /// Parses all of `bytes` as a string table, with `delim` separating the entries.
    ///
    /// Equivalent to calling `parse` with an offset of `0` and a length of `bytes.len()`.
    ///
    /// Requires `feature = "alloc"`
    #[cfg(feature = "alloc")]
    pub fn new_preparsed(bytes: &'a [u8], delim: u8) -> error::Result<Self> {
        let total = bytes.len();
        Self::parse(bytes, 0, total, delim)
    }
    /// Collects every entry of the string table into a vector, in table order.
    ///
    /// When the table carries a parsed index, the indexed strings are returned as-is;
    /// otherwise the backing bytes are scanned entry by entry.
    ///
    /// Note: This method is used to check the parsed contents of `strtab`.
    /// If you want to get the correct contents of `strtab` as `Vec`, use the following example.
    ///
    /// # Examples
    /// ```rust
    /// use goblin::error::Error;
    ///
    /// pub fn show_shdr_strtab(bytes: &[u8]) -> Result<(), Error> {
    ///     let elf = goblin::elf::Elf::parse(&bytes)?;
    ///
    ///     for section in elf.section_headers {
    ///         println!("{}", elf.shdr_strtab.get_at(section.sh_name).unwrap_or(""));
    ///     }
    ///
    ///     Ok(())
    /// }
    /// ```
    ///
    /// # Errors
    /// Forwards the `scroll` error if an entry cannot be read during the fallback scan.
    ///
    /// Requires `feature = "alloc"`
    #[cfg(feature = "alloc")]
    pub fn to_vec(&self) -> error::Result<Vec<&'a str>> {
        if !self.strings.is_empty() {
            return Ok(self.strings.iter().map(|entry| entry.1).collect());
        }

        // Fallback in case `Strtab` was created using `from_slice_unparsed()`:
        // there is no index, so walk the raw bytes instead.
        let mut entries = Vec::new();
        let mut cursor = 0;
        while cursor < self.bytes.len() {
            let entry = get_str(cursor, self.bytes, self.delim)?;
            // Step over the entry and the single delimiter byte that follows it.
            cursor += entry.len() + 1;
            entries.push(entry);
        }
        Ok(entries)
    }
    /// Safely looks up the string that starts at byte `offset` in the parsed index.
    ///
    /// An `offset` that lands inside an indexed entry yields the tail of that entry.
    /// `None` is returned when the offset is out of bounds, when it does not fall on a
    /// character boundary, or when the table has no index (it was not built by `parse`).
    ///
    /// Requires `feature = "alloc"`
    #[cfg(feature = "alloc")]
    pub fn get_at(&self, offset: usize) -> Option<&'a str> {
        let preceding = match self.strings.binary_search_by_key(&offset, |entry| entry.0) {
            // `offset` is exactly the start of an indexed entry.
            Ok(exact) => return Some(self.strings[exact].1),
            // Nothing starts at or before `offset`.
            Err(0) => return None,
            // Otherwise the entry just before the insertion point is the candidate.
            Err(insert_at) => insert_at - 1,
        };
        let (start, whole) = self.strings[preceding];
        whole.get(offset - start..)
    }
    /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter
    ///
    /// # Safety
    /// This function creates a `Strtab` directly from a raw pointer and size.
    /// The pointer is passed to `core::slice::from_raw_parts`, so the caller must uphold
    /// that function's requirements: `ptr` must be valid for reads of `len` bytes for as
    /// long as the returned `Strtab` is in use.
    #[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")]
    pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> {
        let backing: &'a [u8] = core::slice::from_raw_parts(ptr, len);
        Self::from_slice_unparsed(backing, 0, len, delim)
    }
    /// Parses a str reference straight from the backing bytes, starting at byte `offset`.
    ///
    /// Returns `None` if `offset` is at or past the end of the table; otherwise the
    /// outcome of reading the string, with any `scroll` error converted into this
    /// crate's error type.
    ///
    /// Requires `feature = "alloc"`
    #[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")]
    #[cfg(feature = "alloc")]
    pub fn get(&self, offset: usize) -> Option<error::Result<&'a str>> {
        if offset >= self.bytes.len() {
            return None;
        }
        let parsed = get_str(offset, self.bytes, self.delim);
        Some(parsed.map_err(|err| err.into()))
    }
}

impl<'a> fmt::Debug for Strtab<'a> {
    /// Formats the table as a `Strtab` struct showing the delimiter context and the
    /// result of decoding the backing bytes as UTF-8. The parsed index is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("Strtab");
        out.field("delim", &self.delim);
        out.field("bytes", &str::from_utf8(self.bytes));
        out.finish()
    }
}

impl<'a> Default for Strtab<'a> {
    fn default() -> Self {
        Self {
            delim: ctx::StrCtx::default(),
            bytes: &[],
            #[cfg(feature = "alloc")]
            strings: Vec::new(),
        }
    }
}

impl<'a> Index<usize> for Strtab<'a> {
    type Output = str;
    /// Gets the str reference starting at byte `offset`.
    /// **NB**: this will panic if the underlying bytes are not valid utf8, or the offset is invalid
    #[inline(always)]
    fn index(&self, offset: usize) -> &Self::Output {
        // This can't delegate to get() because get() requires #[cfg(feature = "alloc")].
        // It's also slightly less useful than get() because the lifetime -- specified by
        // the Index trait -- matches &self, even though we could return &'a instead.
        let looked_up = get_str(offset, self.bytes, self.delim);
        looked_up.unwrap()
    }
}

/// A final entry without a trailing delimiter is still reported by `to_vec`.
#[test]
fn as_vec_no_final_null() {
    let raw: &[u8] = b"\0printf\0memmove\0busta";
    let table = Strtab::new_preparsed(raw, 0x0).unwrap();
    let entries = table.to_vec().unwrap();
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, vec!["", "printf", "memmove", "busta"]);
}

/// Without a leading delimiter there is no empty first entry, and the unterminated
/// last entry is still reported.
#[test]
fn as_vec_no_first_null_no_final_null() {
    let raw: &[u8] = b"printf\0memmove\0busta";
    let table = Strtab::new_preparsed(raw, 0x0).unwrap();
    let entries = table.to_vec().unwrap();
    assert_eq!(entries.len(), 3);
    assert_eq!(entries, vec!["printf", "memmove", "busta"]);
}

/// A trailing delimiter does not produce an extra empty entry at the end.
#[test]
fn to_vec_final_null() {
    let raw: &[u8] = b"\0printf\0memmove\0busta\0";
    let table = Strtab::new_preparsed(raw, 0x0).unwrap();
    let entries = table.to_vec().unwrap();
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, vec!["", "printf", "memmove", "busta"]);
}

/// The delimiter is configurable: a newline splits entries just like a NUL byte does.
#[test]
fn to_vec_newline_delim() {
    let raw: &[u8] = b"\nprintf\nmemmove\nbusta\n";
    let table = Strtab::new_preparsed(raw, b'\n').unwrap();
    let entries = table.to_vec().unwrap();
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, vec!["", "printf", "memmove", "busta"]);
}

/// Parsing rejects invalid UTF-8 with scroll's `BadInput` error and accepts valid
/// multi-byte UTF-8.
#[test]
fn parse_utf8() {
    let rejected_as_bad_input = match Strtab::new_preparsed(&[0x80, 0x80], b'\n') {
        Err(error::Error::Scroll(scroll::Error::BadInput {
            size: 2,
            msg: "invalid utf8",
        })) => true,
        _ => false,
    };
    assert!(rejected_as_bad_input);

    let accepted = Strtab::new_preparsed(&[0xC6, 0x92, 0x6F, 0x6F], b'\n').is_ok();
    assert!(accepted);
}

/// `get_at` works on byte offsets: entry starts, offsets inside an entry that fall on a
/// character boundary, offsets that split a character, and offsets past the end.
#[test]
fn get_at_utf8() {
    let strtab = Strtab::new_preparsed("\nƒoo\nmemmove\n🅱️usta\n".as_bytes(), b'\n').unwrap();
    let expectations: [(usize, Option<&str>); 9] = [
        (0, Some("")),
        (5, Some("")),
        (6, Some("memmove")),
        (14, Some("\u{1f171}\u{fe0f}usta")),
        // Offset 16 lands in the middle of a multi-byte character.
        (16, None),
        (18, Some("\u{fe0f}usta")),
        (21, Some("usta")),
        (25, Some("")),
        (26, None),
    ];
    for &(offset, expected) in expectations.iter() {
        assert_eq!(strtab.get_at(offset), expected, "offset {}", offset);
    }
}