summaryrefslogtreecommitdiffstats
path: root/third_party/rust/object/src/write/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/object/src/write/string.rs')
-rw-r--r--third_party/rust/object/src/write/string.rs140
1 files changed, 140 insertions, 0 deletions
diff --git a/third_party/rust/object/src/write/string.rs b/third_party/rust/object/src/write/string.rs
new file mode 100644
index 0000000000..6a7ac31fe6
--- /dev/null
+++ b/third_party/rust/object/src/write/string.rs
@@ -0,0 +1,140 @@
+use indexmap::IndexSet;
+
+use crate::alloc::vec::Vec;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct StringId(usize);
+
+#[derive(Debug, Default)]
+pub(crate) struct StringTable<'a> {
+ strings: IndexSet<&'a [u8]>,
+ offsets: Vec<usize>,
+}
+
+impl<'a> StringTable<'a> {
+ /// Add a string to the string table.
+ ///
+ /// Panics if the string table has already been written, or
+ /// if the string contains a null byte.
+ pub fn add(&mut self, string: &'a [u8]) -> StringId {
+ assert!(self.offsets.is_empty());
+ assert!(!string.contains(&0));
+ let id = self.strings.insert_full(string).0;
+ StringId(id)
+ }
+
+ /// Return the offset of the given string.
+ ///
+ /// Panics if the string table has not been written, or
+ /// if the string is not in the string table.
+ pub fn get_offset(&self, id: StringId) -> usize {
+ self.offsets[id.0]
+ }
+
+ /// Append the string table to the given `Vec`, and
+ /// calculate the list of string offsets.
+ ///
+ /// `base` is the initial string table offset. For example,
+ /// this should be 1 for ELF, to account for the initial
+ /// null byte (which must have been written by the caller).
+ pub fn write(&mut self, base: usize, w: &mut Vec<u8>) {
+ assert!(self.offsets.is_empty());
+
+ let mut ids: Vec<_> = (0..self.strings.len()).collect();
+ sort(&mut ids, 1, &self.strings);
+
+ self.offsets = vec![0; ids.len()];
+ let mut offset = base;
+ let mut previous = &[][..];
+ for id in ids {
+ let string = self.strings.get_index(id).unwrap();
+ if previous.ends_with(string) {
+ self.offsets[id] = offset - string.len() - 1;
+ } else {
+ self.offsets[id] = offset;
+ w.extend_from_slice(string);
+ w.push(0);
+ offset += string.len() + 1;
+ previous = string;
+ }
+ }
+ }
+}
+
+// Multi-key quicksort.
+//
+// Ordering is such that if a string is a suffix of at least one other string,
+// then it is placed immediately after one of those strings. That is:
+// - comparison starts at the end of the string
+// - shorter strings come later
+//
+// Based on the implementation in LLVM.
+fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) {
+ loop {
+ if ids.len() <= 1 {
+ return;
+ }
+
+ let pivot = byte(ids[0], pos, strings);
+ let mut lower = 0;
+ let mut upper = ids.len();
+ let mut i = 1;
+ while i < upper {
+ let b = byte(ids[i], pos, strings);
+ if b > pivot {
+ ids.swap(lower, i);
+ lower += 1;
+ i += 1;
+ } else if b < pivot {
+ upper -= 1;
+ ids.swap(upper, i);
+ } else {
+ i += 1;
+ }
+ }
+
+ sort(&mut ids[..lower], pos, strings);
+ sort(&mut ids[upper..], pos, strings);
+
+ if pivot == 0 {
+ return;
+ }
+ ids = &mut ids[lower..upper];
+ pos += 1;
+ }
+}
+
+fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 {
+ let string = strings.get_index(id).unwrap();
+ let len = string.len();
+ if len >= pos {
+ string[len - pos]
+ } else {
+ // We know the strings don't contain null bytes.
+ 0
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn string_table() {
+ let mut table = StringTable::default();
+ let id0 = table.add(b"");
+ let id1 = table.add(b"foo");
+ let id2 = table.add(b"bar");
+ let id3 = table.add(b"foobar");
+
+ let mut data = Vec::new();
+ data.push(0);
+ table.write(1, &mut data);
+ assert_eq!(data, b"\0foobar\0foo\0");
+
+ assert_eq!(table.get_offset(id0), 11);
+ assert_eq!(table.get_offset(id1), 8);
+ assert_eq!(table.get_offset(id2), 4);
+ assert_eq!(table.get_offset(id3), 1);
+ }
+}