diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/object/src/read/archive.rs | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/object/src/read/archive.rs')
-rw-r--r-- | vendor/object/src/read/archive.rs | 502 |
1 files changed, 502 insertions, 0 deletions
diff --git a/vendor/object/src/read/archive.rs b/vendor/object/src/read/archive.rs new file mode 100644 index 000000000..0208878e4 --- /dev/null +++ b/vendor/object/src/read/archive.rs @@ -0,0 +1,502 @@ +//! Support for archive files. + +use core::convert::TryInto; + +use crate::archive; +use crate::read::{self, Error, ReadError, ReadRef}; + +/// The kind of archive format. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum ArchiveKind { + /// There are no special files that indicate the archive format. + Unknown, + /// The GNU (or System V) archive format. + Gnu, + /// The GNU (or System V) archive format with 64-bit symbol table. + Gnu64, + /// The BSD archive format. + Bsd, + /// The BSD archive format with 64-bit symbol table. + /// + /// This is used for Darwin. + Bsd64, + /// The Windows COFF archive format. + Coff, +} + +/// A partially parsed archive file. +#[derive(Debug)] +pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> { + data: R, + len: u64, + offset: u64, + kind: ArchiveKind, + symbols: (u64, u64), + names: &'data [u8], +} + +impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { + /// Parse the archive header and special members. + pub fn parse(data: R) -> read::Result<Self> { + let len = data.len().read_error("Unknown archive length")?; + let mut tail = 0; + let magic = data + .read_bytes(&mut tail, archive::MAGIC.len() as u64) + .read_error("Invalid archive size")?; + if magic != &archive::MAGIC[..] { + return Err(Error("Unsupported archive identifier")); + } + + let mut file = ArchiveFile { + data, + offset: tail, + len, + kind: ArchiveKind::Unknown, + symbols: (0, 0), + names: &[], + }; + + // The first few members may be special, so parse them. + // GNU has: + // - "/" or "/SYM64/": symbol table (optional) + // - "//": names table (optional) + // COFF has: + // - "/": first linker member + // - "/": second linker member + // - "//": names table + // BSD has: + // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional) + // BSD 64-bit has: + // - "__.SYMDEF_64" or "__.SYMDEF_64 SORTED": symbol table (optional) + // BSD may use the extended name for the symbol table. This is handled + // by `ArchiveMember::parse`. + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"/" { + // GNU symbol table (unless we later determine this is COFF). + file.kind = ArchiveKind::Gnu; + file.symbols = member.file_range(); + file.offset = tail; + + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"/" { + // COFF linker member. + file.kind = ArchiveKind::Coff; + file.symbols = member.file_range(); + file.offset = tail; + + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"//" { + // COFF names table. + file.names = member.data(data)?; + file.offset = tail; + } + } + } else if member.name == b"//" { + // GNU names table. + file.names = member.data(data)?; + file.offset = tail; + } + } + } else if member.name == b"/SYM64/" { + // GNU 64-bit symbol table. + file.kind = ArchiveKind::Gnu64; + file.symbols = member.file_range(); + file.offset = tail; + + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"//" { + // GNU names table. + file.names = member.data(data)?; + file.offset = tail; + } + } + } else if member.name == b"//" { + // GNU names table. + file.kind = ArchiveKind::Gnu; + file.names = member.data(data)?; + file.offset = tail; + } else if member.name == b"__.SYMDEF" || member.name == b"__.SYMDEF SORTED" { + // BSD symbol table. + file.kind = ArchiveKind::Bsd; + file.symbols = member.file_range(); + file.offset = tail; + } else if member.name == b"__.SYMDEF_64" || member.name == b"__.SYMDEF_64 SORTED" { + // BSD 64-bit symbol table. + file.kind = ArchiveKind::Bsd64; + file.symbols = member.file_range(); + file.offset = tail; + } else { + // TODO: This could still be a BSD file. We leave this as unknown for now. + } + } + Ok(file) + } + + /// Return the archive format. + #[inline] + pub fn kind(&self) -> ArchiveKind { + self.kind + } + + /// Iterate over the members of the archive. + /// + /// This does not return special members. + #[inline] + pub fn members(&self) -> ArchiveMemberIterator<'data, R> { + ArchiveMemberIterator { + data: self.data, + offset: self.offset, + len: self.len, + names: self.names, + } + } +} + +/// An iterator over the members of an archive. +#[derive(Debug)] +pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> { + data: R, + offset: u64, + len: u64, + names: &'data [u8], +} + +impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { + type Item = read::Result<ArchiveMember<'data>>; + + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.len { + return None; + } + let member = ArchiveMember::parse(self.data, &mut self.offset, self.names); + if member.is_err() { + self.offset = self.len; + } + Some(member) + } +} + +/// A partially parsed archive member. +#[derive(Debug)] +pub struct ArchiveMember<'data> { + header: &'data archive::Header, + name: &'data [u8], + offset: u64, + size: u64, +} + +impl<'data> ArchiveMember<'data> { + /// Parse the archive member header, name, and file data. + /// + /// This reads the extended name (if any) and adjusts the file size. + fn parse<R: ReadRef<'data>>( + data: R, + offset: &mut u64, + names: &'data [u8], + ) -> read::Result<Self> { + let header = data + .read::<archive::Header>(offset) + .read_error("Invalid archive member header")?; + if header.terminator != archive::TERMINATOR { + return Err(Error("Invalid archive terminator")); + } + + let mut file_offset = *offset; + let mut file_size = + parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?; + *offset = offset + .checked_add(file_size) + .read_error("Archive member size is too large")?; + // Entries are padded to an even number of bytes. + if (file_size & 1) != 0 { + *offset = offset.saturating_add(1); + } + + let name = if header.name[0] == b'/' && (header.name[1] as char).is_digit(10) { + // Read file name from the names table. + parse_sysv_extended_name(&header.name[1..], names) + .read_error("Invalid archive extended name offset")? + } else if &header.name[..3] == b"#1/" && (header.name[3] as char).is_digit(10) { + // Read file name from the start of the file data. + parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size) + .read_error("Invalid archive extended name length")? + } else if header.name[0] == b'/' { + let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len()); + &header.name[..name_len] + } else { + let name_len = memchr::memchr(b'/', &header.name) + .or_else(|| memchr::memchr(b' ', &header.name)) + .unwrap_or(header.name.len()); + &header.name[..name_len] + }; + + Ok(ArchiveMember { + header, + name, + offset: file_offset, + size: file_size, + }) + } + + /// Return the raw header. + #[inline] + pub fn header(&self) -> &'data archive::Header { + self.header + } + + /// Return the parsed file name. + /// + /// This may be an extended file name. + #[inline] + pub fn name(&self) -> &'data [u8] { + self.name + } + + /// Parse the file modification timestamp from the header. + #[inline] + pub fn date(&self) -> Option<u64> { + parse_u64_digits(&self.header.date, 10) + } + + /// Parse the user ID from the header. + #[inline] + pub fn uid(&self) -> Option<u64> { + parse_u64_digits(&self.header.uid, 10) + } + + /// Parse the group ID from the header. + #[inline] + pub fn gid(&self) -> Option<u64> { + parse_u64_digits(&self.header.gid, 10) + } + + /// Parse the file mode from the header. + #[inline] + pub fn mode(&self) -> Option<u64> { + parse_u64_digits(&self.header.mode, 8) + } + + /// Return the offset and size of the file data. + pub fn file_range(&self) -> (u64, u64) { + (self.offset, self.size) + } + + /// Return the file data. + #[inline] + pub fn data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]> { + data.read_bytes_at(self.offset, self.size) + .read_error("Archive member size is too large") + } +} + +// Ignores bytes starting from the first space. +fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> { + if let [b' ', ..] = digits { + return None; + } + let mut result: u64 = 0; + for &c in digits { + if c == b' ' { + return Some(result); + } else { + let x = (c as char).to_digit(radix)?; + result = result + .checked_mul(u64::from(radix))? + .checked_add(u64::from(x))?; + } + } + Some(result) +} + +fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> { + let offset = parse_u64_digits(digits, 10).ok_or(())?; + let offset = offset.try_into().map_err(|_| ())?; + let name_data = names.get(offset..).ok_or(())?; + let name = match memchr::memchr2(b'/', b'\0', name_data) { + Some(len) => &name_data[..len], + None => name_data, + }; + Ok(name) +} + +/// Modifies `data` to start after the extended name. +fn parse_bsd_extended_name<'data, R: ReadRef<'data>>( + digits: &[u8], + data: R, + offset: &mut u64, + size: &mut u64, +) -> Result<&'data [u8], ()> { + let len = parse_u64_digits(digits, 10).ok_or(())?; + *size = size.checked_sub(len).ok_or(())?; + let name_data = data.read_bytes(offset, len)?; + let name = match memchr::memchr(b'\0', name_data) { + Some(len) => &name_data[..len], + None => name_data, + }; + Ok(name) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn kind() { + let data = b"!<arch>\n"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Unknown); + + let data = b"\ + !<arch>\n\ + / 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + + let data = b"\ + !<arch>\n\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + + let data = b"\ + !<arch>\n\ + / 4 `\n\ + 0000\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + + let data = b"\ + !<arch>\n\ + /SYM64/ 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu64); + + let data = b"\ + !<arch>\n\ + /SYM64/ 4 `\n\ + 0000\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu64); + + let data = b"\ + !<arch>\n\ + __.SYMDEF 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd); + + let data = b"\ + !<arch>\n\ + #1/9 13 `\n\ + __.SYMDEF0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd); + + let data = b"\ + !<arch>\n\ + #1/16 20 `\n\ + __.SYMDEF SORTED0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd); + + let data = b"\ + !<arch>\n\ + __.SYMDEF_64 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd64); + + let data = b"\ + !<arch>\n\ + #1/12 16 `\n\ + __.SYMDEF_640000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd64); + + let data = b"\ + !<arch>\n\ + #1/19 23 `\n\ + __.SYMDEF_64 SORTED0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd64); + + let data = b"\ + !<arch>\n\ + / 4 `\n\ + 0000\ + / 4 `\n\ + 0000\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Coff); + } + + #[test] + fn gnu_names() { + let data = b"\ + !<arch>\n\ + // 18 `\n\ + 0123456789abcdef/\n\ + s p a c e/ 0 0 0 644 4 `\n\ + 0000\ + 0123456789abcde/0 0 0 644 3 `\n\ + odd\n\ + /0 0 0 0 644 4 `\n\ + even"; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"s p a c e"); + assert_eq!(member.data(data).unwrap(), &b"0000"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcde"); + assert_eq!(member.data(data).unwrap(), &b"odd"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcdef"); + assert_eq!(member.data(data).unwrap(), &b"even"[..]); + + assert!(members.next().is_none()); + } + + #[test] + fn bsd_names() { + let data = b"\ + !<arch>\n\ + 0123456789abcde 0 0 0 644 3 `\n\ + odd\n\ + #1/16 0 0 0 644 20 `\n\ + 0123456789abcdefeven"; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Unknown); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcde"); + assert_eq!(member.data(data).unwrap(), &b"odd"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcdef"); + assert_eq!(member.data(data).unwrap(), &b"even"[..]); + + assert!(members.next().is_none()); + } +} |