summaryrefslogtreecommitdiffstats
path: root/vendor/tar-0.4.38/src/archive.rs
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
commit c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/tar-0.4.38/src/archive.rs
parent Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/tar-0.4.38/src/archive.rs')
-rw-r--r-- vendor/tar-0.4.38/src/archive.rs 577
1 files changed, 577 insertions, 0 deletions
diff --git a/vendor/tar-0.4.38/src/archive.rs b/vendor/tar-0.4.38/src/archive.rs
new file mode 100644
index 000000000..1bed51249
--- /dev/null
+++ b/vendor/tar-0.4.38/src/archive.rs
@@ -0,0 +1,577 @@
+use std::cell::{Cell, RefCell};
+use std::cmp;
+use std::convert::TryFrom;
+use std::fs;
+use std::io::prelude::*;
+use std::io::{self, SeekFrom};
+use std::marker;
+use std::path::Path;
+
+use crate::entry::{EntryFields, EntryIo};
+use crate::error::TarError;
+use crate::other;
+use crate::pax::pax_extensions_size;
+use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
+
+/// A top-level representation of an archive file.
+///
+/// The archive wraps a reader; its entries can be iterated over or unpacked
+/// onto the filesystem.
+pub struct Archive<R: ?Sized + Read> {
+ // Reader, stream position and unpack options, kept together so that
+ // entries can borrow them.
+ inner: ArchiveInner<R>,
+}
+
+/// Shared state behind an `Archive`: the wrapped reader, the current position
+/// in it, and the options that are copied into each entry.
+pub struct ArchiveInner<R: ?Sized> {
+ // Current position in the stream: advanced by the `Read` impl for
+ // `&ArchiveInner` and overwritten by its `Seek` impl.
+ pos: Cell<u64>,
+ // The next four flags are copied into every `EntryFields` that is built.
+ unpack_xattrs: bool,
+ preserve_permissions: bool,
+ preserve_mtime: bool,
+ overwrite: bool,
+ // When false, an all-zero header block ends iteration; when true such
+ // blocks are skipped.
+ ignore_zeros: bool,
+ // The underlying reader; the `RefCell` allows reading through `&self`.
+ obj: RefCell<R>,
+}
+
+/// An iterator over the entries of an archive.
+///
+/// Each item is an `io::Result<Entry<'a, R>>`.
+pub struct Entries<'a, R: 'a + Read> {
+ // Iteration state, stored with the reader type erased.
+ fields: EntriesFields<'a>,
+ // Ties this iterator to the reader type `R` and the archive borrow `'a`
+ // without storing a value.
+ _ignored: marker::PhantomData<&'a Archive<R>>,
+}
+
+/// Combines `Read` and `Seek` so that both can be named by a single trait
+/// object (`dyn SeekRead`).
+trait SeekRead: Read + Seek {}
+// Blanket impl: every `Read + Seek` type is a `SeekRead`.
+impl<R: Read + Seek> SeekRead for R {}
+
+/// Iteration state over an archive whose reader type has been erased.
+struct EntriesFields<'a> {
+ // The archive being iterated, viewed as a plain reader.
+ archive: &'a Archive<dyn Read + 'a>,
+ // The same archive viewed as a seekable reader; when `Some`, `skip` seeks
+ // instead of reading and discarding.
+ seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
+ // Offset at which the next header block is expected.
+ next: u64,
+ // Set once iteration has returned the end of the archive or an error.
+ done: bool,
+ // When true, `next_entry` returns raw entries without extension handling.
+ raw: bool,
+}
+
+impl<R: Read> Archive<R> {
+    /// Wraps `obj` in a new archive that reads from it.
+    ///
+    /// The archive starts at position 0. Unpacking of xattrs, preservation of
+    /// permissions and skipping of zeroed headers start out disabled, while
+    /// mtime preservation and overwriting start out enabled.
+    pub fn new(obj: R) -> Archive<R> {
+        let inner = ArchiveInner {
+            pos: Cell::new(0),
+            obj: RefCell::new(obj),
+            unpack_xattrs: false,
+            preserve_permissions: false,
+            preserve_mtime: true,
+            overwrite: true,
+            ignore_zeros: false,
+        };
+        Archive { inner }
+    }
+
+    /// Consumes the archive and hands back the reader it was built from.
+    pub fn into_inner(self) -> R {
+        let Archive { inner } = self;
+        inner.obj.into_inner()
+    }
+
+    /// Returns an iterator over the entries of this archive.
+    ///
+    /// Fails if the archive is no longer at position 0.
+    ///
+    /// Entries must be consumed in the order the iterator yields them; if
+    /// they are processed out of sequence the contents read for each entry
+    /// may be corrupted.
+    pub fn entries(&mut self) -> io::Result<Entries<R>> {
+        // Erase the reader type so the iteration logic is compiled only once.
+        let archive: &mut Archive<dyn Read> = self;
+        let fields = archive._entries(None)?;
+        Ok(Entries {
+            fields,
+            _ignored: marker::PhantomData,
+        })
+    }
+
+    /// Extracts every entry of this tarball underneath `dst`.
+    ///
+    /// The whole archive is walked and each file is written to the location
+    /// given by its entry's path name.
+    ///
+    /// This operation is relatively sensitive in that it will not write files
+    /// outside of the path specified by `dst`. Entries whose path contains a
+    /// '..' component are skipped during unpacking.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// use std::fs::File;
+    /// use tar::Archive;
+    ///
+    /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
+    /// ar.unpack("foo").unwrap();
+    /// ```
+    pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
+        let archive: &mut Archive<dyn Read> = self;
+        archive._unpack(dst.as_ref())
+    }
+
+    /// Chooses whether extended file attributes (xattrs on Unix) are restored
+    /// when this archive is unpacked.
+    ///
+    /// Disabled by default. Currently only implemented on Unix using xattr
+    /// support; it may eventually be implemented for Windows if other archive
+    /// implementations are found which do this as well.
+    pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) {
+        self.inner.unpack_xattrs = unpack_xattrs;
+    }
+
+    /// Chooses whether extended permissions (like suid on Unix) are restored
+    /// when this archive is unpacked.
+    ///
+    /// Disabled by default and currently only implemented on Unix.
+    pub fn set_preserve_permissions(&mut self, preserve: bool) {
+        self.inner.preserve_permissions = preserve;
+    }
+
+    /// Chooses whether existing files and symlinks are overwritten on
+    /// extraction.
+    ///
+    /// Enabled by default.
+    pub fn set_overwrite(&mut self, overwrite: bool) {
+        self.inner.overwrite = overwrite;
+    }
+
+    /// Chooses whether modification time (mtime) information is restored when
+    /// this archive is unpacked.
+    ///
+    /// Enabled by default.
+    pub fn set_preserve_mtime(&mut self, preserve: bool) {
+        self.inner.preserve_mtime = preserve;
+    }
+
+    /// Chooses whether zeroed headers are skipped instead of being treated as
+    /// the end of the archive.
+    ///
+    /// Useful when several tar archives have been concatenated together.
+    /// Disabled by default.
+    pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) {
+        self.inner.ignore_zeros = ignore_zeros;
+    }
+}
+
+impl<R: Seek + Read> Archive<R> {
+    /// Returns an iterator over the entries of this archive that uses `Seek`
+    /// to jump over file contents instead of reading through them.
+    ///
+    /// Fails if the archive is no longer at position 0.
+    ///
+    /// Entries must be consumed in the order the iterator yields them; if
+    /// they are processed out of sequence the contents read for each entry
+    /// may be corrupted.
+    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
+        // Two type-erased views of the same archive: one for reading, one
+        // that additionally allows seeking.
+        let readable: &Archive<dyn Read> = self;
+        let seekable: &Archive<dyn SeekRead> = self;
+        let fields = readable._entries(Some(seekable))?;
+        Ok(Entries {
+            fields,
+            _ignored: marker::PhantomData,
+        })
+    }
+}
+
+impl Archive<dyn Read + '_> {
+    /// Builds the type-erased iteration state for this archive.
+    ///
+    /// `seekable_archive` is `Some` when the caller can also seek the reader;
+    /// it is stored so that skipping can seek instead of read.
+    ///
+    /// Returns an error if the archive position is not 0, i.e. something has
+    /// already been read from it.
+    fn _entries<'a>(
+        &'a self,
+        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
+    ) -> io::Result<EntriesFields<'a>> {
+        if self.inner.pos.get() != 0 {
+            return Err(other(
+                "cannot call entries unless archive is at position 0",
+            ));
+        }
+        Ok(EntriesFields {
+            archive: self,
+            seekable_archive,
+            done: false,
+            next: 0,
+            raw: false,
+        })
+    }
+
+    /// Unpacks every entry of the archive underneath `dst`.
+    ///
+    /// `dst` is created (with parents) when it does not exist yet. Directory
+    /// entries are unpacked last, after all other entries.
+    ///
+    /// Returns the first error hit while creating `dst`, iterating the
+    /// archive, or unpacking an entry.
+    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
+        if dst.symlink_metadata().is_err() {
+            fs::create_dir_all(dst)
+                .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?;
+        }
+
+        // Canonicalizing the dst directory will prepend the path with '\\?\'
+        // on windows which will allow windows APIs to treat the path as an
+        // extended-length path with a 32,767 character limit. Otherwise all
+        // unpacked paths over 260 characters will fail on creation with a
+        // NotFound exception.
+        //
+        // The fallback copy of `dst` is only built when canonicalization
+        // fails.
+        let dst = &dst.canonicalize().unwrap_or_else(|_| dst.to_path_buf());
+
+        // Delay any directory entries until the end (they will be created if needed by
+        // descendants), to ensure that directory permissions do not interfere with descendant
+        // extraction.
+        let mut directories = Vec::new();
+        for entry in self._entries(None)? {
+            let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
+            if file.header().entry_type() == crate::EntryType::Directory {
+                directories.push(file);
+            } else {
+                file.unpack_in(dst)?;
+            }
+        }
+        for mut dir in directories {
+            dir.unpack_in(dst)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<'a, R: Read> Entries<'a, R> {
+    /// Selects whether this iterator yields raw entries.
+    ///
+    /// With raw iteration enabled the library does no preprocessing of the
+    /// entry list, such as taking GNU long name or long link archive members
+    /// into account. Raw iteration is disabled by default.
+    pub fn raw(self, raw: bool) -> Entries<'a, R> {
+        let Entries {
+            mut fields,
+            _ignored,
+        } = self;
+        fields.raw = raw;
+        Entries { fields, _ignored }
+    }
+}
+impl<'a, R: Read> Iterator for Entries<'a, R> {
+    type Item = io::Result<Entry<'a, R>>;
+
+    /// Pulls the next entry from the type-erased iterator and converts it
+    /// back into an entry typed over `R`.
+    fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
+        let item = self.fields.next()?;
+        Some(item.map(|erased| EntryFields::from(erased).into_entry()))
+    }
+}
+
+impl<'a> EntriesFields<'a> {
+ /// Reads the next header block from the archive and wraps it in an entry,
+ /// without interpreting extension members.
+ ///
+ /// `pax_size`, when given, is used as the entry size if the size recorded
+ /// in the header itself is 0.
+ ///
+ /// Returns `Ok(None)` when the reader is at EOF before any header byte is
+ /// read, or when an all-zero header is found and `ignore_zeros` is off.
+ /// Returns an error on a checksum mismatch, on size overflow, or when
+ /// reading/skipping fails.
+ fn next_entry_raw(
+ &mut self,
+ pax_size: Option<u64>,
+ ) -> io::Result<Option<Entry<'a, io::Empty>>> {
+ let mut header = Header::new_old();
+ let mut header_pos = self.next;
+ loop {
+ // Seek to the start of the next header in the archive
+ let delta = self.next - self.archive.inner.pos.get();
+ self.skip(delta)?;
+
+ // EOF is an indicator that we are at the end of the archive.
+ if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
+ return Ok(None);
+ }
+
+ // If a header is not all zeros, we have another valid header.
+ // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
+ // end of the archive.
+ if !header.as_bytes().iter().all(|i| *i == 0) {
+ self.next += 512;
+ break;
+ }
+
+ if !self.archive.inner.ignore_zeros {
+ return Ok(None);
+ }
+ // Step over the zeroed block; the header we eventually return starts
+ // after it.
+ self.next += 512;
+ header_pos = self.next;
+ }
+
+ // Make sure the checksum is ok
+ // Bytes 148..156 are left out of the sum and counted as 8 * 32 instead
+ // (32 is an ASCII space) -- presumably this is the checksum field itself.
+ let sum = header.as_bytes()[..148]
+ .iter()
+ .chain(&header.as_bytes()[156..])
+ .fold(0, |a, b| a + (*b as u32))
+ + 8 * 32;
+ let cksum = header.cksum()?;
+ if sum != cksum {
+ return Err(other("archive header checksum mismatch"));
+ }
+
+ // The entry's data starts right after its header block.
+ let file_pos = self.next;
+ let mut size = header.entry_size()?;
+ // A header size of 0 falls back to the size supplied by the caller.
+ if size == 0 {
+ if let Some(pax_size) = pax_size {
+ size = pax_size;
+ }
+ }
+ let ret = EntryFields {
+ size: size,
+ header_pos: header_pos,
+ file_pos: file_pos,
+ data: vec![EntryIo::Data((&self.archive.inner).take(size))],
+ header: header,
+ long_pathname: None,
+ long_linkname: None,
+ pax_extensions: None,
+ unpack_xattrs: self.archive.inner.unpack_xattrs,
+ preserve_permissions: self.archive.inner.preserve_permissions,
+ preserve_mtime: self.archive.inner.preserve_mtime,
+ overwrite: self.archive.inner.overwrite,
+ };
+
+ // Store where the next entry is, rounding up by 512 bytes (the size of
+ // a header);
+ let size = size
+ .checked_add(511)
+ .ok_or_else(|| other("size overflow"))?;
+ self.next = self
+ .next
+ .checked_add(size & !(512 - 1))
+ .ok_or_else(|| other("size overflow"))?;
+
+ Ok(Some(ret.into_entry()))
+ }
+
+ /// Produces the next logical entry of the archive.
+ ///
+ /// In raw mode this is simply the next raw entry. Otherwise GNU long name,
+ /// GNU long link and pax local extension members are consumed and attached
+ /// to the member that follows them, and sparse headers are parsed.
+ ///
+ /// Returns `Ok(None)` at the end of the archive. Returns an error when the
+ /// same kind of extension appears twice for one member, or when the
+ /// archive ends right after an extension member.
+ fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> {
+     if self.raw {
+         return self.next_entry_raw(None);
+     }
+
+     let mut long_name = None;
+     let mut long_link = None;
+     let mut pax = None;
+     let mut pax_size = None;
+     // Becomes true once at least one extension member has been consumed.
+     let mut saw_extension = false;
+     loop {
+         let entry = match self.next_entry_raw(pax_size)? {
+             Some(entry) => entry,
+             None => {
+                 return if saw_extension {
+                     Err(other(
+                         "members found describing a future member but no future member found",
+                     ))
+                 } else {
+                     Ok(None)
+                 };
+             }
+         };
+
+         let recognized =
+             entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some();
+         let kind = entry.header().entry_type();
+
+         if recognized && kind.is_gnu_longname() {
+             if long_name.is_some() {
+                 return Err(other("two long name entries describing the same member"));
+             }
+             long_name = Some(EntryFields::from(entry).read_all()?);
+             saw_extension = true;
+             continue;
+         }
+
+         if recognized && kind.is_gnu_longlink() {
+             if long_link.is_some() {
+                 return Err(other("two long name entries describing the same member"));
+             }
+             long_link = Some(EntryFields::from(entry).read_all()?);
+             saw_extension = true;
+             continue;
+         }
+
+         if recognized && kind.is_pax_local_extensions() {
+             if pax.is_some() {
+                 return Err(other(
+                     "two pax extensions entries describing the same member",
+                 ));
+             }
+             let bytes = EntryFields::from(entry).read_all()?;
+             // The extension may carry the size of the member it describes.
+             pax_size = pax_extensions_size(&bytes);
+             pax = Some(bytes);
+             saw_extension = true;
+             continue;
+         }
+
+         // An ordinary member: attach whatever extensions preceded it.
+         let mut fields = EntryFields::from(entry);
+         fields.long_pathname = long_name;
+         fields.long_linkname = long_link;
+         fields.pax_extensions = pax;
+         self.parse_sparse_header(&mut fields)?;
+         return Ok(Some(fields.into_entry()));
+     }
+ }
+
+ /// Rebuilds `entry.data` for a GNU sparse entry from its sparse block list.
+ ///
+ /// Does nothing when the entry type is not GNU sparse. Otherwise the
+ /// entry's I/O list is replaced by alternating zero padding and archive
+ /// data, `entry.size` is set to the end offset of the last block, and
+ /// `self.next` is advanced by 512 for every extension block read.
+ ///
+ /// Returns an error when the header is not a GNU header, when blocks are
+ /// misaligned, out of order or overlapping, when sizes overflow or do not
+ /// match the header, or when an extension block cannot be read.
+ fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
+ if !entry.header.entry_type().is_gnu_sparse() {
+ return Ok(());
+ }
+ let gnu = match entry.header.as_gnu() {
+ Some(gnu) => gnu,
+ None => return Err(other("sparse entry type listed but not GNU header")),
+ };
+
+ // Sparse files are represented internally as a list of blocks that are
+ // read. Blocks are either a bunch of 0's or they're data from the
+ // underlying archive.
+ //
+ // Blocks of a sparse file are described by the `GnuSparseHeader`
+ // structure, some of which are contained in `GnuHeader` but some of
+ // which may also be contained after the first header in further
+ // headers.
+ //
+ // We read off all the blocks here and use the `add_block` function to
+ // incrementally add them to the list of I/O block (in `entry.data`).
+ // The `add_block` function also validates that each chunk comes after
+ // the previous, we don't overrun the end of the file, and each block is
+ // aligned to a 512-byte boundary in the archive itself.
+ //
+ // At the end we verify that the sparse file size (`Header::size`) is
+ // the same as the current offset (described by the list of blocks) as
+ // well as the amount of data read equals the size of the entry
+ // (`Header::entry_size`).
+ entry.data.truncate(0);
+
+ // `cur` is the offset reached so far in the expanded file; `remaining`
+ // is how much of the entry's archive data has not been assigned to a
+ // block yet.
+ let mut cur = 0;
+ let mut remaining = entry.size;
+ {
+ let data = &mut entry.data;
+ let reader = &self.archive.inner;
+ let size = entry.size;
+ let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
+ if block.is_empty() {
+ return Ok(());
+ }
+ let off = block.offset()?;
+ let len = block.length()?;
+ // `size - remaining` is the amount of archive data handed to
+ // earlier blocks.
+ if len != 0 && (size - remaining) % 512 != 0 {
+ return Err(other(
+ "previous block in sparse file was not \
+ aligned to 512-byte boundary",
+ ));
+ } else if off < cur {
+ return Err(other(
+ "out of order or overlapping sparse \
+ blocks",
+ ));
+ } else if cur < off {
+ // Fill the hole before this block with zeros.
+ let block = io::repeat(0).take(off - cur);
+ data.push(EntryIo::Pad(block));
+ }
+ cur = off
+ .checked_add(len)
+ .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?;
+ remaining = remaining.checked_sub(len).ok_or_else(|| {
+ other(
+ "sparse file consumed more data than the header \
+ listed",
+ )
+ })?;
+ data.push(EntryIo::Data(reader.take(len)));
+ Ok(())
+ };
+ for block in gnu.sparse.iter() {
+ add_block(block)?
+ }
+ if gnu.is_extended() {
+ let mut ext = GnuExtSparseHeader::new();
+ // Set the flag up front so the loop below runs for the first
+ // extension block (presumably `is_extended` reads this byte).
+ ext.isextended[0] = 1;
+ while ext.is_extended() {
+ if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
+ return Err(other("failed to read extension"));
+ }
+
+ // Account for the extension block just read so the offset of
+ // the next header stays in step with the reader.
+ self.next += 512;
+ for block in ext.sparse.iter() {
+ add_block(block)?;
+ }
+ }
+ }
+ }
+ if cur != gnu.real_size()? {
+ return Err(other(
+ "mismatch in sparse file chunks and \
+ size in header",
+ ));
+ }
+ entry.size = cur;
+ if remaining > 0 {
+ return Err(other(
+ "mismatch in sparse file chunks and \
+ entry size in header",
+ ));
+ }
+ Ok(())
+ }
+
+ /// Advances the archive by `amt` bytes without returning them.
+ ///
+ /// With a seekable archive this is a relative seek; otherwise the bytes
+ /// are read into a scratch buffer and discarded.
+ ///
+ /// Returns an error if `amt` does not fit the seek offset, if the
+ /// position would overflow, if the reader fails, or if the reader hits
+ /// EOF before `amt` bytes were skipped while reading.
+ fn skip(&mut self, mut amt: u64) -> io::Result<()> {
+     if let Some(seekable_archive) = self.seekable_archive {
+         let offset =
+             i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?;
+         let inner = &seekable_archive.inner;
+         // `pos` counts bytes relative to where the archive started reading
+         // (it begins at 0 whatever the reader's own offset is), and `next`
+         // uses the same scale. Seeking stores the reader's absolute offset
+         // in `pos`, so restore the relative value afterwards.
+         let target = inner
+             .pos
+             .get()
+             .checked_add(amt)
+             .ok_or_else(|| other("seek position out of bounds"))?;
+         (&seekable_archive.inner).seek(io::SeekFrom::Current(offset))?;
+         inner.pos.set(target);
+     } else {
+         let mut buf = [0u8; 4096 * 8];
+         while amt > 0 {
+             let want = cmp::min(amt, buf.len() as u64) as usize;
+             match (&self.archive.inner).read(&mut buf[..want]) {
+                 Ok(0) => return Err(other("unexpected EOF during skip")),
+                 Ok(n) => amt -= n as u64,
+                 // An interrupted read is not fatal; try again.
+                 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
+                 Err(e) => return Err(e),
+             }
+         }
+     }
+     Ok(())
+ }
+}
+
+impl<'a> Iterator for EntriesFields<'a> {
+    type Item = io::Result<Entry<'a, io::Empty>>;
+
+    /// Yields the next entry. After the end of the archive or the first
+    /// error has been returned, every later call returns `None`.
+    fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> {
+        if self.done {
+            return None;
+        }
+        let last = match self.next_entry() {
+            Ok(Some(entry)) => return Some(Ok(entry)),
+            Ok(None) => None,
+            Err(err) => Some(Err(err)),
+        };
+        // Both the end of the archive and an error stop iteration for good.
+        self.done = true;
+        last
+    }
+}
+
+impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
+    /// Reads from the wrapped reader and advances the recorded position by
+    /// the number of bytes obtained.
+    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
+        let count = self.obj.borrow_mut().read(into)?;
+        let advanced = self.pos.get() + count as u64;
+        self.pos.set(advanced);
+        Ok(count)
+    }
+}
+
+impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
+    /// Seeks the wrapped reader and records the position it reports.
+    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
+        let landed = self.obj.borrow_mut().seek(pos)?;
+        self.pos.set(landed);
+        Ok(landed)
+    }
+}
+
+/// Try to fill the buffer from the reader.
+///
+/// Returns `Ok(true)` once `buf` has been filled completely (an empty `buf`
+/// counts as filled). Returns `Ok(false)` if the reader reaches its end
+/// before providing any byte. Returns an error if the reader ends after
+/// filling only part of the buffer, or if a read fails.
+///
+/// Reads that fail with `ErrorKind::Interrupted` are retried.
+fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
+    let mut filled = 0;
+    while filled < buf.len() {
+        match r.read(&mut buf[filled..]) {
+            // End of input: clean only if nothing was read yet.
+            Ok(0) if filled == 0 => return Ok(false),
+            Ok(0) => return Err(other("failed to read entire block")),
+            Ok(n) => filled += n,
+            // An interrupted read is not fatal; try again.
+            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
+            Err(e) => return Err(e),
+        }
+    }
+    Ok(true)
+}