diff options
Diffstat (limited to 'vendor/tar-0.4.38/src/archive.rs')
-rw-r--r-- | vendor/tar-0.4.38/src/archive.rs | 577 |
1 files changed, 577 insertions, 0 deletions
diff --git a/vendor/tar-0.4.38/src/archive.rs b/vendor/tar-0.4.38/src/archive.rs new file mode 100644 index 000000000..1bed51249 --- /dev/null +++ b/vendor/tar-0.4.38/src/archive.rs @@ -0,0 +1,577 @@ +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::convert::TryFrom; +use std::fs; +use std::io::prelude::*; +use std::io::{self, SeekFrom}; +use std::marker; +use std::path::Path; + +use crate::entry::{EntryFields, EntryIo}; +use crate::error::TarError; +use crate::other; +use crate::pax::pax_extensions_size; +use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; + +/// A top-level representation of an archive file. +/// +/// This archive can have an entry added to it and it can be iterated over. +pub struct Archive<R: ?Sized + Read> { + inner: ArchiveInner<R>, +} + +pub struct ArchiveInner<R: ?Sized> { + pos: Cell<u64>, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_mtime: bool, + overwrite: bool, + ignore_zeros: bool, + obj: RefCell<R>, +} + +/// An iterator over the entries of an archive. +pub struct Entries<'a, R: 'a + Read> { + fields: EntriesFields<'a>, + _ignored: marker::PhantomData<&'a Archive<R>>, +} + +trait SeekRead: Read + Seek {} +impl<R: Read + Seek> SeekRead for R {} + +struct EntriesFields<'a> { + archive: &'a Archive<dyn Read + 'a>, + seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, + next: u64, + done: bool, + raw: bool, +} + +impl<R: Read> Archive<R> { + /// Create a new archive with the underlying object as the reader. + pub fn new(obj: R) -> Archive<R> { + Archive { + inner: ArchiveInner { + unpack_xattrs: false, + preserve_permissions: false, + preserve_mtime: true, + overwrite: true, + ignore_zeros: false, + obj: RefCell::new(obj), + pos: Cell::new(0), + }, + } + } + + /// Unwrap this archive, returning the underlying object. + pub fn into_inner(self) -> R { + self.inner.obj.into_inner() + } + + /// Construct an iterator over the entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. + pub fn entries(&mut self) -> io::Result<Entries<R>> { + let me: &mut Archive<dyn Read> = self; + me._entries(None).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } + + /// Unpacks the contents tarball into the specified `dst`. + /// + /// This function will iterate over the entire contents of this tarball, + /// extracting each file in turn to the location specified by the entry's + /// path name. + /// + /// This operation is relatively sensitive in that it will not write files + /// outside of the path specified by `dst`. Files in the archive which have + /// a '..' in their path are skipped during the unpacking process. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use tar::Archive; + /// + /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); + /// ar.unpack("foo").unwrap(); + /// ``` + pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> { + let me: &mut Archive<dyn Read> = self; + me._unpack(dst.as_ref()) + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this archive. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. + pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { + self.inner.unpack_xattrs = unpack_xattrs; + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_permissions(&mut self, preserve: bool) { + self.inner.preserve_permissions = preserve; + } + + /// Indicate whether files and symlinks should be overwritten on extraction. + pub fn set_overwrite(&mut self, overwrite: bool) { + self.inner.overwrite = overwrite; + } + + /// Indicate whether access time information is preserved when unpacking + /// this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(&mut self, preserve: bool) { + self.inner.preserve_mtime = preserve; + } + + /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more + /// entries. + /// + /// This can be used in case multiple tar archives have been concatenated together. + pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { + self.inner.ignore_zeros = ignore_zeros; + } +} + +impl<R: Seek + Read> Archive<R> { + /// Construct an iterator over the entries in this archive for a seekable + /// reader. Seek will be used to efficiently skip over file contents. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. + pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> { + let me: &Archive<dyn Read> = self; + let me_seekable: &Archive<dyn SeekRead> = self; + me._entries(Some(me_seekable)).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } +} + +impl Archive<dyn Read + '_> { + fn _entries<'a>( + &'a self, + seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>, + ) -> io::Result<EntriesFields<'a>> { + if self.inner.pos.get() != 0 { + return Err(other( + "cannot call entries unless archive is at \ + position 0", + )); + } + Ok(EntriesFields { + archive: self, + seekable_archive, + done: false, + next: 0, + raw: false, + }) + } + + fn _unpack(&mut self, dst: &Path) -> io::Result<()> { + if dst.symlink_metadata().is_err() { + fs::create_dir_all(&dst) + .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?; + } + + // Canonicalizing the dst directory will prepend the path with '\\?\' + // on windows which will allow windows APIs to treat the path as an + // extended-length path with a 32,767 character limit. Otherwise all + // unpacked paths over 260 characters will fail on creation with a + // NotFound exception. + let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf()); + + // Delay any directory entries until the end (they will be created if needed by + // descendants), to ensure that directory permissions do not interfer with descendant + // extraction. + let mut directories = Vec::new(); + for entry in self._entries(None)? { + let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?; + if file.header().entry_type() == crate::EntryType::Directory { + directories.push(file); + } else { + file.unpack_in(dst)?; + } + } + for mut dir in directories { + dir.unpack_in(dst)?; + } + + Ok(()) + } +} + +impl<'a, R: Read> Entries<'a, R> { + /// Indicates whether this iterator will return raw entries or not. + /// + /// If the raw list of entries are returned, then no preprocessing happens + /// on account of this library, for example taking into account GNU long name + /// or long link archive members. Raw iteration is disabled by default. + pub fn raw(self, raw: bool) -> Entries<'a, R> { + Entries { + fields: EntriesFields { + raw: raw, + ..self.fields + }, + _ignored: marker::PhantomData, + } + } +} +impl<'a, R: Read> Iterator for Entries<'a, R> { + type Item = io::Result<Entry<'a, R>>; + + fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> { + self.fields + .next() + .map(|result| result.map(|e| EntryFields::from(e).into_entry())) + } +} + +impl<'a> EntriesFields<'a> { + fn next_entry_raw( + &mut self, + pax_size: Option<u64>, + ) -> io::Result<Option<Entry<'a, io::Empty>>> { + let mut header = Header::new_old(); + let mut header_pos = self.next; + loop { + // Seek to the start of the next header in the archive + let delta = self.next - self.archive.inner.pos.get(); + self.skip(delta)?; + + // EOF is an indicator that we are at the end of the archive. + if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { + return Ok(None); + } + + // If a header is not all zeros, we have another valid header. + // Otherwise, check if we are ignoring zeros and continue, or break as if this is the + // end of the archive. + if !header.as_bytes().iter().all(|i| *i == 0) { + self.next += 512; + break; + } + + if !self.archive.inner.ignore_zeros { + return Ok(None); + } + self.next += 512; + header_pos = self.next; + } + + // Make sure the checksum is ok + let sum = header.as_bytes()[..148] + .iter() + .chain(&header.as_bytes()[156..]) + .fold(0, |a, b| a + (*b as u32)) + + 8 * 32; + let cksum = header.cksum()?; + if sum != cksum { + return Err(other("archive header checksum mismatch")); + } + + let file_pos = self.next; + let mut size = header.entry_size()?; + if size == 0 { + if let Some(pax_size) = pax_size { + size = pax_size; + } + } + let ret = EntryFields { + size: size, + header_pos: header_pos, + file_pos: file_pos, + data: vec![EntryIo::Data((&self.archive.inner).take(size))], + header: header, + long_pathname: None, + long_linkname: None, + pax_extensions: None, + unpack_xattrs: self.archive.inner.unpack_xattrs, + preserve_permissions: self.archive.inner.preserve_permissions, + preserve_mtime: self.archive.inner.preserve_mtime, + overwrite: self.archive.inner.overwrite, + }; + + // Store where the next entry is, rounding up by 512 bytes (the size of + // a header); + let size = size + .checked_add(511) + .ok_or_else(|| other("size overflow"))?; + self.next = self + .next + .checked_add(size & !(512 - 1)) + .ok_or_else(|| other("size overflow"))?; + + Ok(Some(ret.into_entry())) + } + + fn next_entry(&mut self) -> io::Result<Option<Entry<'a, io::Empty>>> { + if self.raw { + return self.next_entry_raw(None); + } + + let mut gnu_longname = None; + let mut gnu_longlink = None; + let mut pax_extensions = None; + let mut pax_size = None; + let mut processed = 0; + loop { + processed += 1; + let entry = match self.next_entry_raw(pax_size)? { + Some(entry) => entry, + None if processed > 1 => { + return Err(other( + "members found describing a future member \ + but no future member found", + )); + } + None => return Ok(None), + }; + + let is_recognized_header = + entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); + + if is_recognized_header && entry.header().entry_type().is_gnu_longname() { + if gnu_longname.is_some() { + return Err(other( + "two long name entries describing \ + the same member", + )); + } + gnu_longname = Some(EntryFields::from(entry).read_all()?); + continue; + } + + if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { + if gnu_longlink.is_some() { + return Err(other( + "two long name entries describing \ + the same member", + )); + } + gnu_longlink = Some(EntryFields::from(entry).read_all()?); + continue; + } + + if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { + if pax_extensions.is_some() { + return Err(other( + "two pax extensions entries describing \ + the same member", + )); + } + pax_extensions = Some(EntryFields::from(entry).read_all()?); + if let Some(pax_extensions_ref) = &pax_extensions { + pax_size = pax_extensions_size(pax_extensions_ref); + } + continue; + } + + let mut fields = EntryFields::from(entry); + fields.long_pathname = gnu_longname; + fields.long_linkname = gnu_longlink; + fields.pax_extensions = pax_extensions; + self.parse_sparse_header(&mut fields)?; + return Ok(Some(fields.into_entry())); + } + } + + fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { + if !entry.header.entry_type().is_gnu_sparse() { + return Ok(()); + } + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Err(other("sparse entry type listed but not GNU header")), + }; + + // Sparse files are represented internally as a list of blocks that are + // read. Blocks are either a bunch of 0's or they're data from the + // underlying archive. + // + // Blocks of a sparse file are described by the `GnuSparseHeader` + // structure, some of which are contained in `GnuHeader` but some of + // which may also be contained after the first header in further + // headers. + // + // We read off all the blocks here and use the `add_block` function to + // incrementally add them to the list of I/O block (in `entry.data`). + // The `add_block` function also validates that each chunk comes after + // the previous, we don't overrun the end of the file, and each block is + // aligned to a 512-byte boundary in the archive itself. + // + // At the end we verify that the sparse file size (`Header::size`) is + // the same as the current offset (described by the list of blocks) as + // well as the amount of data read equals the size of the entry + // (`Header::entry_size`). + entry.data.truncate(0); + + let mut cur = 0; + let mut remaining = entry.size; + { + let data = &mut entry.data; + let reader = &self.archive.inner; + let size = entry.size; + let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { + if block.is_empty() { + return Ok(()); + } + let off = block.offset()?; + let len = block.length()?; + if len != 0 && (size - remaining) % 512 != 0 { + return Err(other( + "previous block in sparse file was not \ + aligned to 512-byte boundary", + )); + } else if off < cur { + return Err(other( + "out of order or overlapping sparse \ + blocks", + )); + } else if cur < off { + let block = io::repeat(0).take(off - cur); + data.push(EntryIo::Pad(block)); + } + cur = off + .checked_add(len) + .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; + remaining = remaining.checked_sub(len).ok_or_else(|| { + other( + "sparse file consumed more data than the header \ + listed", + ) + })?; + data.push(EntryIo::Data(reader.take(len))); + Ok(()) + }; + for block in gnu.sparse.iter() { + add_block(block)? + } + if gnu.is_extended() { + let mut ext = GnuExtSparseHeader::new(); + ext.isextended[0] = 1; + while ext.is_extended() { + if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { + return Err(other("failed to read extension")); + } + + self.next += 512; + for block in ext.sparse.iter() { + add_block(block)?; + } + } + } + } + if cur != gnu.real_size()? { + return Err(other( + "mismatch in sparse file chunks and \ + size in header", + )); + } + entry.size = cur; + if remaining > 0 { + return Err(other( + "mismatch in sparse file chunks and \ + entry size in header", + )); + } + Ok(()) + } + + fn skip(&mut self, mut amt: u64) -> io::Result<()> { + if let Some(seekable_archive) = self.seekable_archive { + let pos = io::SeekFrom::Current( + i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?, + ); + (&seekable_archive.inner).seek(pos)?; + } else { + let mut buf = [0u8; 4096 * 8]; + while amt > 0 { + let n = cmp::min(amt, buf.len() as u64); + let n = (&self.archive.inner).read(&mut buf[..n as usize])?; + if n == 0 { + return Err(other("unexpected EOF during skip")); + } + amt -= n as u64; + } + } + Ok(()) + } +} + +impl<'a> Iterator for EntriesFields<'a> { + type Item = io::Result<Entry<'a, io::Empty>>; + + fn next(&mut self) -> Option<io::Result<Entry<'a, io::Empty>>> { + if self.done { + None + } else { + match self.next_entry() { + Ok(Some(e)) => Some(Ok(e)), + Ok(None) => { + self.done = true; + None + } + Err(e) => { + self.done = true; + Some(Err(e)) + } + } + } + } +} + +impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> { + fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { + let i = self.obj.borrow_mut().read(into)?; + self.pos.set(self.pos.get() + i as u64); + Ok(i) + } +} + +impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> { + fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { + let pos = self.obj.borrow_mut().seek(pos)?; + self.pos.set(pos); + Ok(pos) + } +} + +/// Try to fill the buffer from the reader. +/// +/// If the reader reaches its end before filling the buffer at all, returns `false`. +/// Otherwise returns `true`. +fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> { + let mut read = 0; + while read < buf.len() { + match r.read(&mut buf[read..])? { + 0 => { + if read == 0 { + return Ok(false); + } + + return Err(other("failed to read entire block")); + } + n => read += n, + } + } + Ok(true) +} |