From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/tar/src/archive.rs | 577 +++++++++++++++ vendor/tar/src/builder.rs | 663 +++++++++++++++++ vendor/tar/src/entry.rs | 832 +++++++++++++++++++++ vendor/tar/src/entry_type.rs | 193 +++++ vendor/tar/src/error.rs | 41 ++ vendor/tar/src/header.rs | 1642 ++++++++++++++++++++++++++++++++++++++++++ vendor/tar/src/lib.rs | 44 ++ vendor/tar/src/pax.rs | 117 +++ 8 files changed, 4109 insertions(+) create mode 100644 vendor/tar/src/archive.rs create mode 100644 vendor/tar/src/builder.rs create mode 100644 vendor/tar/src/entry.rs create mode 100644 vendor/tar/src/entry_type.rs create mode 100644 vendor/tar/src/error.rs create mode 100644 vendor/tar/src/header.rs create mode 100644 vendor/tar/src/lib.rs create mode 100644 vendor/tar/src/pax.rs (limited to 'vendor/tar/src') diff --git a/vendor/tar/src/archive.rs b/vendor/tar/src/archive.rs new file mode 100644 index 000000000..1bed51249 --- /dev/null +++ b/vendor/tar/src/archive.rs @@ -0,0 +1,577 @@ +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::convert::TryFrom; +use std::fs; +use std::io::prelude::*; +use std::io::{self, SeekFrom}; +use std::marker; +use std::path::Path; + +use crate::entry::{EntryFields, EntryIo}; +use crate::error::TarError; +use crate::other; +use crate::pax::pax_extensions_size; +use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}; + +/// A top-level representation of an archive file. +/// +/// This archive can have an entry added to it and it can be iterated over. +pub struct Archive { + inner: ArchiveInner, +} + +pub struct ArchiveInner { + pos: Cell, + unpack_xattrs: bool, + preserve_permissions: bool, + preserve_mtime: bool, + overwrite: bool, + ignore_zeros: bool, + obj: RefCell, +} + +/// An iterator over the entries of an archive. 
+pub struct Entries<'a, R: 'a + Read> { + fields: EntriesFields<'a>, + _ignored: marker::PhantomData<&'a Archive>, +} + +trait SeekRead: Read + Seek {} +impl SeekRead for R {} + +struct EntriesFields<'a> { + archive: &'a Archive, + seekable_archive: Option<&'a Archive>, + next: u64, + done: bool, + raw: bool, +} + +impl Archive { + /// Create a new archive with the underlying object as the reader. + pub fn new(obj: R) -> Archive { + Archive { + inner: ArchiveInner { + unpack_xattrs: false, + preserve_permissions: false, + preserve_mtime: true, + overwrite: true, + ignore_zeros: false, + obj: RefCell::new(obj), + pos: Cell::new(0), + }, + } + } + + /// Unwrap this archive, returning the underlying object. + pub fn into_inner(self) -> R { + self.inner.obj.into_inner() + } + + /// Construct an iterator over the entries in this archive. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. + pub fn entries(&mut self) -> io::Result> { + let me: &mut Archive = self; + me._entries(None).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } + + /// Unpacks the contents tarball into the specified `dst`. + /// + /// This function will iterate over the entire contents of this tarball, + /// extracting each file in turn to the location specified by the entry's + /// path name. + /// + /// This operation is relatively sensitive in that it will not write files + /// outside of the path specified by `dst`. Files in the archive which have + /// a '..' in their path are skipped during the unpacking process. 
+ /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use tar::Archive; + /// + /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); + /// ar.unpack("foo").unwrap(); + /// ``` + pub fn unpack>(&mut self, dst: P) -> io::Result<()> { + let me: &mut Archive = self; + me._unpack(dst.as_ref()) + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this archive. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. + pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { + self.inner.unpack_xattrs = unpack_xattrs; + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_permissions(&mut self, preserve: bool) { + self.inner.preserve_permissions = preserve; + } + + /// Indicate whether files and symlinks should be overwritten on extraction. + pub fn set_overwrite(&mut self, overwrite: bool) { + self.inner.overwrite = overwrite; + } + + /// Indicate whether access time information is preserved when unpacking + /// this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(&mut self, preserve: bool) { + self.inner.preserve_mtime = preserve; + } + + /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more + /// entries. + /// + /// This can be used in case multiple tar archives have been concatenated together. + pub fn set_ignore_zeros(&mut self, ignore_zeros: bool) { + self.inner.ignore_zeros = ignore_zeros; + } +} + +impl Archive { + /// Construct an iterator over the entries in this archive for a seekable + /// reader. 
Seek will be used to efficiently skip over file contents. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. + pub fn entries_with_seek(&mut self) -> io::Result> { + let me: &Archive = self; + let me_seekable: &Archive = self; + me._entries(Some(me_seekable)).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } +} + +impl Archive { + fn _entries<'a>( + &'a self, + seekable_archive: Option<&'a Archive>, + ) -> io::Result> { + if self.inner.pos.get() != 0 { + return Err(other( + "cannot call entries unless archive is at \ + position 0", + )); + } + Ok(EntriesFields { + archive: self, + seekable_archive, + done: false, + next: 0, + raw: false, + }) + } + + fn _unpack(&mut self, dst: &Path) -> io::Result<()> { + if dst.symlink_metadata().is_err() { + fs::create_dir_all(&dst) + .map_err(|e| TarError::new(format!("failed to create `{}`", dst.display()), e))?; + } + + // Canonicalizing the dst directory will prepend the path with '\\?\' + // on windows which will allow windows APIs to treat the path as an + // extended-length path with a 32,767 character limit. Otherwise all + // unpacked paths over 260 characters will fail on creation with a + // NotFound exception. + let dst = &dst.canonicalize().unwrap_or(dst.to_path_buf()); + + // Delay any directory entries until the end (they will be created if needed by + // descendants), to ensure that directory permissions do not interfer with descendant + // extraction. + let mut directories = Vec::new(); + for entry in self._entries(None)? 
{ + let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?; + if file.header().entry_type() == crate::EntryType::Directory { + directories.push(file); + } else { + file.unpack_in(dst)?; + } + } + for mut dir in directories { + dir.unpack_in(dst)?; + } + + Ok(()) + } +} + +impl<'a, R: Read> Entries<'a, R> { + /// Indicates whether this iterator will return raw entries or not. + /// + /// If the raw list of entries are returned, then no preprocessing happens + /// on account of this library, for example taking into account GNU long name + /// or long link archive members. Raw iteration is disabled by default. + pub fn raw(self, raw: bool) -> Entries<'a, R> { + Entries { + fields: EntriesFields { + raw: raw, + ..self.fields + }, + _ignored: marker::PhantomData, + } + } +} +impl<'a, R: Read> Iterator for Entries<'a, R> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + self.fields + .next() + .map(|result| result.map(|e| EntryFields::from(e).into_entry())) + } +} + +impl<'a> EntriesFields<'a> { + fn next_entry_raw( + &mut self, + pax_size: Option, + ) -> io::Result>> { + let mut header = Header::new_old(); + let mut header_pos = self.next; + loop { + // Seek to the start of the next header in the archive + let delta = self.next - self.archive.inner.pos.get(); + self.skip(delta)?; + + // EOF is an indicator that we are at the end of the archive. + if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { + return Ok(None); + } + + // If a header is not all zeros, we have another valid header. + // Otherwise, check if we are ignoring zeros and continue, or break as if this is the + // end of the archive. 
+ if !header.as_bytes().iter().all(|i| *i == 0) { + self.next += 512; + break; + } + + if !self.archive.inner.ignore_zeros { + return Ok(None); + } + self.next += 512; + header_pos = self.next; + } + + // Make sure the checksum is ok + let sum = header.as_bytes()[..148] + .iter() + .chain(&header.as_bytes()[156..]) + .fold(0, |a, b| a + (*b as u32)) + + 8 * 32; + let cksum = header.cksum()?; + if sum != cksum { + return Err(other("archive header checksum mismatch")); + } + + let file_pos = self.next; + let mut size = header.entry_size()?; + if size == 0 { + if let Some(pax_size) = pax_size { + size = pax_size; + } + } + let ret = EntryFields { + size: size, + header_pos: header_pos, + file_pos: file_pos, + data: vec![EntryIo::Data((&self.archive.inner).take(size))], + header: header, + long_pathname: None, + long_linkname: None, + pax_extensions: None, + unpack_xattrs: self.archive.inner.unpack_xattrs, + preserve_permissions: self.archive.inner.preserve_permissions, + preserve_mtime: self.archive.inner.preserve_mtime, + overwrite: self.archive.inner.overwrite, + }; + + // Store where the next entry is, rounding up by 512 bytes (the size of + // a header); + let size = size + .checked_add(511) + .ok_or_else(|| other("size overflow"))?; + self.next = self + .next + .checked_add(size & !(512 - 1)) + .ok_or_else(|| other("size overflow"))?; + + Ok(Some(ret.into_entry())) + } + + fn next_entry(&mut self) -> io::Result>> { + if self.raw { + return self.next_entry_raw(None); + } + + let mut gnu_longname = None; + let mut gnu_longlink = None; + let mut pax_extensions = None; + let mut pax_size = None; + let mut processed = 0; + loop { + processed += 1; + let entry = match self.next_entry_raw(pax_size)? 
{ + Some(entry) => entry, + None if processed > 1 => { + return Err(other( + "members found describing a future member \ + but no future member found", + )); + } + None => return Ok(None), + }; + + let is_recognized_header = + entry.header().as_gnu().is_some() || entry.header().as_ustar().is_some(); + + if is_recognized_header && entry.header().entry_type().is_gnu_longname() { + if gnu_longname.is_some() { + return Err(other( + "two long name entries describing \ + the same member", + )); + } + gnu_longname = Some(EntryFields::from(entry).read_all()?); + continue; + } + + if is_recognized_header && entry.header().entry_type().is_gnu_longlink() { + if gnu_longlink.is_some() { + return Err(other( + "two long name entries describing \ + the same member", + )); + } + gnu_longlink = Some(EntryFields::from(entry).read_all()?); + continue; + } + + if is_recognized_header && entry.header().entry_type().is_pax_local_extensions() { + if pax_extensions.is_some() { + return Err(other( + "two pax extensions entries describing \ + the same member", + )); + } + pax_extensions = Some(EntryFields::from(entry).read_all()?); + if let Some(pax_extensions_ref) = &pax_extensions { + pax_size = pax_extensions_size(pax_extensions_ref); + } + continue; + } + + let mut fields = EntryFields::from(entry); + fields.long_pathname = gnu_longname; + fields.long_linkname = gnu_longlink; + fields.pax_extensions = pax_extensions; + self.parse_sparse_header(&mut fields)?; + return Ok(Some(fields.into_entry())); + } + } + + fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> { + if !entry.header.entry_type().is_gnu_sparse() { + return Ok(()); + } + let gnu = match entry.header.as_gnu() { + Some(gnu) => gnu, + None => return Err(other("sparse entry type listed but not GNU header")), + }; + + // Sparse files are represented internally as a list of blocks that are + // read. Blocks are either a bunch of 0's or they're data from the + // underlying archive. 
+ // + // Blocks of a sparse file are described by the `GnuSparseHeader` + // structure, some of which are contained in `GnuHeader` but some of + // which may also be contained after the first header in further + // headers. + // + // We read off all the blocks here and use the `add_block` function to + // incrementally add them to the list of I/O block (in `entry.data`). + // The `add_block` function also validates that each chunk comes after + // the previous, we don't overrun the end of the file, and each block is + // aligned to a 512-byte boundary in the archive itself. + // + // At the end we verify that the sparse file size (`Header::size`) is + // the same as the current offset (described by the list of blocks) as + // well as the amount of data read equals the size of the entry + // (`Header::entry_size`). + entry.data.truncate(0); + + let mut cur = 0; + let mut remaining = entry.size; + { + let data = &mut entry.data; + let reader = &self.archive.inner; + let size = entry.size; + let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { + if block.is_empty() { + return Ok(()); + } + let off = block.offset()?; + let len = block.length()?; + if len != 0 && (size - remaining) % 512 != 0 { + return Err(other( + "previous block in sparse file was not \ + aligned to 512-byte boundary", + )); + } else if off < cur { + return Err(other( + "out of order or overlapping sparse \ + blocks", + )); + } else if cur < off { + let block = io::repeat(0).take(off - cur); + data.push(EntryIo::Pad(block)); + } + cur = off + .checked_add(len) + .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?; + remaining = remaining.checked_sub(len).ok_or_else(|| { + other( + "sparse file consumed more data than the header \ + listed", + ) + })?; + data.push(EntryIo::Data(reader.take(len))); + Ok(()) + }; + for block in gnu.sparse.iter() { + add_block(block)? 
+ } + if gnu.is_extended() { + let mut ext = GnuExtSparseHeader::new(); + ext.isextended[0] = 1; + while ext.is_extended() { + if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? { + return Err(other("failed to read extension")); + } + + self.next += 512; + for block in ext.sparse.iter() { + add_block(block)?; + } + } + } + } + if cur != gnu.real_size()? { + return Err(other( + "mismatch in sparse file chunks and \ + size in header", + )); + } + entry.size = cur; + if remaining > 0 { + return Err(other( + "mismatch in sparse file chunks and \ + entry size in header", + )); + } + Ok(()) + } + + fn skip(&mut self, mut amt: u64) -> io::Result<()> { + if let Some(seekable_archive) = self.seekable_archive { + let pos = io::SeekFrom::Current( + i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?, + ); + (&seekable_archive.inner).seek(pos)?; + } else { + let mut buf = [0u8; 4096 * 8]; + while amt > 0 { + let n = cmp::min(amt, buf.len() as u64); + let n = (&self.archive.inner).read(&mut buf[..n as usize])?; + if n == 0 { + return Err(other("unexpected EOF during skip")); + } + amt -= n as u64; + } + } + Ok(()) + } +} + +impl<'a> Iterator for EntriesFields<'a> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + if self.done { + None + } else { + match self.next_entry() { + Ok(Some(e)) => Some(Ok(e)), + Ok(None) => { + self.done = true; + None + } + Err(e) => { + self.done = true; + Some(Err(e)) + } + } + } + } +} + +impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner { + fn read(&mut self, into: &mut [u8]) -> io::Result { + let i = self.obj.borrow_mut().read(into)?; + self.pos.set(self.pos.get() + i as u64); + Ok(i) + } +} + +impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + let pos = self.obj.borrow_mut().seek(pos)?; + self.pos.set(pos); + Ok(pos) + } +} + +/// Try to fill the buffer from the reader. 
+/// +/// If the reader reaches its end before filling the buffer at all, returns `false`. +/// Otherwise returns `true`. +fn try_read_all(r: &mut R, buf: &mut [u8]) -> io::Result { + let mut read = 0; + while read < buf.len() { + match r.read(&mut buf[read..])? { + 0 => { + if read == 0 { + return Ok(false); + } + + return Err(other("failed to read entire block")); + } + n => read += n, + } + } + Ok(true) +} diff --git a/vendor/tar/src/builder.rs b/vendor/tar/src/builder.rs new file mode 100644 index 000000000..a26eb31dc --- /dev/null +++ b/vendor/tar/src/builder.rs @@ -0,0 +1,663 @@ +use std::fs; +use std::io; +use std::io::prelude::*; +use std::path::Path; +use std::str; + +use crate::header::{path2bytes, HeaderMode}; +use crate::{other, EntryType, Header}; + +/// A structure for building archives +/// +/// This structure has methods for building up an archive from scratch into any +/// arbitrary writer. +pub struct Builder { + mode: HeaderMode, + follow: bool, + finished: bool, + obj: Option, +} + +impl Builder { + /// Create a new archive builder with the underlying object as the + /// destination of all data written. The builder will use + /// `HeaderMode::Complete` by default. + pub fn new(obj: W) -> Builder { + Builder { + mode: HeaderMode::Complete, + follow: true, + finished: false, + obj: Some(obj), + } + } + + /// Changes the HeaderMode that will be used when reading fs Metadata for + /// methods that implicitly read metadata for an input Path. Notably, this + /// does _not_ apply to `append(Header)`. + pub fn mode(&mut self, mode: HeaderMode) { + self.mode = mode; + } + + /// Follow symlinks, archiving the contents of the file they point to rather + /// than adding a symlink to the archive. Defaults to true. + pub fn follow_symlinks(&mut self, follow: bool) { + self.follow = follow; + } + + /// Gets shared reference to the underlying object. 
+ pub fn get_ref(&self) -> &W { + self.obj.as_ref().unwrap() + } + + /// Gets mutable reference to the underlying object. + /// + /// Note that care must be taken while writing to the underlying + /// object. But, e.g. `get_mut().flush()` is claimed to be safe and + /// useful in the situations when one needs to be ensured that + /// tar entry was flushed to the disk. + pub fn get_mut(&mut self) -> &mut W { + self.obj.as_mut().unwrap() + } + + /// Unwrap this archive, returning the underlying object. + /// + /// This function will finish writing the archive if the `finish` function + /// hasn't yet been called, returning any I/O error which happens during + /// that operation. + pub fn into_inner(mut self) -> io::Result { + if !self.finished { + self.finish()?; + } + Ok(self.obj.take().unwrap()) + } + + /// Adds a new entry to this archive. + /// + /// This function will append the header specified, followed by contents of + /// the stream specified by `data`. To produce a valid archive the `size` + /// field of `header` must be the same as the length of the stream that's + /// being written. Additionally the checksum for the header should have been + /// set via the `set_cksum` method. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. 
+ /// + /// # Examples + /// + /// ``` + /// use tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_path("foo").unwrap(); + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append(&header, data).unwrap(); + /// let data = ar.into_inner().unwrap(); + /// ``` + pub fn append(&mut self, header: &Header, mut data: R) -> io::Result<()> { + append(self.get_mut(), header, &mut data) + } + + /// Adds a new entry to this archive with the specified path. + /// + /// This function will set the specified path in the given header, which may + /// require appending a GNU long-name extension entry to the archive first. + /// The checksum for the header will be automatically updated via the + /// `set_cksum` method after setting the path. No other metadata in the + /// header will be modified. + /// + /// Then it will append the header, followed by contents of the stream + /// specified by `data`. To produce a valid archive the `size` field of + /// `header` must be the same as the length of the stream that's being + /// written. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all entries have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. 
+ /// + /// # Examples + /// + /// ``` + /// use tar::{Builder, Header}; + /// + /// let mut header = Header::new_gnu(); + /// header.set_size(4); + /// header.set_cksum(); + /// + /// let mut data: &[u8] = &[1, 2, 3, 4]; + /// + /// let mut ar = Builder::new(Vec::new()); + /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap(); + /// let data = ar.into_inner().unwrap(); + /// ``` + pub fn append_data, R: Read>( + &mut self, + header: &mut Header, + path: P, + data: R, + ) -> io::Result<()> { + prepare_header_path(self.get_mut(), header, path.as_ref())?; + header.set_cksum(); + self.append(&header, data) + } + + /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target. + /// + /// This function is similar to [`Self::append_data`] which supports long filenames, + /// but also supports long link targets using GNU extensions if necessary. + /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`]. + /// The `set_cksum` method will be invoked after setting the path. No other metadata in the + /// header will be modified. + /// + /// If you are intending to use GNU extensions, you must use this method over calling + /// [`Header::set_link_name`] because that function will fail on long links. + /// + /// Similar constraints around the position of the archive and completion + /// apply as with [`Self::append_data`]. + /// + /// # Errors + /// + /// This function will return an error for any intermittent I/O error which + /// occurs when either reading or writing. 
+ /// + /// # Examples + /// + /// ``` + /// use tar::{Builder, Header, EntryType}; + /// + /// let mut ar = Builder::new(Vec::new()); + /// let mut header = Header::new_gnu(); + /// header.set_username("foo"); + /// header.set_entry_type(EntryType::Symlink); + /// header.set_size(0); + /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap(); + /// let data = ar.into_inner().unwrap(); + /// ``` + pub fn append_link, T: AsRef>( + &mut self, + header: &mut Header, + path: P, + target: T, + ) -> io::Result<()> { + self._append_link(header, path.as_ref(), target.as_ref()) + } + + fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> { + prepare_header_path(self.get_mut(), header, path)?; + prepare_header_link(self.get_mut(), header, target)?; + header.set_cksum(); + self.append(&header, std::io::empty()) + } + + /// Adds a file on the local filesystem to this archive. + /// + /// This function will open the file specified by `path` and insert the file + /// into the archive with the appropriate metadata set, returning any I/O + /// error which occurs while writing. The path name for the file inside of + /// this archive will be the same as `path`, and it is required that the + /// path is a relative path. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. 
+ /// + /// # Examples + /// + /// ```no_run + /// use tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// ar.append_path("foo/bar.txt").unwrap(); + /// ``` + pub fn append_path>(&mut self, path: P) -> io::Result<()> { + let mode = self.mode.clone(); + let follow = self.follow; + append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow) + } + + /// Adds a file on the local filesystem to this archive under another name. + /// + /// This function will open the file specified by `path` and insert the file + /// into the archive as `name` with appropriate metadata set, returning any + /// I/O error which occurs while writing. The path name for the file inside + /// of this archive will be `name` is required to be a relative path. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Note if the `path` is a directory. This will just add an entry to the archive, + /// rather than contents of the directory. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// use tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Insert the local file "foo/bar.txt" in the archive but with the name + /// // "bar/foo.txt". + /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap(); + /// ``` + pub fn append_path_with_name, N: AsRef>( + &mut self, + path: P, + name: N, + ) -> io::Result<()> { + let mode = self.mode.clone(); + let follow = self.follow; + append_path_with_name( + self.get_mut(), + path.as_ref(), + Some(name.as_ref()), + mode, + follow, + ) + } + + /// Adds a file to this archive with the given path as the name of the file + /// in the archive. 
+ /// + /// This will use the metadata of `file` to populate a `Header`, and it will + /// then append the file to the archive with the name `path`. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Open the file at one location, but insert it into the archive with a + /// // different name. + /// let mut f = File::open("foo/bar/baz.txt").unwrap(); + /// ar.append_file("bar/baz.txt", &mut f).unwrap(); + /// ``` + pub fn append_file>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> { + let mode = self.mode.clone(); + append_file(self.get_mut(), path.as_ref(), file, mode) + } + + /// Adds a directory to this archive with the given path as the name of the + /// directory in the archive. + /// + /// This will use `stat` to populate a `Header`, and it will then append the + /// directory to the archive with the name `path`. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Note this will not add the contents of the directory to the archive. + /// See `append_dir_all` for recusively adding the contents of the directory. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. 
+ /// + /// # Examples + /// + /// ``` + /// use std::fs; + /// use tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Use the directory at one location, but insert it into the archive + /// // with a different name. + /// ar.append_dir("bardir", ".").unwrap(); + /// ``` + pub fn append_dir(&mut self, path: P, src_path: Q) -> io::Result<()> + where + P: AsRef, + Q: AsRef, + { + let mode = self.mode.clone(); + append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode) + } + + /// Adds a directory and all of its contents (recursively) to this archive + /// with the given path as the name of the directory in the archive. + /// + /// Note that this will not attempt to seek the archive to a valid position, + /// so if the archive is in the middle of a read or some other similar + /// operation then this may corrupt the archive. + /// + /// Also note that after all files have been written to an archive the + /// `finish` function needs to be called to finish writing the archive. + /// + /// # Examples + /// + /// ``` + /// use std::fs; + /// use tar::Builder; + /// + /// let mut ar = Builder::new(Vec::new()); + /// + /// // Use the directory at one location, but insert it into the archive + /// // with a different name. + /// ar.append_dir_all("bardir", ".").unwrap(); + /// ``` + pub fn append_dir_all(&mut self, path: P, src_path: Q) -> io::Result<()> + where + P: AsRef, + Q: AsRef, + { + let mode = self.mode.clone(); + let follow = self.follow; + append_dir_all( + self.get_mut(), + path.as_ref(), + src_path.as_ref(), + mode, + follow, + ) + } + + /// Finish writing this archive, emitting the termination sections. + /// + /// This function should only be called when the archive has been written + /// entirely and if an I/O error happens the underlying object still needs + /// to be acquired. + /// + /// In most situations the `into_inner` method should be preferred. 
+ pub fn finish(&mut self) -> io::Result<()> { + if self.finished { + return Ok(()); + } + self.finished = true; + self.get_mut().write_all(&[0; 1024]) + } +} + +fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> { + dst.write_all(header.as_bytes())?; + let len = io::copy(&mut data, &mut dst)?; + + // Pad with zeros if necessary. + let buf = [0; 512]; + let remaining = 512 - (len % 512); + if remaining < 512 { + dst.write_all(&buf[..remaining as usize])?; + } + + Ok(()) +} + +fn append_path_with_name( + dst: &mut dyn Write, + path: &Path, + name: Option<&Path>, + mode: HeaderMode, + follow: bool, +) -> io::Result<()> { + let stat = if follow { + fs::metadata(path).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting metadata for {}", err, path.display()), + ) + })? + } else { + fs::symlink_metadata(path).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting metadata for {}", err, path.display()), + ) + })? 
+ }; + let ar_name = name.unwrap_or(path); + if stat.is_file() { + append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None) + } else if stat.is_dir() { + append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None) + } else if stat.file_type().is_symlink() { + let link_name = fs::read_link(path)?; + append_fs( + dst, + ar_name, + &stat, + &mut io::empty(), + mode, + Some(&link_name), + ) + } else { + #[cfg(unix)] + { + append_special(dst, path, &stat, mode) + } + #[cfg(not(unix))] + { + Err(other(&format!("{} has unknown file type", path.display()))) + } + } +} + +#[cfg(unix)] +fn append_special( + dst: &mut dyn Write, + path: &Path, + stat: &fs::Metadata, + mode: HeaderMode, +) -> io::Result<()> { + use ::std::os::unix::fs::{FileTypeExt, MetadataExt}; + + let file_type = stat.file_type(); + let entry_type; + if file_type.is_socket() { + // sockets can't be archived + return Err(other(&format!( + "{}: socket can not be archived", + path.display() + ))); + } else if file_type.is_fifo() { + entry_type = EntryType::Fifo; + } else if file_type.is_char_device() { + entry_type = EntryType::Char; + } else if file_type.is_block_device() { + entry_type = EntryType::Block; + } else { + return Err(other(&format!("{} has unknown file type", path.display()))); + } + + let mut header = Header::new_gnu(); + header.set_metadata_in_mode(stat, mode); + prepare_header_path(dst, &mut header, path)?; + + header.set_entry_type(entry_type); + let dev_id = stat.rdev(); + let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff); + let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff); + header.set_device_major(dev_major as u32)?; + header.set_device_minor(dev_minor as u32)?; + + header.set_cksum(); + dst.write_all(header.as_bytes())?; + + Ok(()) +} + +fn append_file( + dst: &mut dyn Write, + path: &Path, + file: &mut fs::File, + mode: HeaderMode, +) -> io::Result<()> { + let stat = file.metadata()?; + append_fs(dst, path, &stat, file, 
mode, None) +} + +fn append_dir( + dst: &mut dyn Write, + path: &Path, + src_path: &Path, + mode: HeaderMode, +) -> io::Result<()> { + let stat = fs::metadata(src_path)?; + append_fs(dst, path, &stat, &mut io::empty(), mode, None) +} + +fn prepare_header(size: u64, entry_type: u8) -> Header { + let mut header = Header::new_gnu(); + let name = b"././@LongLink"; + header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(0); + // + 1 to be compliant with GNU tar + header.set_size(size + 1); + header.set_entry_type(EntryType::new(entry_type)); + header.set_cksum(); + header +} + +fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> { + // Try to encode the path directly in the header, but if it ends up not + // working (probably because it's too long) then try to use the GNU-specific + // long name extension by emitting an entry which indicates that it's the + // filename. + if let Err(e) = header.set_path(path) { + let data = path2bytes(&path)?; + let max = header.as_old().name.len(); + // Since `e` isn't specific enough to let us know the path is indeed too + // long, verify it first before using the extension. + if data.len() < max { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, b'L'); + // null-terminated string + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2)?; + + // Truncate the path to store in the header we're about to emit to + // ensure we've got something at least mentioned. Note that we use + // `str`-encoding to be compatible with Windows, but in general the + // entry in the header itself shouldn't matter too much since extraction + // doesn't look at it. 
+ let truncated = match str::from_utf8(&data[..max]) { + Ok(s) => s, + Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(), + }; + header.set_path(truncated)?; + } + Ok(()) +} + +fn prepare_header_link( + dst: &mut dyn Write, + header: &mut Header, + link_name: &Path, +) -> io::Result<()> { + // Same as previous function but for linkname + if let Err(e) = header.set_link_name(&link_name) { + let data = path2bytes(&link_name)?; + if data.len() < header.as_old().linkname.len() { + return Err(e); + } + let header2 = prepare_header(data.len() as u64, b'K'); + let mut data2 = data.chain(io::repeat(0).take(1)); + append(dst, &header2, &mut data2)?; + } + Ok(()) +} + +fn append_fs( + dst: &mut dyn Write, + path: &Path, + meta: &fs::Metadata, + read: &mut dyn Read, + mode: HeaderMode, + link_name: Option<&Path>, +) -> io::Result<()> { + let mut header = Header::new_gnu(); + + prepare_header_path(dst, &mut header, path)?; + header.set_metadata_in_mode(meta, mode); + if let Some(link_name) = link_name { + prepare_header_link(dst, &mut header, link_name)?; + } + header.set_cksum(); + append(dst, &header, read) +} + +fn append_dir_all( + dst: &mut dyn Write, + path: &Path, + src_path: &Path, + mode: HeaderMode, + follow: bool, +) -> io::Result<()> { + let mut stack = vec![(src_path.to_path_buf(), true, false)]; + while let Some((src, is_dir, is_symlink)) = stack.pop() { + let dest = path.join(src.strip_prefix(&src_path).unwrap()); + // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true + if is_dir || (is_symlink && follow && src.is_dir()) { + for entry in fs::read_dir(&src)? 
{ + let entry = entry?; + let file_type = entry.file_type()?; + stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink())); + } + if dest != Path::new("") { + append_dir(dst, &dest, &src, mode)?; + } + } else if !follow && is_symlink { + let stat = fs::symlink_metadata(&src)?; + let link_name = fs::read_link(&src)?; + append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?; + } else { + #[cfg(unix)] + { + let stat = fs::metadata(&src)?; + if !stat.is_file() { + append_special(dst, &dest, &stat, mode)?; + continue; + } + } + append_file(dst, &dest, &mut fs::File::open(src)?, mode)?; + } + } + Ok(()) +} + +impl Drop for Builder { + fn drop(&mut self) { + let _ = self.finish(); + } +} diff --git a/vendor/tar/src/entry.rs b/vendor/tar/src/entry.rs new file mode 100644 index 000000000..8f0b62acf --- /dev/null +++ b/vendor/tar/src/entry.rs @@ -0,0 +1,832 @@ +use std::borrow::Cow; +use std::cmp; +use std::fs; +use std::fs::OpenOptions; +use std::io::prelude::*; +use std::io::{self, Error, ErrorKind, SeekFrom}; +use std::marker; +use std::path::{Component, Path, PathBuf}; + +use filetime::{self, FileTime}; + +use crate::archive::ArchiveInner; +use crate::error::TarError; +use crate::header::bytes2path; +use crate::other; +use crate::{Archive, Header, PaxExtensions}; + +/// A read-only view into an entry of an archive. +/// +/// This structure is a window into a portion of a borrowed archive which can +/// be inspected. It acts as a file handle by implementing the Reader trait. An +/// entry cannot be rewritten once inserted into an archive. +pub struct Entry<'a, R: 'a + Read> { + fields: EntryFields<'a>, + _ignored: marker::PhantomData<&'a Archive>, +} + +// private implementation detail of `Entry`, but concrete (no type parameters) +// and also all-public to be constructed from other modules. 
+pub struct EntryFields<'a> { + pub long_pathname: Option>, + pub long_linkname: Option>, + pub pax_extensions: Option>, + pub header: Header, + pub size: u64, + pub header_pos: u64, + pub file_pos: u64, + pub data: Vec>, + pub unpack_xattrs: bool, + pub preserve_permissions: bool, + pub preserve_mtime: bool, + pub overwrite: bool, +} + +pub enum EntryIo<'a> { + Pad(io::Take), + Data(io::Take<&'a ArchiveInner>), +} + +/// When unpacking items the unpacked thing is returned to allow custom +/// additional handling by users. Today the File is returned, in future +/// the enum may be extended with kinds for links, directories etc. +#[derive(Debug)] +pub enum Unpacked { + /// A file was unpacked. + File(std::fs::File), + /// A directory, hardlink, symlink, or other node was unpacked. + #[doc(hidden)] + __Nonexhaustive, +} + +impl<'a, R: Read> Entry<'a, R> { + /// Returns the path name for this entry. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().path()` as some archive formats have support for longer + /// path names described in separate entries. + /// + /// It is recommended to use this method instead of inspecting the `header` + /// directly to ensure that various archive formats are handled correctly. + pub fn path(&self) -> io::Result> { + self.fields.path() + } + + /// Returns the raw bytes listed for this entry. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().path_bytes()` as some archive formats have support for + /// longer path names described in separate entries. + pub fn path_bytes(&self) -> Cow<[u8]> { + self.fields.path_bytes() + } + + /// Returns the link name for this entry, if any is found. 
+ /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. `Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators, and it will not always return the same value as + /// `self.header().link_name()` as some archive formats have support for + /// longer path names described in separate entries. + /// + /// It is recommended to use this method instead of inspecting the `header` + /// directly to ensure that various archive formats are handled correctly. + pub fn link_name(&self) -> io::Result>> { + self.fields.link_name() + } + + /// Returns the link name for this entry, in bytes, if listed. + /// + /// Note that this will not always return the same value as + /// `self.header().link_name_bytes()` as some archive formats have support for + /// longer path names described in separate entries. + pub fn link_name_bytes(&self) -> Option> { + self.fields.link_name_bytes() + } + + /// Returns an iterator over the pax extensions contained in this entry. + /// + /// Pax extensions are a form of archive where extra metadata is stored in + /// key/value pairs in entries before the entry they're intended to + /// describe. For example this can be used to describe long file name or + /// other metadata like atime/ctime/mtime in more precision. + /// + /// The returned iterator will yield key/value pairs for each extension. + /// + /// `None` will be returned if this entry does not indicate that it itself + /// contains extensions, or if there were no previous extensions describing + /// it. + /// + /// Note that global pax extensions are intended to be applied to all + /// archive entries. + /// + /// Also note that this function will read the entire entry if the entry + /// itself is a list of extensions. 
+ pub fn pax_extensions(&mut self) -> io::Result> { + self.fields.pax_extensions() + } + + /// Returns access to the header of this entry in the archive. + /// + /// This provides access to the metadata for this entry in the archive. + pub fn header(&self) -> &Header { + &self.fields.header + } + + /// Returns access to the size of this entry in the archive. + /// + /// In the event the size is stored in a pax extension, that size value + /// will be referenced. Otherwise, the entry size will be stored in the header. + pub fn size(&self) -> u64 { + self.fields.size + } + + /// Returns the starting position, in bytes, of the header of this entry in + /// the archive. + /// + /// The header is always a contiguous section of 512 bytes, so if the + /// underlying reader implements `Seek`, then the slice from `header_pos` to + /// `header_pos + 512` contains the raw header bytes. + pub fn raw_header_position(&self) -> u64 { + self.fields.header_pos + } + + /// Returns the starting position, in bytes, of the file of this entry in + /// the archive. + /// + /// If the file of this entry is continuous (e.g. not a sparse file), and + /// if the underlying reader implements `Seek`, then the slice from + /// `file_pos` to `file_pos + entry_size` contains the raw file bytes. + pub fn raw_file_position(&self) -> u64 { + self.fields.file_pos + } + + /// Writes this file to the specified location. + /// + /// This function will write the entire contents of this file into the + /// location specified by `dst`. Metadata will also be propagated to the + /// path `dst`. + /// + /// This function will create a file at the path `dst`, and it is required + /// that the intermediate directories are created. Any existing file at the + /// location `dst` will be overwritten. + /// + /// > **Note**: This function does not have as many sanity checks as + /// > `Archive::unpack` or `Entry::unpack_in`. 
As a result if you're + /// > thinking of unpacking untrusted tarballs you may want to review the + /// > implementations of the previous two functions and perhaps implement + /// > similar logic yourself. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use tar::Archive; + /// + /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); + /// + /// for (i, file) in ar.entries().unwrap().enumerate() { + /// let mut file = file.unwrap(); + /// file.unpack(format!("file-{}", i)).unwrap(); + /// } + /// ``` + pub fn unpack>(&mut self, dst: P) -> io::Result { + self.fields.unpack(None, dst.as_ref()) + } + + /// Extracts this file under the specified path, avoiding security issues. + /// + /// This function will write the entire contents of this file into the + /// location obtained by appending the path of this file in the archive to + /// `dst`, creating any intermediate directories if needed. Metadata will + /// also be propagated to the path `dst`. Any existing file at the location + /// `dst` will be overwritten. + /// + /// This function carefully avoids writing outside of `dst`. If the file has + /// a '..' in its path, this function will skip it and return false. + /// + /// # Examples + /// + /// ```no_run + /// use std::fs::File; + /// use tar::Archive; + /// + /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); + /// + /// for (i, file) in ar.entries().unwrap().enumerate() { + /// let mut file = file.unwrap(); + /// file.unpack_in("target").unwrap(); + /// } + /// ``` + pub fn unpack_in>(&mut self, dst: P) -> io::Result { + self.fields.unpack_in(dst.as_ref()) + } + + /// Indicate whether extended file attributes (xattrs on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix using xattr support. 
This may eventually be implemented for + /// Windows, however, if other archive implementations are found which do + /// this as well. + pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { + self.fields.unpack_xattrs = unpack_xattrs; + } + + /// Indicate whether extended permissions (like suid on Unix) are preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is currently only implemented on + /// Unix. + pub fn set_preserve_permissions(&mut self, preserve: bool) { + self.fields.preserve_permissions = preserve; + } + + /// Indicate whether modification time (mtime) information is preserved when + /// unpacking this entry. + /// + /// This flag is enabled by default. + pub fn set_preserve_mtime(&mut self, preserve: bool) { + self.fields.preserve_mtime = preserve; + } +} + +impl<'a, R: Read> Read for Entry<'a, R> { + fn read(&mut self, into: &mut [u8]) -> io::Result { + self.fields.read(into) + } +} + +impl<'a> EntryFields<'a> { + pub fn from(entry: Entry) -> EntryFields { + entry.fields + } + + pub fn into_entry(self) -> Entry<'a, R> { + Entry { + fields: self, + _ignored: marker::PhantomData, + } + } + + pub fn read_all(&mut self) -> io::Result> { + // Preallocate some data but don't let ourselves get too crazy now. 
+ let cap = cmp::min(self.size, 128 * 1024); + let mut v = Vec::with_capacity(cap as usize); + self.read_to_end(&mut v).map(|_| v) + } + + fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + fn path_bytes(&self) -> Cow<[u8]> { + match self.long_pathname { + Some(ref bytes) => { + if let Some(&0) = bytes.last() { + Cow::Borrowed(&bytes[..bytes.len() - 1]) + } else { + Cow::Borrowed(bytes) + } + } + None => { + if let Some(ref pax) = self.pax_extensions { + let pax = PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"path") + .map(|f| f.value_bytes()); + if let Some(field) = pax { + return Cow::Borrowed(field); + } + } + self.header.path_bytes() + } + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + fn link_name_bytes(&self) -> Option> { + match self.long_linkname { + Some(ref bytes) => { + if let Some(&0) = bytes.last() { + Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) + } else { + Some(Cow::Borrowed(bytes)) + } + } + None => { + if let Some(ref pax) = self.pax_extensions { + let pax = PaxExtensions::new(pax) + .filter_map(|f| f.ok()) + .find(|f| f.key_bytes() == b"linkpath") + .map(|f| f.value_bytes()); + if let Some(field) = pax { + return Some(Cow::Borrowed(field)); + } + } + self.header.link_name_bytes() + } + } + } + + fn pax_extensions(&mut self) -> io::Result> { + if self.pax_extensions.is_none() { + if !self.header.entry_type().is_pax_global_extensions() + && !self.header.entry_type().is_pax_local_extensions() + { + return Ok(None); + } + self.pax_extensions = Some(self.read_all()?); + } + Ok(Some(PaxExtensions::new( + self.pax_extensions.as_ref().unwrap(), + ))) + } + + fn unpack_in(&mut self, dst: &Path) -> io::Result { + // Notes 
regarding bsdtar 2.8.3 / libarchive 2.8.3: + // * Leading '/'s are trimmed. For example, `///test` is treated as + // `test`. + // * If the filename contains '..', then the file is skipped when + // extracting the tarball. + // * '//' within a filename is effectively skipped. An error is + // logged, but otherwise the effect is as if any two or more + // adjacent '/'s within the filename were consolidated into one + // '/'. + // + // Most of this is handled by the `path` module of the standard + // library, but we specially handle a few cases here as well. + + let mut file_dst = dst.to_path_buf(); + { + let path = self.path().map_err(|e| { + TarError::new( + format!("invalid path in entry header: {}", self.path_lossy()), + e, + ) + })?; + for part in path.components() { + match part { + // Leading '/' characters, root paths, and '.' + // components are just ignored and treated as "empty + // components" + Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, + + // If any part of the filename is '..', then skip over + // unpacking the file to prevent directory traversal + // security issues. See, e.g.: CVE-2001-1267, + // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 + Component::ParentDir => return Ok(false), + + Component::Normal(part) => file_dst.push(part), + } + } + } + + // Skip cases where only slashes or '.' parts were seen, because + // this is effectively an empty filename. + if *dst == *file_dst { + return Ok(true); + } + + // Skip entries without a parent (i.e. 
outside of FS root) + let parent = match file_dst.parent() { + Some(p) => p, + None => return Ok(false), + }; + + self.ensure_dir_created(&dst, parent) + .map_err(|e| TarError::new(format!("failed to create `{}`", parent.display()), e))?; + + let canon_target = self.validate_inside_dst(&dst, parent)?; + + self.unpack(Some(&canon_target), &file_dst) + .map_err(|e| TarError::new(format!("failed to unpack `{}`", file_dst.display()), e))?; + + Ok(true) + } + + /// Unpack as destination directory `dst`. + fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { + // If the directory already exists just let it slide + fs::create_dir(dst).or_else(|err| { + if err.kind() == ErrorKind::AlreadyExists { + let prev = fs::metadata(dst); + if prev.map(|m| m.is_dir()).unwrap_or(false) { + return Ok(()); + } + } + Err(Error::new( + err.kind(), + format!("{} when creating dir {}", err, dst.display()), + )) + }) + } + + /// Unpacks this entry at `dst`. When `target_base` is given, a hard link's + /// source is joined onto it and checked to be inside it before linking. + fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result { + let kind = self.header.entry_type(); + + if kind.is_dir() { + self.unpack_dir(dst)?; + if let Ok(mode) = self.header.mode() { + set_perms(dst, None, mode, self.preserve_permissions)?; + } + return Ok(Unpacked::__Nonexhaustive); + } else if kind.is_hard_link() || kind.is_symlink() { + let src = match self.link_name()? { + Some(name) => name, + None => { + return Err(other(&format!( + "hard link listed for {} but no link name found", + String::from_utf8_lossy(self.header.as_bytes()) + ))); + } + }; + + if src.iter().count() == 0 { + return Err(other(&format!( + "symlink destination for {} is empty", + String::from_utf8_lossy(self.header.as_bytes()) + ))); + } + + if kind.is_hard_link() { + let link_src = match target_base { + // If we're unpacking within a directory then ensure that + // the destination of this hard link is both present and + // inside our own directory. 
This is needed because we want + // to make sure to not overwrite anything outside the root. + // + // Note that this logic is only needed for hard links + // currently. With symlinks the `validate_inside_dst` which + // happens before this method as part of `unpack_in` will + // use canonicalization to ensure this guarantee. For hard + // links though they're canonicalized to their existing path + // so we need to validate at this time. + Some(ref p) => { + let link_src = p.join(src); + self.validate_inside_dst(p, &link_src)?; + link_src + } + None => src.into_owned(), + }; + fs::hard_link(&link_src, dst).map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when hard linking {} to {}", + err, + link_src.display(), + dst.display() + ), + ) + })?; + } else { + symlink(&src, dst) + .or_else(|err_io| { + if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite { + // remove dest and try once more + std::fs::remove_file(dst).and_then(|()| symlink(&src, dst)) + } else { + Err(err_io) + } + }) + .map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when symlinking {} to {}", + err, + src.display(), + dst.display() + ), + ) + })?; + }; + return Ok(Unpacked::__Nonexhaustive); + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) + } + + #[cfg(windows)] + fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + ::std::os::windows::fs::symlink_file(src, dst) + } + + #[cfg(unix)] + fn symlink(src: &Path, dst: &Path) -> io::Result<()> { + ::std::os::unix::fs::symlink(src, dst) + } + } else if kind.is_pax_global_extensions() + || kind.is_pax_local_extensions() + || kind.is_gnu_longname() + || kind.is_gnu_longlink() + { + return Ok(Unpacked::__Nonexhaustive); + }; + + // Old BSD-tar compatibility. + // Names that have a trailing slash should be treated as a directory. + // Only applies to old headers. 
+ if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") { + self.unpack_dir(dst)?; + if let Ok(mode) = self.header.mode() { + set_perms(dst, None, mode, self.preserve_permissions)?; + } + return Ok(Unpacked::__Nonexhaustive); + } + + // Note the lack of `else` clause above. According to the FreeBSD + // documentation: + // + // > A POSIX-compliant implementation must treat any unrecognized + // > typeflag value as a regular file. + // + // As a result if we don't recognize the kind we just write out the file + // as we would normally. + + // Ensure we write a new file rather than overwriting in-place which + // is attackable; if an existing file is found unlink it. + fn open(dst: &Path) -> io::Result { + OpenOptions::new().write(true).create_new(true).open(dst) + } + let mut f = (|| -> io::Result { + let mut f = open(dst).or_else(|err| { + if err.kind() != ErrorKind::AlreadyExists { + Err(err) + } else if self.overwrite { + match fs::remove_file(dst) { + Ok(()) => open(dst), + Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst), + Err(e) => Err(e), + } + } else { + Err(err) + } + })?; + for io in self.data.drain(..) { + match io { + EntryIo::Data(mut d) => { + let expected = d.limit(); + if io::copy(&mut d, &mut f)? 
!= expected { + return Err(other("failed to write entire file")); + } + } + EntryIo::Pad(d) => { + // TODO: checked cast to i64 + let to = SeekFrom::Current(d.limit() as i64); + let size = f.seek(to)?; + f.set_len(size)?; + } + } + } + Ok(f) + })() + .map_err(|e| { + let header = self.header.path_bytes(); + TarError::new( + format!( + "failed to unpack `{}` into `{}`", + String::from_utf8_lossy(&header), + dst.display() + ), + e, + ) + })?; + + if self.preserve_mtime { + if let Ok(mtime) = self.header.mtime() { + // For some more information on this see the comments in + // `Header::fill_platform_from`, but the general idea is that + // we're trying to avoid 0-mtime files coming out of archives + // since some tools don't ingest them well. Perhaps one day + // when Cargo stops working with 0-mtime archives we can remove + // this. + let mtime = if mtime == 0 { 1 } else { mtime }; + let mtime = FileTime::from_unix_time(mtime as i64, 0); + filetime::set_file_handle_times(&f, Some(mtime), Some(mtime)).map_err(|e| { + TarError::new(format!("failed to set mtime for `{}`", dst.display()), e) + })?; + } + } + if let Ok(mode) = self.header.mode() { + set_perms(dst, Some(&mut f), mode, self.preserve_permissions)?; + } + if self.unpack_xattrs { + set_xattrs(self, dst)?; + } + return Ok(Unpacked::File(f)); + + fn set_perms( + dst: &Path, + f: Option<&mut std::fs::File>, + mode: u32, + preserve: bool, + ) -> Result<(), TarError> { + _set_perms(dst, f, mode, preserve).map_err(|e| { + TarError::new( + format!( + "failed to set permissions to {:o} \ + for `{}`", + mode, + dst.display() + ), + e, + ) + }) + } + + #[cfg(unix)] + fn _set_perms( + dst: &Path, + f: Option<&mut std::fs::File>, + mode: u32, + preserve: bool, + ) -> io::Result<()> { + use std::os::unix::prelude::*; + + let mode = if preserve { mode } else { mode & 0o777 }; + let perm = fs::Permissions::from_mode(mode as _); + match f { + Some(f) => f.set_permissions(perm), + None => fs::set_permissions(dst, perm), + } + 
} + + #[cfg(windows)] + fn _set_perms( + dst: &Path, + f: Option<&mut std::fs::File>, + mode: u32, + _preserve: bool, + ) -> io::Result<()> { + if mode & 0o200 == 0o200 { + return Ok(()); + } + match f { + Some(f) => { + let mut perm = f.metadata()?.permissions(); + perm.set_readonly(true); + f.set_permissions(perm) + } + None => { + let mut perm = fs::metadata(dst)?.permissions(); + perm.set_readonly(true); + fs::set_permissions(dst, perm) + } + } + } + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + fn _set_perms( + dst: &Path, + f: Option<&mut std::fs::File>, + mode: u32, + _preserve: bool, + ) -> io::Result<()> { + Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) + } + + #[cfg(all(unix, feature = "xattr"))] + fn set_xattrs(me: &mut EntryFields, dst: &Path) -> io::Result<()> { + use std::ffi::OsStr; + use std::os::unix::prelude::*; + + let exts = match me.pax_extensions() { + Ok(Some(e)) => e, + _ => return Ok(()), + }; + let exts = exts + .filter_map(|e| e.ok()) + .filter_map(|e| { + let key = e.key_bytes(); + let prefix = b"SCHILY.xattr."; + if key.starts_with(prefix) { + Some((&key[prefix.len()..], e)) + } else { + None + } + }) + .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); + + for (key, value) in exts { + xattr::set(dst, key, value).map_err(|e| { + TarError::new( + format!( + "failed to set extended \ + attributes to {}. 
\ + Xattrs: key={:?}, value={:?}.", + dst.display(), + key, + String::from_utf8_lossy(value) + ), + e, + ) + })?; + } + + Ok(()) + } + // Windows does not completely support posix xattrs + // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT + #[cfg(any(windows, not(feature = "xattr"), target_arch = "wasm32"))] + fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { + Ok(()) + } + } + + fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> { + let mut ancestor = dir; + let mut dirs_to_create = Vec::new(); + while ancestor.symlink_metadata().is_err() { + dirs_to_create.push(ancestor); + if let Some(parent) = ancestor.parent() { + ancestor = parent; + } else { + break; + } + } + for ancestor in dirs_to_create.into_iter().rev() { + if let Some(parent) = ancestor.parent() { + self.validate_inside_dst(dst, parent)?; + } + fs::create_dir_all(ancestor)?; + } + Ok(()) + } + + fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result { + // Abort if target (canonical) parent is outside of `dst` + let canon_parent = file_dst.canonicalize().map_err(|err| { + Error::new( + err.kind(), + format!("{} while canonicalizing {}", err, file_dst.display()), + ) + })?; + let canon_target = dst.canonicalize().map_err(|err| { + Error::new( + err.kind(), + format!("{} while canonicalizing {}", err, dst.display()), + ) + })?; + if !canon_parent.starts_with(&canon_target) { + let err = TarError::new( + format!( + "trying to unpack outside of destination path: {}", + canon_target.display() + ), + // TODO: use ErrorKind::InvalidInput here? 
(minor breaking change) + Error::new(ErrorKind::Other, "Invalid argument"), + ); + return Err(err.into()); + } + Ok(canon_target) + } +} + +impl<'a> Read for EntryFields<'a> { + fn read(&mut self, into: &mut [u8]) -> io::Result { + loop { + match self.data.get_mut(0).map(|io| io.read(into)) { + Some(Ok(0)) => { + self.data.remove(0); + } + Some(r) => return r, + None => return Ok(0), + } + } + } +} + +impl<'a> Read for EntryIo<'a> { + fn read(&mut self, into: &mut [u8]) -> io::Result { + match *self { + EntryIo::Pad(ref mut io) => io.read(into), + EntryIo::Data(ref mut io) => io.read(into), + } + } +} diff --git a/vendor/tar/src/entry_type.rs b/vendor/tar/src/entry_type.rs new file mode 100644 index 000000000..951388d57 --- /dev/null +++ b/vendor/tar/src/entry_type.rs @@ -0,0 +1,193 @@ +// See https://en.wikipedia.org/wiki/Tar_%28computing%29#UStar_format +/// Indicate for the type of file described by a header. +/// +/// Each `Header` has an `entry_type` method returning an instance of this type +/// which can be used to inspect what the header is describing. + +/// A non-exhaustive enum representing the possible entry types +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum EntryType { + /// Regular file + Regular, + /// Hard link + Link, + /// Symbolic link + Symlink, + /// Character device + Char, + /// Block device + Block, + /// Directory + Directory, + /// Named pipe (fifo) + Fifo, + /// Implementation-defined 'high-performance' type, treated as regular file + Continuous, + /// GNU extension - long file name + GNULongName, + /// GNU extension - long link name (link target) + GNULongLink, + /// GNU extension - sparse file + GNUSparse, + /// Global extended header + XGlobalHeader, + /// Extended Header + XHeader, + /// Hints that destructuring should not be exhaustive. + /// + /// This enum may grow additional variants, so this makes sure clients + /// don't count on exhaustive matching. (Otherwise, adding a new variant + /// could break existing code.) 
+ #[doc(hidden)] + __Nonexhaustive(u8), +} + +impl EntryType { + /// Creates a new entry type from a raw byte. + /// + /// Note that the other named constructors of entry type may be more + /// appropriate to create a file type from. + pub fn new(byte: u8) -> EntryType { + match byte { + b'\x00' | b'0' => EntryType::Regular, + b'1' => EntryType::Link, + b'2' => EntryType::Symlink, + b'3' => EntryType::Char, + b'4' => EntryType::Block, + b'5' => EntryType::Directory, + b'6' => EntryType::Fifo, + b'7' => EntryType::Continuous, + b'x' => EntryType::XHeader, + b'g' => EntryType::XGlobalHeader, + b'L' => EntryType::GNULongName, + b'K' => EntryType::GNULongLink, + b'S' => EntryType::GNUSparse, + b => EntryType::__Nonexhaustive(b), + } + } + + /// Returns the raw underlying byte that this entry type represents. + pub fn as_byte(&self) -> u8 { + match *self { + EntryType::Regular => b'0', + EntryType::Link => b'1', + EntryType::Symlink => b'2', + EntryType::Char => b'3', + EntryType::Block => b'4', + EntryType::Directory => b'5', + EntryType::Fifo => b'6', + EntryType::Continuous => b'7', + EntryType::XHeader => b'x', + EntryType::XGlobalHeader => b'g', + EntryType::GNULongName => b'L', + EntryType::GNULongLink => b'K', + EntryType::GNUSparse => b'S', + EntryType::__Nonexhaustive(b) => b, + } + } + + /// Creates a new entry type representing a regular file. + pub fn file() -> EntryType { + EntryType::Regular + } + + /// Creates a new entry type representing a hard link. + pub fn hard_link() -> EntryType { + EntryType::Link + } + + /// Creates a new entry type representing a symlink. + pub fn symlink() -> EntryType { + EntryType::Symlink + } + + /// Creates a new entry type representing a character special device. + pub fn character_special() -> EntryType { + EntryType::Char + } + + /// Creates a new entry type representing a block special device. + pub fn block_special() -> EntryType { + EntryType::Block + } + + /// Creates a new entry type representing a directory. 
+ pub fn dir() -> EntryType { + EntryType::Directory + } + + /// Creates a new entry type representing a FIFO. + pub fn fifo() -> EntryType { + EntryType::Fifo + } + + /// Creates a new entry type representing a contiguous file. + pub fn contiguous() -> EntryType { + EntryType::Continuous + } + + /// Returns whether this type represents a regular file. + pub fn is_file(&self) -> bool { + self == &EntryType::Regular + } + + /// Returns whether this type represents a hard link. + pub fn is_hard_link(&self) -> bool { + self == &EntryType::Link + } + + /// Returns whether this type represents a symlink. + pub fn is_symlink(&self) -> bool { + self == &EntryType::Symlink + } + + /// Returns whether this type represents a character special device. + pub fn is_character_special(&self) -> bool { + self == &EntryType::Char + } + + /// Returns whether this type represents a block special device. + pub fn is_block_special(&self) -> bool { + self == &EntryType::Block + } + + /// Returns whether this type represents a directory. + pub fn is_dir(&self) -> bool { + self == &EntryType::Directory + } + + /// Returns whether this type represents a FIFO. + pub fn is_fifo(&self) -> bool { + self == &EntryType::Fifo + } + + /// Returns whether this type represents a contiguous file. + pub fn is_contiguous(&self) -> bool { + self == &EntryType::Continuous + } + + /// Returns whether this type represents a GNU long name header. + pub fn is_gnu_longname(&self) -> bool { + self == &EntryType::GNULongName + } + + /// Returns whether this type represents a GNU sparse header. + pub fn is_gnu_sparse(&self) -> bool { + self == &EntryType::GNUSparse + } + + /// Returns whether this type represents a GNU long link header. + pub fn is_gnu_longlink(&self) -> bool { + self == &EntryType::GNULongLink + } + + /// Returns whether this type represents a pax global extended header. 
+ pub fn is_pax_global_extensions(&self) -> bool { + self == &EntryType::XGlobalHeader + } + + /// Returns whether this type represents a pax (local) extended header. + pub fn is_pax_local_extensions(&self) -> bool { + self == &EntryType::XHeader + } +} diff --git a/vendor/tar/src/error.rs b/vendor/tar/src/error.rs new file mode 100644 index 000000000..0d3877fa2 --- /dev/null +++ b/vendor/tar/src/error.rs @@ -0,0 +1,41 @@ +use std::borrow::Cow; +use std::error; +use std::fmt; +use std::io::{self, Error}; + +#[derive(Debug)] +pub struct TarError { + desc: Cow<'static, str>, + io: io::Error, +} + +impl TarError { + pub fn new(desc: impl Into>, err: Error) -> TarError { + TarError { + desc: desc.into(), + io: err, + } + } +} + +impl error::Error for TarError { + fn description(&self) -> &str { + &self.desc + } + + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + Some(&self.io) + } +} + +impl fmt::Display for TarError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.desc.fmt(f) + } +} + +impl From for Error { + fn from(t: TarError) -> Error { + Error::new(t.io.kind(), t) + } +} diff --git a/vendor/tar/src/header.rs b/vendor/tar/src/header.rs new file mode 100644 index 000000000..7e507fc71 --- /dev/null +++ b/vendor/tar/src/header.rs @@ -0,0 +1,1642 @@ +#[cfg(unix)] +use std::os::unix::prelude::*; +#[cfg(windows)] +use std::os::windows::prelude::*; + +use std::borrow::Cow; +use std::fmt; +use std::fs; +use std::io; +use std::iter; +use std::iter::repeat; +use std::mem; +use std::path::{Component, Path, PathBuf}; +use std::str; + +use crate::other; +use crate::EntryType; + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct Header { + bytes: [u8; 512], +} + +/// Declares the information that should be included when filling a Header +/// from filesystem metadata. 
+#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +pub enum HeaderMode { + /// All supported metadata, including mod/access times and ownership will + /// be included. + Complete, + + /// Only metadata that is directly relevant to the identity of a file will + /// be included. In particular, ownership and mod/access times are excluded. + Deterministic, +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct OldHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub linkflag: [u8; 1], + pub linkname: [u8; 100], + pub pad: [u8; 255], +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct UstarHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // UStar format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub prefix: [u8; 155], + pub pad: [u8; 12], +} + +/// Representation of the header of an entry in an archive +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuHeader { + pub name: [u8; 100], + pub mode: [u8; 8], + pub uid: [u8; 8], + pub gid: [u8; 8], + pub size: [u8; 12], + pub mtime: [u8; 12], + pub cksum: [u8; 8], + pub typeflag: [u8; 1], + pub linkname: [u8; 100], + + // GNU format + pub magic: [u8; 6], + pub version: [u8; 2], + pub uname: [u8; 32], + pub gname: [u8; 32], + pub dev_major: [u8; 8], + pub dev_minor: [u8; 8], + pub atime: [u8; 12], + pub ctime: [u8; 12], + pub offset: [u8; 12], + pub longnames: [u8; 4], + pub unused: [u8; 1], + pub sparse: [GnuSparseHeader; 4], + pub isextended: [u8; 1], + pub realsize: [u8; 12], + pub pad: [u8; 17], +} + +/// 
Description of the header of a spare entry. +/// +/// Specifies the offset/number of bytes of a chunk of data in octal. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuSparseHeader { + pub offset: [u8; 12], + pub numbytes: [u8; 12], +} + +/// Representation of the entry found to represent extended GNU sparse files. +/// +/// When a `GnuHeader` has the `isextended` flag set to `1` then the contents of +/// the next entry will be one of these headers. +#[repr(C)] +#[allow(missing_docs)] +pub struct GnuExtSparseHeader { + pub sparse: [GnuSparseHeader; 21], + pub isextended: [u8; 1], + pub padding: [u8; 7], +} + +impl Header { + /// Creates a new blank GNU header. + /// + /// The GNU style header is the default for this library and allows various + /// extensions such as long path names, long link names, and setting the + /// atime/ctime metadata attributes of files. + pub fn new_gnu() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, GnuHeader>(&mut header); + gnu.magic = *b"ustar "; + gnu.version = *b" \0"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank UStar header. + /// + /// The UStar style header is an extension of the original archive header + /// which enables some extra metadata along with storing a longer (but not + /// too long) path name. + /// + /// UStar is also the basis used for pax archives. + pub fn new_ustar() -> Header { + let mut header = Header { bytes: [0; 512] }; + unsafe { + let gnu = cast_mut::<_, UstarHeader>(&mut header); + gnu.magic = *b"ustar\0"; + gnu.version = *b"00"; + } + header.set_mtime(0); + header + } + + /// Creates a new blank old header. + /// + /// This header format is the original archive header format which all other + /// versions are compatible with (e.g. they are a superset). This header + /// format limits the path name limit and isn't able to contain extra + /// metadata like atime/ctime. 
+ pub fn new_old() -> Header { + let mut header = Header { bytes: [0; 512] }; + header.set_mtime(0); + header + } + + fn is_ustar(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] == b"ustar\0"[..] && ustar.version[..] == b"00"[..] + } + + fn is_gnu(&self) -> bool { + let ustar = unsafe { cast::<_, UstarHeader>(self) }; + ustar.magic[..] == b"ustar "[..] && ustar.version[..] == b" \0"[..] + } + + /// View this archive header as a raw "old" archive header. + /// + /// This view will always succeed as all archive header formats will fill + /// out at least the fields specified in the old header format. + pub fn as_old(&self) -> &OldHeader { + unsafe { cast(self) } + } + + /// Same as `as_old`, but the mutable version. + pub fn as_old_mut(&mut self) -> &mut OldHeader { + unsafe { cast_mut(self) } + } + + /// View this archive header as a raw UStar archive header. + /// + /// The UStar format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. + /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the UStar format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_ustar(&self) -> Option<&UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_ustar_mut`, but the mutable version. + pub fn as_ustar_mut(&mut self) -> Option<&mut UstarHeader> { + if self.is_ustar() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// View this archive header as a raw GNU archive header. + /// + /// The GNU format is an extension to the tar archive format which enables + /// longer pathnames and a few extra attributes such as the group and user + /// name. 
+ /// + /// This cast may not succeed as this function will test whether the + /// magic/version fields of the GNU format have the appropriate values, + /// returning `None` if they aren't correct. + pub fn as_gnu(&self) -> Option<&GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast(self) }) + } else { + None + } + } + + /// Same as `as_gnu`, but the mutable version. + pub fn as_gnu_mut(&mut self) -> Option<&mut GnuHeader> { + if self.is_gnu() { + Some(unsafe { cast_mut(self) }) + } else { + None + } + } + + /// Treats the given byte slice as a header. + /// + /// Panics if the length of the passed slice is not equal to 512. + pub fn from_byte_slice(bytes: &[u8]) -> &Header { + assert_eq!(bytes.len(), mem::size_of::
()); + assert_eq!(mem::align_of_val(bytes), mem::align_of::
()); + unsafe { &*(bytes.as_ptr() as *const Header) } + } + + /// Returns a view into this header as a byte array. + pub fn as_bytes(&self) -> &[u8; 512] { + &self.bytes + } + + /// Returns a view into this header as a byte array. + pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] { + &mut self.bytes + } + + /// Blanket sets the metadata in this header from the metadata argument + /// provided. + /// + /// This is useful for initializing a `Header` from the OS's metadata from a + /// file. By default, this will use `HeaderMode::Complete` to include all + /// metadata. + pub fn set_metadata(&mut self, meta: &fs::Metadata) { + self.fill_from(meta, HeaderMode::Complete); + } + + /// Sets only the metadata relevant to the given HeaderMode in this header + /// from the metadata argument provided. + pub fn set_metadata_in_mode(&mut self, meta: &fs::Metadata, mode: HeaderMode) { + self.fill_from(meta, mode); + } + + /// Returns the size of entry's data this header represents. + /// + /// This is different from `Header::size` for sparse files, which have + /// some longer `size()` but shorter `entry_size()`. The `entry_size()` + /// listed here should be the number of bytes in the archive this header + /// describes. + /// + /// May return an error if the field is corrupted. + pub fn entry_size(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().size).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting size for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the file size this header represents. + /// + /// May return an error if the field is corrupted. + pub fn size(&self) -> io::Result { + if self.entry_type().is_gnu_sparse() { + self.as_gnu() + .ok_or_else(|| other("sparse header was not a gnu header")) + .and_then(|h| h.real_size()) + } else { + self.entry_size() + } + } + + /// Encodes the `size` argument into the size field of this header. 
+ pub fn set_size(&mut self, size: u64) { + num_field_wrapper_into(&mut self.as_old_mut().size, size); + } + + /// Returns the raw path name stored in this header. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path(&self) -> io::Result> { + bytes2path(self.path_bytes()) + } + + /// Returns the pathname stored in this header as a byte array. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `path` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn path_bytes(&self) -> Cow<[u8]> { + if let Some(ustar) = self.as_ustar() { + ustar.path_bytes() + } else { + let name = truncate(&self.as_old().name); + Cow::Borrowed(name) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. + fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// Sets the path name for this header. + /// + /// This function will set the pathname listed in this header, encoding it + /// in the appropriate format. May fail if the path is too long or if the + /// path specified is not Unicode and this is a Windows platform. Will + /// strip out any "." path component, which signifies the current directory. + /// + /// Note: This function does not support names over 100 bytes, or paths + /// over 255 bytes, even for formats that support longer names. Instead, + /// use `Builder` methods to insert a long-name extension at the same time + /// as the file content. 
+ pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_path(path); + } + copy_path_into(&mut self.as_old_mut().name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + }) + } + + /// Returns the link name stored in this header, if any is found. + /// + /// This method may fail if the pathname is not valid Unicode and this is + /// called on a Windows platform. `Ok(None)` being returned, however, + /// indicates that the link name was not present. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name(&self) -> io::Result>> { + match self.link_name_bytes() { + Some(bytes) => bytes2path(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the link name stored in this header as a byte array, if any. + /// + /// This function is guaranteed to succeed, but you may wish to call the + /// `link_name` method to convert to a `Path`. + /// + /// Note that this function will convert any `\` characters to directory + /// separators. + pub fn link_name_bytes(&self) -> Option> { + let old = self.as_old(); + if old.linkname[0] != 0 { + Some(Cow::Borrowed(truncate(&old.linkname))) + } else { + None + } + } + + /// Sets the link name for this header. + /// + /// This function will set the linkname listed in this header, encoding it + /// in the appropriate format. May fail if the link name is too long or if + /// the path specified is not Unicode and this is a Windows platform. Will + /// strip out any "." path component, which signifies the current directory. + /// + /// To use GNU long link names, prefer instead [`crate::Builder::append_link`]. 
+ pub fn set_link_name>(&mut self, p: P) -> io::Result<()> { + self._set_link_name(p.as_ref()) + } + + fn _set_link_name(&mut self, path: &Path) -> io::Result<()> { + copy_path_into(&mut self.as_old_mut().linkname, path, true).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting link name for {}", err, self.path_lossy()), + ) + }) + } + + /// Sets the link name for this header without any transformation. + /// + /// This function is like [`Self::set_link_name`] but accepts an arbitrary byte array. + /// Hence it will not perform any canonicalization, such as replacing duplicate `//` with `/`. + pub fn set_link_name_literal>(&mut self, p: P) -> io::Result<()> { + self._set_link_name_literal(p.as_ref()) + } + + fn _set_link_name_literal(&mut self, bytes: &[u8]) -> io::Result<()> { + copy_into(&mut self.as_old_mut().linkname, bytes) + } + + /// Returns the mode bits for this file + /// + /// May return an error if the field is corrupted. + pub fn mode(&self) -> io::Result { + octal_from(&self.as_old().mode) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mode for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mode` provided into this header. + pub fn set_mode(&mut self, mode: u32) { + octal_into(&mut self.as_old_mut().mode, mode); + } + + /// Returns the value of the owner's user ID field + /// + /// May return an error if the field is corrupted. + pub fn uid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().uid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting uid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `uid` provided into this header. 
+ pub fn set_uid(&mut self, uid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().uid, uid); + } + + /// Returns the value of the group's user ID field + pub fn gid(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().gid) + .map(|u| u as u64) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting gid for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `gid` provided into this header. + pub fn set_gid(&mut self, gid: u64) { + num_field_wrapper_into(&mut self.as_old_mut().gid, gid); + } + + /// Returns the last modification time in Unix time format + pub fn mtime(&self) -> io::Result { + num_field_wrapper_from(&self.as_old().mtime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting mtime for {}", err, self.path_lossy()), + ) + }) + } + + /// Encodes the `mtime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_mtime(&mut self, mtime: u64) { + num_field_wrapper_into(&mut self.as_old_mut().mtime, mtime); + } + + /// Return the user name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the user name was + /// present and was valid utf-8, `Ok(None)` indicates that the user name is + /// not present in this archive format, and `Err` indicates that the user + /// name was present but was not valid utf-8. + pub fn username(&self) -> Result, str::Utf8Error> { + match self.username_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the user name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the user name is not present in + /// this header format. 
+ pub fn username_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.username_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.username_bytes()) + } else { + None + } + } + + /// Sets the username inside this header. + /// + /// This function will return an error if this header format cannot encode a + /// user name or the name is too long. + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_username(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_username(name) + } else { + Err(other("not a ustar or gnu archive, cannot set username")) + } + } + + /// Return the group name of the owner of this file. + /// + /// A return value of `Ok(Some(..))` indicates that the group name was + /// present and was valid utf-8, `Ok(None)` indicates that the group name is + /// not present in this archive format, and `Err` indicates that the group + /// name was present but was not valid utf-8. + pub fn groupname(&self) -> Result, str::Utf8Error> { + match self.groupname_bytes() { + Some(bytes) => str::from_utf8(bytes).map(Some), + None => Ok(None), + } + } + + /// Returns the group name of the owner of this file, if present. + /// + /// A return value of `None` indicates that the group name is not present in + /// this header format. + pub fn groupname_bytes(&self) -> Option<&[u8]> { + if let Some(ustar) = self.as_ustar() { + Some(ustar.groupname_bytes()) + } else if let Some(gnu) = self.as_gnu() { + Some(gnu.groupname_bytes()) + } else { + None + } + } + + /// Sets the group name inside this header. + /// + /// This function will return an error if this header format cannot encode a + /// group name or the name is too long. 
+ pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + return ustar.set_groupname(name); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_groupname(name) + } else { + Err(other("not a ustar or gnu archive, cannot set groupname")) + } + } + + /// Returns the device major number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device major number, and `Err` indicates that it was + /// present and failed to decode. + pub fn device_major(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_major().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_major().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `major` into the dev_major field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// major device number. + pub fn set_device_major(&mut self, major: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_major(major); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_major(major); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_major")) + } + } + + /// Returns the device minor number, if present. + /// + /// This field may not be present in all archives, and it may not be + /// correctly formed in all archives. `Ok(Some(..))` means it was present + /// and correctly decoded, `Ok(None)` indicates that this header format does + /// not include the device minor number, and `Err` indicates that it was + /// present and failed to decode. 
+ pub fn device_minor(&self) -> io::Result> { + if let Some(ustar) = self.as_ustar() { + ustar.device_minor().map(Some) + } else if let Some(gnu) = self.as_gnu() { + gnu.device_minor().map(Some) + } else { + Ok(None) + } + } + + /// Encodes the value `minor` into the dev_minor field of this header. + /// + /// This function will return an error if this header format cannot encode a + /// minor device number. + pub fn set_device_minor(&mut self, minor: u32) -> io::Result<()> { + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_minor(minor); + Ok(()) + } else if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_minor(minor); + Ok(()) + } else { + Err(other("not a ustar or gnu archive, cannot set dev_minor")) + } + } + + /// Returns the type of file described by this header. + pub fn entry_type(&self) -> EntryType { + EntryType::new(self.as_old().linkflag[0]) + } + + /// Sets the type of file that will be described by this header. + pub fn set_entry_type(&mut self, ty: EntryType) { + self.as_old_mut().linkflag = [ty.as_byte()]; + } + + /// Returns the checksum field of this header. + /// + /// May return an error if the field is corrupted. + pub fn cksum(&self) -> io::Result { + octal_from(&self.as_old().cksum) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting cksum for {}", err, self.path_lossy()), + ) + }) + } + + /// Sets the checksum field of this header based on the current fields in + /// this header. 
+ pub fn set_cksum(&mut self) { + let cksum = self.calculate_cksum(); + octal_into(&mut self.as_old_mut().cksum, cksum); + } + + fn calculate_cksum(&self) -> u32 { + let old = self.as_old(); + let start = old as *const _ as usize; + let cksum_start = old.cksum.as_ptr() as *const _ as usize; + let offset = cksum_start - start; + let len = old.cksum.len(); + self.bytes[0..offset] + .iter() + .chain(iter::repeat(&b' ').take(len)) + .chain(&self.bytes[offset + len..]) + .fold(0, |a, b| a + (*b as u32)) + } + + fn fill_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) { + self.fill_platform_from(meta, mode); + // Set size of directories to zero + self.set_size(if meta.is_dir() || meta.file_type().is_symlink() { + 0 + } else { + meta.len() + }); + if let Some(ustar) = self.as_ustar_mut() { + ustar.set_device_major(0); + ustar.set_device_minor(0); + } + if let Some(gnu) = self.as_gnu_mut() { + gnu.set_device_major(0); + gnu.set_device_minor(0); + } + } + + #[cfg(target_arch = "wasm32")] + #[allow(unused_variables)] + fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) { + unimplemented!(); + } + + #[cfg(unix)] + fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) { + match mode { + HeaderMode::Complete => { + self.set_mtime(meta.mtime() as u64); + self.set_uid(meta.uid() as u64); + self.set_gid(meta.gid() as u64); + self.set_mode(meta.mode() as u32); + } + HeaderMode::Deterministic => { + // We could in theory set the mtime to zero here, but not all + // tools seem to behave well when ingesting files with a 0 + // timestamp. For example rust-lang/cargo#9512 shows that lldb + // doesn't ingest files with a zero timestamp correctly. + // + // We just need things to be deterministic here so just pick + // something that isn't zero. This time, chosen after careful + // deliberation, corresponds to Jul 23, 2006 -- the date of the + // first commit for what would become Rust. 
+ self.set_mtime(1153704088); + + self.set_uid(0); + self.set_gid(0); + + // Use a default umask value, but propagate the (user) execute bit. + let fs_mode = if meta.is_dir() || (0o100 & meta.mode() == 0o100) { + 0o755 + } else { + 0o644 + }; + self.set_mode(fs_mode); + } + } + + // Note that if we are a GNU header we *could* set atime/ctime, except + // the `tar` utility doesn't do that by default and it causes problems + // with 7-zip [1]. + // + // It's always possible to fill them out manually, so we just don't fill + // it out automatically here. + // + // [1]: https://github.com/alexcrichton/tar-rs/issues/70 + + // TODO: need to bind more file types + self.set_entry_type(entry_type(meta.mode())); + + fn entry_type(mode: u32) -> EntryType { + match mode as libc::mode_t & libc::S_IFMT { + libc::S_IFREG => EntryType::file(), + libc::S_IFLNK => EntryType::symlink(), + libc::S_IFCHR => EntryType::character_special(), + libc::S_IFBLK => EntryType::block_special(), + libc::S_IFDIR => EntryType::dir(), + libc::S_IFIFO => EntryType::fifo(), + _ => EntryType::new(b' '), + } + } + } + + #[cfg(windows)] + fn fill_platform_from(&mut self, meta: &fs::Metadata, mode: HeaderMode) { + // There's no concept of a file mode on Windows, so do a best approximation here. + match mode { + HeaderMode::Complete => { + self.set_uid(0); + self.set_gid(0); + // The dates listed in tarballs are always seconds relative to + // January 1, 1970. On Windows, however, the timestamps are returned as + // dates relative to January 1, 1601 (in 100ns intervals), so we need to + // add in some offset for those dates. 
+ let mtime = (meta.last_write_time() / (1_000_000_000 / 100)) - 11644473600; + self.set_mtime(mtime); + let fs_mode = { + const FILE_ATTRIBUTE_READONLY: u32 = 0x00000001; + let readonly = meta.file_attributes() & FILE_ATTRIBUTE_READONLY; + match (meta.is_dir(), readonly != 0) { + (true, false) => 0o755, + (true, true) => 0o555, + (false, false) => 0o644, + (false, true) => 0o444, + } + }; + self.set_mode(fs_mode); + } + HeaderMode::Deterministic => { + self.set_uid(0); + self.set_gid(0); + self.set_mtime(123456789); // see above in unix + let fs_mode = if meta.is_dir() { 0o755 } else { 0o644 }; + self.set_mode(fs_mode); + } + } + + let ft = meta.file_type(); + self.set_entry_type(if ft.is_dir() { + EntryType::dir() + } else if ft.is_file() { + EntryType::file() + } else if ft.is_symlink() { + EntryType::symlink() + } else { + EntryType::new(b' ') + }); + } + + fn debug_fields(&self, b: &mut fmt::DebugStruct) { + if let Ok(entry_size) = self.entry_size() { + b.field("entry_size", &entry_size); + } + if let Ok(size) = self.size() { + b.field("size", &size); + } + if let Ok(path) = self.path() { + b.field("path", &path); + } + if let Ok(link_name) = self.link_name() { + b.field("link_name", &link_name); + } + if let Ok(mode) = self.mode() { + b.field("mode", &DebugAsOctal(mode)); + } + if let Ok(uid) = self.uid() { + b.field("uid", &uid); + } + if let Ok(gid) = self.gid() { + b.field("gid", &gid); + } + if let Ok(mtime) = self.mtime() { + b.field("mtime", &mtime); + } + if let Ok(username) = self.username() { + b.field("username", &username); + } + if let Ok(groupname) = self.groupname() { + b.field("groupname", &groupname); + } + if let Ok(device_major) = self.device_major() { + b.field("device_major", &device_major); + } + if let Ok(device_minor) = self.device_minor() { + b.field("device_minor", &device_minor); + } + if let Ok(cksum) = self.cksum() { + b.field("cksum", &cksum); + b.field("cksum_valid", &(cksum == self.calculate_cksum())); + } + } +} + +struct 
DebugAsOctal(T); + +impl fmt::Debug for DebugAsOctal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Octal::fmt(&self.0, f) + } +} + +unsafe fn cast(a: &T) -> &U { + assert_eq!(mem::size_of_val(a), mem::size_of::()); + assert_eq!(mem::align_of_val(a), mem::align_of::()); + &*(a as *const T as *const U) +} + +unsafe fn cast_mut(a: &mut T) -> &mut U { + assert_eq!(mem::size_of_val(a), mem::size_of::()); + assert_eq!(mem::align_of_val(a), mem::align_of::()); + &mut *(a as *mut T as *mut U) +} + +impl Clone for Header { + fn clone(&self) -> Header { + Header { bytes: self.bytes } + } +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(me) = self.as_ustar() { + me.fmt(f) + } else if let Some(me) = self.as_gnu() { + me.fmt(f) + } else { + self.as_old().fmt(f) + } + } +} + +impl OldHeader { + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for OldHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("OldHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl UstarHeader { + /// See `Header::path_bytes` + pub fn path_bytes(&self) -> Cow<[u8]> { + if self.prefix[0] == 0 && !self.name.contains(&b'\\') { + Cow::Borrowed(truncate(&self.name)) + } else { + let mut bytes = Vec::new(); + let prefix = truncate(&self.prefix); + if !prefix.is_empty() { + bytes.extend_from_slice(prefix); + bytes.push(b'/'); + } + bytes.extend_from_slice(truncate(&self.name)); + Cow::Owned(bytes) + } + } + + /// Gets the path in a "lossy" way, used for error reporting ONLY. 
+ fn path_lossy(&self) -> String { + String::from_utf8_lossy(&self.path_bytes()).to_string() + } + + /// See `Header::set_path` + pub fn set_path>(&mut self, p: P) -> io::Result<()> { + self._set_path(p.as_ref()) + } + + fn _set_path(&mut self, path: &Path) -> io::Result<()> { + // This can probably be optimized quite a bit more, but for now just do + // something that's relatively easy and readable. + // + // First up, if the path fits within `self.name` then we just shove it + // in there. If not then we try to split it between some existing path + // components where it can fit in name/prefix. To do that we peel off + // enough until the path fits in `prefix`, then we try to put both + // halves into their destination. + let bytes = path2bytes(path)?; + let (maxnamelen, maxprefixlen) = (self.name.len(), self.prefix.len()); + if bytes.len() <= maxnamelen { + copy_path_into(&mut self.name, path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } else { + let mut prefix = path; + let mut prefixlen; + loop { + match prefix.parent() { + Some(parent) => prefix = parent, + None => { + return Err(other(&format!( + "path cannot be split to be inserted into archive: {}", + path.display() + ))); + } + } + prefixlen = path2bytes(prefix)?.len(); + if prefixlen <= maxprefixlen { + break; + } + } + copy_path_into(&mut self.prefix, prefix, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + let path = bytes2path(Cow::Borrowed(&bytes[prefixlen + 1..]))?; + copy_path_into(&mut self.name, &path, false).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting path for {}", err, self.path_lossy()), + ) + })?; + } + Ok(()) + } + + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, 
name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting username for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when setting groupname for {}", err, self.path_lossy()), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.path_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for UstarHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("UstarHeader"); + self.as_header().debug_fields(&mut f); + f.finish() + } +} + +impl GnuHeader { + /// See `Header::username_bytes` + pub fn username_bytes(&self) -> &[u8] { + truncate(&self.uname) + } + + /// Gets 
the fullname (group:user) in a "lossy" way, used for error reporting ONLY. + fn fullname_lossy(&self) -> String { + format!( + "{}:{}", + String::from_utf8_lossy(self.groupname_bytes()), + String::from_utf8_lossy(self.username_bytes()), + ) + } + + /// See `Header::set_username` + pub fn set_username(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.uname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting username for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::groupname_bytes` + pub fn groupname_bytes(&self) -> &[u8] { + truncate(&self.gname) + } + + /// See `Header::set_groupname` + pub fn set_groupname(&mut self, name: &str) -> io::Result<()> { + copy_into(&mut self.gname, name.as_bytes()).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when setting groupname for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::device_major` + pub fn device_major(&self) -> io::Result { + octal_from(&self.dev_major) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_major for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_major` + pub fn set_device_major(&mut self, major: u32) { + octal_into(&mut self.dev_major, major); + } + + /// See `Header::device_minor` + pub fn device_minor(&self) -> io::Result { + octal_from(&self.dev_minor) + .map(|u| u as u32) + .map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting device_minor for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// See `Header::set_device_minor` + pub fn set_device_minor(&mut self, minor: u32) { + octal_into(&mut self.dev_minor, minor); + } + + /// Returns the last modification time in Unix time format + pub fn atime(&self) -> io::Result { + num_field_wrapper_from(&self.atime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting atime 
for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `atime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_atime(&mut self, atime: u64) { + num_field_wrapper_into(&mut self.atime, atime); + } + + /// Returns the last modification time in Unix time format + pub fn ctime(&self) -> io::Result { + num_field_wrapper_from(&self.ctime).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting ctime for {}", err, self.fullname_lossy()), + ) + }) + } + + /// Encodes the `ctime` provided into this header. + /// + /// Note that this time is typically a number of seconds passed since + /// January 1, 1970. + pub fn set_ctime(&mut self, ctime: u64) { + num_field_wrapper_into(&mut self.ctime, ctime); + } + + /// Returns the "real size" of the file this header represents. + /// + /// This is applicable for sparse files where the returned size here is the + /// size of the entire file after the sparse regions have been filled in. + pub fn real_size(&self) -> io::Result { + octal_from(&self.realsize).map_err(|err| { + io::Error::new( + err.kind(), + format!( + "{} when getting real_size for {}", + err, + self.fullname_lossy() + ), + ) + }) + } + + /// Indicates whether this header will be followed by additional + /// sparse-header records. + /// + /// Note that this is handled internally by this library, and is likely only + /// interesting if a `raw` iterator is being used. 
+ pub fn is_extended(&self) -> bool { + self.isextended[0] == 1 + } + + /// Views this as a normal `Header` + pub fn as_header(&self) -> &Header { + unsafe { cast(self) } + } + + /// Views this as a normal `Header` + pub fn as_header_mut(&mut self) -> &mut Header { + unsafe { cast_mut(self) } + } +} + +impl fmt::Debug for GnuHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_struct("GnuHeader"); + self.as_header().debug_fields(&mut f); + if let Ok(atime) = self.atime() { + f.field("atime", &atime); + } + if let Ok(ctime) = self.ctime() { + f.field("ctime", &ctime); + } + f.field("is_extended", &self.is_extended()) + .field("sparse", &DebugSparseHeaders(&self.sparse)) + .finish() + } +} + +struct DebugSparseHeaders<'a>(&'a [GnuSparseHeader]); + +impl<'a> fmt::Debug for DebugSparseHeaders<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut f = f.debug_list(); + for header in self.0 { + if !header.is_empty() { + f.entry(header); + } + } + f.finish() + } +} + +impl GnuSparseHeader { + /// Returns true if block is empty + pub fn is_empty(&self) -> bool { + self.offset[0] == 0 || self.numbytes[0] == 0 + } + + /// Offset of the block from the start of the file + /// + /// Returns `Err` for a malformed `offset` field. + pub fn offset(&self) -> io::Result { + octal_from(&self.offset).map_err(|err| { + io::Error::new( + err.kind(), + format!("{} when getting offset from sparse header", err), + ) + }) + } + + /// Length of the block + /// + /// Returns `Err` for a malformed `numbytes` field. 
pub fn length(&self) -> io::Result<u64> {
        octal_from(&self.numbytes).map_err(|err| {
            io::Error::new(
                err.kind(),
                format!("{} when getting length from sparse header", err),
            )
        })
    }
}

impl fmt::Debug for GnuSparseHeader {
    /// Prints `offset` and `length`; a field that fails to parse is omitted.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut f = f.debug_struct("GnuSparseHeader");
        if let Ok(offset) = self.offset() {
            f.field("offset", &offset);
        }
        if let Ok(length) = self.length() {
            f.field("length", &length);
        }
        f.finish()
    }
}

impl GnuExtSparseHeader {
    /// Creates a new zero'd out sparse header entry.
    pub fn new() -> GnuExtSparseHeader {
        // NOTE(review): assumes the all-zero bit pattern is a valid value of
        // this struct (its definition is outside this chunk) -- confirm there.
        unsafe { mem::zeroed() }
    }

    /// Returns a view into this header as a byte array.
    pub fn as_bytes(&self) -> &[u8; 512] {
        // The reinterpretation below is only meaningful for a 512-byte struct.
        debug_assert_eq!(mem::size_of_val(self), 512);
        unsafe { mem::transmute(self) }
    }

    /// Returns a mutable view into this header as a byte array.
    pub fn as_mut_bytes(&mut self) -> &mut [u8; 512] {
        // The reinterpretation below is only meaningful for a 512-byte struct.
        debug_assert_eq!(mem::size_of_val(self), 512);
        unsafe { mem::transmute(self) }
    }

    /// Returns a slice of the underlying sparse headers.
    ///
    /// Some headers may represent empty chunks if both the offset and numbytes
    /// fields are 0.
    pub fn sparse(&self) -> &[GnuSparseHeader; 21] {
        &self.sparse
    }

    /// Indicates if another sparse header should be following this one.
pub fn is_extended(&self) -> bool {
        // Only a first byte equal to 1 counts as "extended".
        self.isextended[0] == 1
    }
}

impl Default for GnuExtSparseHeader {
    /// Same as `GnuExtSparseHeader::new`.
    fn default() -> Self {
        Self::new()
    }
}

/// Parses an octal text field into a `u64`.
///
/// The field is cut at its first NUL byte, must be UTF-8, and is trimmed of
/// surrounding whitespace before parsing. A field with nothing left after
/// trimming is reported as "not a number".
fn octal_from(slice: &[u8]) -> io::Result<u64> {
    let trun = truncate(slice);
    let num = match str::from_utf8(trun) {
        Ok(n) => n,
        Err(_) => {
            return Err(other(&format!(
                "numeric field did not have utf-8 text: {}",
                String::from_utf8_lossy(trun)
            )));
        }
    };
    match u64::from_str_radix(num.trim(), 8) {
        Ok(n) => Ok(n),
        Err(_) => Err(other(&format!("numeric field was not a number: {}", num))),
    }
}

/// Writes `val` into `dst` as right-aligned octal text, left-padded with `0`.
///
/// The last byte of `dst` is skipped and left untouched (presumably reserved
/// for the field terminator). If `val` has more digits than fit, the most
/// significant digits are dropped without an error.
fn octal_into<T: fmt::Octal>(dst: &mut [u8], val: T) {
    let o = format!("{:o}", val);
    // Walk digits and slots from the least significant end so padding lands on
    // the left.
    let value = o.bytes().rev().chain(repeat(b'0'));
    for (slot, value) in dst.iter_mut().rev().skip(1).zip(value) {
        *slot = value;
    }
}

// Wrapper to figure out if we should fill the header field using tar's numeric
// extension (binary) or not (octal).
fn num_field_wrapper_into(dst: &mut [u8], src: u64) {
    // 8589934592 == 8^11: the first value needing more than the 11 octal digits
    // `octal_into` can place in a 12-byte field.
    // 2097152 == 8^7: the same limit for the 7 digits of an 8-byte field.
    if src >= 8589934592 || (src >= 2097152 && dst.len() == 8) {
        numeric_extended_into(dst, src);
    } else {
        octal_into(dst, src);
    }
}

// Wrapper to figure out if we should read the header field in binary (numeric
// extension) or octal (standard encoding).
// Indexes `src[0]`, so `src` must not be empty.
fn num_field_wrapper_from(src: &[u8]) -> io::Result<u64> {
    if src[0] & 0x80 != 0 {
        Ok(numeric_extended_from(src))
    } else {
        octal_from(src)
    }
}

// When writing numeric fields in the extended form, the high bit of the
// first byte is set to 1 and the remainder of the field is treated as binary
// instead of octal ascii.
// This handles writing u64 to 8 (uid, gid) or 12 (size, *time) bytes array.
+fn numeric_extended_into(dst: &mut [u8], src: u64) { + let len: usize = dst.len(); + for (slot, val) in dst.iter_mut().zip( + repeat(0) + .take(len - 8) // to zero init extra bytes + .chain((0..8).rev().map(|x| ((src >> (8 * x)) & 0xff) as u8)), + ) { + *slot = val; + } + dst[0] |= 0x80; +} + +fn numeric_extended_from(src: &[u8]) -> u64 { + let mut dst: u64 = 0; + let mut b_to_skip = 1; + if src.len() == 8 { + // read first byte without extension flag bit + dst = (src[0] ^ 0x80) as u64; + } else { + // only read last 8 bytes + b_to_skip = src.len() - 8; + } + for byte in src.iter().skip(b_to_skip) { + dst <<= 8; + dst |= *byte as u64; + } + dst +} + +fn truncate(slice: &[u8]) -> &[u8] { + match slice.iter().position(|i| *i == 0) { + Some(i) => &slice[..i], + None => slice, + } +} + +/// Copies `bytes` into the `slot` provided, returning an error if the `bytes` +/// array is too long or if it contains any nul bytes. +fn copy_into(slot: &mut [u8], bytes: &[u8]) -> io::Result<()> { + if bytes.len() > slot.len() { + Err(other("provided value is too long")) + } else if bytes.iter().any(|b| *b == 0) { + Err(other("provided value contains a nul byte")) + } else { + for (slot, val) in slot.iter_mut().zip(bytes.iter().chain(Some(&0))) { + *slot = *val; + } + Ok(()) + } +} + +/// Copies `path` into the `slot` provided +/// +/// Returns an error if: +/// +/// * the path is too long to fit +/// * a nul byte was found +/// * an invalid path component is encountered (e.g. 
a root path or parent dir) +/// * the path itself is empty +fn copy_path_into(mut slot: &mut [u8], path: &Path, is_link_name: bool) -> io::Result<()> { + let mut emitted = false; + let mut needs_slash = false; + for component in path.components() { + let bytes = path2bytes(Path::new(component.as_os_str()))?; + match (component, is_link_name) { + (Component::Prefix(..), false) | (Component::RootDir, false) => { + return Err(other("paths in archives must be relative")); + } + (Component::ParentDir, false) => { + return Err(other("paths in archives must not have `..`")); + } + // Allow "./" as the path + (Component::CurDir, false) if path.components().count() == 1 => {} + (Component::CurDir, false) => continue, + (Component::Normal(_), _) | (_, true) => {} + }; + if needs_slash { + copy(&mut slot, b"/")?; + } + if bytes.contains(&b'/') { + if let Component::Normal(..) = component { + return Err(other("path component in archive cannot contain `/`")); + } + } + copy(&mut slot, &*bytes)?; + if &*bytes != b"/" { + needs_slash = true; + } + emitted = true; + } + if !emitted { + return Err(other("paths in archives must have at least one component")); + } + if ends_with_slash(path) { + copy(&mut slot, &[b'/'])?; + } + return Ok(()); + + fn copy(slot: &mut &mut [u8], bytes: &[u8]) -> io::Result<()> { + copy_into(*slot, bytes)?; + let tmp = mem::replace(slot, &mut []); + *slot = &mut tmp[bytes.len()..]; + Ok(()) + } +} + +#[cfg(target_arch = "wasm32")] +fn ends_with_slash(p: &Path) -> bool { + p.to_string_lossy().ends_with('/') +} + +#[cfg(windows)] +fn ends_with_slash(p: &Path) -> bool { + let last = p.as_os_str().encode_wide().last(); + last == Some(b'/' as u16) || last == Some(b'\\' as u16) +} + +#[cfg(unix)] +fn ends_with_slash(p: &Path) -> bool { + p.as_os_str().as_bytes().ends_with(&[b'/']) +} + +#[cfg(any(windows, target_arch = "wasm32"))] +pub fn path2bytes(p: &Path) -> io::Result> { + p.as_os_str() + .to_str() + .map(|s| s.as_bytes()) + .ok_or_else(|| 
other(&format!("path {} was not valid Unicode", p.display()))) + .map(|bytes| { + if bytes.contains(&b'\\') { + // Normalize to Unix-style path separators + let mut bytes = bytes.to_owned(); + for b in &mut bytes { + if *b == b'\\' { + *b = b'/'; + } + } + Cow::Owned(bytes) + } else { + Cow::Borrowed(bytes) + } + }) +} + +#[cfg(unix)] +/// On unix this will never fail +pub fn path2bytes(p: &Path) -> io::Result> { + Ok(p.as_os_str().as_bytes()).map(Cow::Borrowed) +} + +#[cfg(windows)] +/// On windows we cannot accept non-Unicode bytes because it +/// is impossible to convert it to UTF-16. +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + return match bytes { + Cow::Borrowed(bytes) => { + let s = str::from_utf8(bytes).map_err(|_| not_unicode(bytes))?; + Ok(Cow::Borrowed(Path::new(s))) + } + Cow::Owned(bytes) => { + let s = String::from_utf8(bytes).map_err(|uerr| not_unicode(&uerr.into_bytes()))?; + Ok(Cow::Owned(PathBuf::from(s))) + } + }; + + fn not_unicode(v: &[u8]) -> io::Error { + other(&format!( + "only Unicode paths are supported on Windows: {}", + String::from_utf8_lossy(v) + )) + } +} + +#[cfg(unix)] +/// On unix this operation can never fail. +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + use std::ffi::{OsStr, OsString}; + + Ok(match bytes { + Cow::Borrowed(bytes) => Cow::Borrowed(Path::new(OsStr::from_bytes(bytes))), + Cow::Owned(bytes) => Cow::Owned(PathBuf::from(OsString::from_vec(bytes))), + }) +} + +#[cfg(target_arch = "wasm32")] +pub fn bytes2path(bytes: Cow<[u8]>) -> io::Result> { + Ok(match bytes { + Cow::Borrowed(bytes) => { + Cow::Borrowed({ Path::new(str::from_utf8(bytes).map_err(invalid_utf8)?) }) + } + Cow::Owned(bytes) => { + Cow::Owned({ PathBuf::from(String::from_utf8(bytes).map_err(invalid_utf8)?) 
}) + } + }) +} + +#[cfg(target_arch = "wasm32")] +fn invalid_utf8(_: T) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, "Invalid utf-8") +} diff --git a/vendor/tar/src/lib.rs b/vendor/tar/src/lib.rs new file mode 100644 index 000000000..52251cd2c --- /dev/null +++ b/vendor/tar/src/lib.rs @@ -0,0 +1,44 @@ +//! A library for reading and writing TAR archives +//! +//! This library provides utilities necessary to manage [TAR archives][1] +//! abstracted over a reader or writer. Great strides are taken to ensure that +//! an archive is never required to be fully resident in memory, and all objects +//! provide largely a streaming interface to read bytes from. +//! +//! [1]: http://en.wikipedia.org/wiki/Tar_%28computing%29 + +// More docs about the detailed tar format can also be found here: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current + +// NB: some of the coding patterns and idioms here may seem a little strange. +// This is currently attempting to expose a super generic interface while +// also not forcing clients to codegen the entire crate each time they use +// it. To that end lots of work is done to ensure that concrete +// implementations are all found in this crate and the generic functions are +// all just super thin wrappers (e.g. easy to codegen). 
+ +#![doc(html_root_url = "https://docs.rs/tar/0.4")] +#![deny(missing_docs)] +#![cfg_attr(test, deny(warnings))] + +use std::io::{Error, ErrorKind}; + +pub use crate::archive::{Archive, Entries}; +pub use crate::builder::Builder; +pub use crate::entry::{Entry, Unpacked}; +pub use crate::entry_type::EntryType; +pub use crate::header::GnuExtSparseHeader; +pub use crate::header::{GnuHeader, GnuSparseHeader, Header, HeaderMode, OldHeader, UstarHeader}; +pub use crate::pax::{PaxExtension, PaxExtensions}; + +mod archive; +mod builder; +mod entry; +mod entry_type; +mod error; +mod header; +mod pax; + +fn other(msg: &str) -> Error { + Error::new(ErrorKind::Other, msg) +} diff --git a/vendor/tar/src/pax.rs b/vendor/tar/src/pax.rs new file mode 100644 index 000000000..80ca3e9b4 --- /dev/null +++ b/vendor/tar/src/pax.rs @@ -0,0 +1,117 @@ +use std::io; +use std::slice; +use std::str; + +use crate::other; + +/// An iterator over the pax extensions in an archive entry. +/// +/// This iterator yields structures which can themselves be parsed into +/// key/value pairs. +pub struct PaxExtensions<'entry> { + data: slice::Split<'entry, u8, fn(&u8) -> bool>, +} + +impl<'entry> PaxExtensions<'entry> { + /// Create new pax extensions iterator from the given entry data. + pub fn new(a: &'entry [u8]) -> Self { + fn is_newline(a: &u8) -> bool { + *a == b'\n' + } + PaxExtensions { + data: a.split(is_newline), + } + } +} + +/// A key/value pair corresponding to a pax extension. 
+pub struct PaxExtension<'entry> { + key: &'entry [u8], + value: &'entry [u8], +} + +pub fn pax_extensions_size(a: &[u8]) -> Option { + for extension in PaxExtensions::new(a) { + let current_extension = match extension { + Ok(ext) => ext, + Err(_) => return None, + }; + if current_extension.key() != Ok("size") { + continue; + } + + let value = match current_extension.value() { + Ok(value) => value, + Err(_) => return None, + }; + let size = match value.parse::() { + Ok(size) => size, + Err(_) => return None, + }; + return Some(size); + } + None +} + +impl<'entry> Iterator for PaxExtensions<'entry> { + type Item = io::Result>; + + fn next(&mut self) -> Option>> { + let line = match self.data.next() { + Some(line) if line.is_empty() => return None, + Some(line) => line, + None => return None, + }; + + Some( + line.iter() + .position(|b| *b == b' ') + .and_then(|i| { + str::from_utf8(&line[..i]) + .ok() + .and_then(|len| len.parse::().ok().map(|j| (i + 1, j))) + }) + .and_then(|(kvstart, reported_len)| { + if line.len() + 1 == reported_len { + line[kvstart..] + .iter() + .position(|b| *b == b'=') + .map(|equals| (kvstart, equals)) + } else { + None + } + }) + .map(|(kvstart, equals)| PaxExtension { + key: &line[kvstart..kvstart + equals], + value: &line[kvstart + equals + 1..], + }) + .ok_or_else(|| other("malformed pax extension")), + ) + } +} + +impl<'entry> PaxExtension<'entry> { + /// Returns the key for this key/value pair parsed as a string. + /// + /// May fail if the key isn't actually utf-8. + pub fn key(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.key) + } + + /// Returns the underlying raw bytes for the key of this key/value pair. + pub fn key_bytes(&self) -> &'entry [u8] { + self.key + } + + /// Returns the value for this key/value pair parsed as a string. + /// + /// May fail if the value isn't actually utf-8. 
+ pub fn value(&self) -> Result<&'entry str, str::Utf8Error> { + str::from_utf8(self.value) + } + + /// Returns the underlying raw bytes for this value of this key/value pair. + pub fn value_bytes(&self) -> &'entry [u8] { + self.value + } +} -- cgit v1.2.3