use std::fs; use std::io; use std::io::prelude::*; use std::path::Path; use std::str; use crate::header::{path2bytes, HeaderMode}; use crate::{other, EntryType, Header}; /// A structure for building archives /// /// This structure has methods for building up an archive from scratch into any /// arbitrary writer. pub struct Builder { mode: HeaderMode, follow: bool, finished: bool, obj: Option, } impl Builder { /// Create a new archive builder with the underlying object as the /// destination of all data written. The builder will use /// `HeaderMode::Complete` by default. pub fn new(obj: W) -> Builder { Builder { mode: HeaderMode::Complete, follow: true, finished: false, obj: Some(obj), } } /// Changes the HeaderMode that will be used when reading fs Metadata for /// methods that implicitly read metadata for an input Path. Notably, this /// does _not_ apply to `append(Header)`. pub fn mode(&mut self, mode: HeaderMode) { self.mode = mode; } /// Follow symlinks, archiving the contents of the file they point to rather /// than adding a symlink to the archive. Defaults to true. pub fn follow_symlinks(&mut self, follow: bool) { self.follow = follow; } /// Gets shared reference to the underlying object. pub fn get_ref(&self) -> &W { self.obj.as_ref().unwrap() } /// Gets mutable reference to the underlying object. /// /// Note that care must be taken while writing to the underlying /// object. But, e.g. `get_mut().flush()` is claimed to be safe and /// useful in the situations when one needs to be ensured that /// tar entry was flushed to the disk. pub fn get_mut(&mut self) -> &mut W { self.obj.as_mut().unwrap() } /// Unwrap this archive, returning the underlying object. /// /// This function will finish writing the archive if the `finish` function /// hasn't yet been called, returning any I/O error which happens during /// that operation. pub fn into_inner(mut self) -> io::Result { if !self.finished { self.finish()?; } Ok(self.obj.take().unwrap()) } /// Adds a new entry to this archive. /// /// This function will append the header specified, followed by contents of /// the stream specified by `data`. To produce a valid archive the `size` /// field of `header` must be the same as the length of the stream that's /// being written. Additionally the checksum for the header should have been /// set via the `set_cksum` method. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Also note that after all entries have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Errors /// /// This function will return an error for any intermittent I/O error which /// occurs when either reading or writing. /// /// # Examples /// /// ``` /// use tar::{Builder, Header}; /// /// let mut header = Header::new_gnu(); /// header.set_path("foo").unwrap(); /// header.set_size(4); /// header.set_cksum(); /// /// let mut data: &[u8] = &[1, 2, 3, 4]; /// /// let mut ar = Builder::new(Vec::new()); /// ar.append(&header, data).unwrap(); /// let data = ar.into_inner().unwrap(); /// ``` pub fn append(&mut self, header: &Header, mut data: R) -> io::Result<()> { append(self.get_mut(), header, &mut data) } /// Adds a new entry to this archive with the specified path. /// /// This function will set the specified path in the given header, which may /// require appending a GNU long-name extension entry to the archive first. /// The checksum for the header will be automatically updated via the /// `set_cksum` method after setting the path. No other metadata in the /// header will be modified. /// /// Then it will append the header, followed by contents of the stream /// specified by `data`. To produce a valid archive the `size` field of /// `header` must be the same as the length of the stream that's being /// written. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Also note that after all entries have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Errors /// /// This function will return an error for any intermittent I/O error which /// occurs when either reading or writing. /// /// # Examples /// /// ``` /// use tar::{Builder, Header}; /// /// let mut header = Header::new_gnu(); /// header.set_size(4); /// header.set_cksum(); /// /// let mut data: &[u8] = &[1, 2, 3, 4]; /// /// let mut ar = Builder::new(Vec::new()); /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap(); /// let data = ar.into_inner().unwrap(); /// ``` pub fn append_data, R: Read>( &mut self, header: &mut Header, path: P, data: R, ) -> io::Result<()> { prepare_header_path(self.get_mut(), header, path.as_ref())?; header.set_cksum(); self.append(&header, data) } /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target. /// /// This function is similar to [`Self::append_data`] which supports long filenames, /// but also supports long link targets using GNU extensions if necessary. /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`]. /// The `set_cksum` method will be invoked after setting the path. No other metadata in the /// header will be modified. /// /// If you are intending to use GNU extensions, you must use this method over calling /// [`Header::set_link_name`] because that function will fail on long links. /// /// Similar constraints around the position of the archive and completion /// apply as with [`Self::append_data`]. /// /// # Errors /// /// This function will return an error for any intermittent I/O error which /// occurs when either reading or writing. /// /// # Examples /// /// ``` /// use tar::{Builder, Header, EntryType}; /// /// let mut ar = Builder::new(Vec::new()); /// let mut header = Header::new_gnu(); /// header.set_username("foo"); /// header.set_entry_type(EntryType::Symlink); /// header.set_size(0); /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap(); /// let data = ar.into_inner().unwrap(); /// ``` pub fn append_link, T: AsRef>( &mut self, header: &mut Header, path: P, target: T, ) -> io::Result<()> { self._append_link(header, path.as_ref(), target.as_ref()) } fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> { prepare_header_path(self.get_mut(), header, path)?; prepare_header_link(self.get_mut(), header, target)?; header.set_cksum(); self.append(&header, std::io::empty()) } /// Adds a file on the local filesystem to this archive. /// /// This function will open the file specified by `path` and insert the file /// into the archive with the appropriate metadata set, returning any I/O /// error which occurs while writing. The path name for the file inside of /// this archive will be the same as `path`, and it is required that the /// path is a relative path. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Also note that after all files have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Examples /// /// ```no_run /// use tar::Builder; /// /// let mut ar = Builder::new(Vec::new()); /// /// ar.append_path("foo/bar.txt").unwrap(); /// ``` pub fn append_path>(&mut self, path: P) -> io::Result<()> { let mode = self.mode.clone(); let follow = self.follow; append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow) } /// Adds a file on the local filesystem to this archive under another name. /// /// This function will open the file specified by `path` and insert the file /// into the archive as `name` with appropriate metadata set, returning any /// I/O error which occurs while writing. The path name for the file inside /// of this archive will be `name` is required to be a relative path. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Note if the `path` is a directory. This will just add an entry to the archive, /// rather than contents of the directory. /// /// Also note that after all files have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Examples /// /// ```no_run /// use tar::Builder; /// /// let mut ar = Builder::new(Vec::new()); /// /// // Insert the local file "foo/bar.txt" in the archive but with the name /// // "bar/foo.txt". /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap(); /// ``` pub fn append_path_with_name, N: AsRef>( &mut self, path: P, name: N, ) -> io::Result<()> { let mode = self.mode.clone(); let follow = self.follow; append_path_with_name( self.get_mut(), path.as_ref(), Some(name.as_ref()), mode, follow, ) } /// Adds a file to this archive with the given path as the name of the file /// in the archive. /// /// This will use the metadata of `file` to populate a `Header`, and it will /// then append the file to the archive with the name `path`. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Also note that after all files have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Examples /// /// ```no_run /// use std::fs::File; /// use tar::Builder; /// /// let mut ar = Builder::new(Vec::new()); /// /// // Open the file at one location, but insert it into the archive with a /// // different name. /// let mut f = File::open("foo/bar/baz.txt").unwrap(); /// ar.append_file("bar/baz.txt", &mut f).unwrap(); /// ``` pub fn append_file>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> { let mode = self.mode.clone(); append_file(self.get_mut(), path.as_ref(), file, mode) } /// Adds a directory to this archive with the given path as the name of the /// directory in the archive. /// /// This will use `stat` to populate a `Header`, and it will then append the /// directory to the archive with the name `path`. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Note this will not add the contents of the directory to the archive. /// See `append_dir_all` for recusively adding the contents of the directory. /// /// Also note that after all files have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Examples /// /// ``` /// use std::fs; /// use tar::Builder; /// /// let mut ar = Builder::new(Vec::new()); /// /// // Use the directory at one location, but insert it into the archive /// // with a different name. /// ar.append_dir("bardir", ".").unwrap(); /// ``` pub fn append_dir(&mut self, path: P, src_path: Q) -> io::Result<()> where P: AsRef, Q: AsRef, { let mode = self.mode.clone(); append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode) } /// Adds a directory and all of its contents (recursively) to this archive /// with the given path as the name of the directory in the archive. /// /// Note that this will not attempt to seek the archive to a valid position, /// so if the archive is in the middle of a read or some other similar /// operation then this may corrupt the archive. /// /// Also note that after all files have been written to an archive the /// `finish` function needs to be called to finish writing the archive. /// /// # Examples /// /// ``` /// use std::fs; /// use tar::Builder; /// /// let mut ar = Builder::new(Vec::new()); /// /// // Use the directory at one location, but insert it into the archive /// // with a different name. /// ar.append_dir_all("bardir", ".").unwrap(); /// ``` pub fn append_dir_all(&mut self, path: P, src_path: Q) -> io::Result<()> where P: AsRef, Q: AsRef, { let mode = self.mode.clone(); let follow = self.follow; append_dir_all( self.get_mut(), path.as_ref(), src_path.as_ref(), mode, follow, ) } /// Finish writing this archive, emitting the termination sections. /// /// This function should only be called when the archive has been written /// entirely and if an I/O error happens the underlying object still needs /// to be acquired. /// /// In most situations the `into_inner` method should be preferred. pub fn finish(&mut self) -> io::Result<()> { if self.finished { return Ok(()); } self.finished = true; self.get_mut().write_all(&[0; 1024]) } } fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> { dst.write_all(header.as_bytes())?; let len = io::copy(&mut data, &mut dst)?; // Pad with zeros if necessary. let buf = [0; 512]; let remaining = 512 - (len % 512); if remaining < 512 { dst.write_all(&buf[..remaining as usize])?; } Ok(()) } fn append_path_with_name( dst: &mut dyn Write, path: &Path, name: Option<&Path>, mode: HeaderMode, follow: bool, ) -> io::Result<()> { let stat = if follow { fs::metadata(path).map_err(|err| { io::Error::new( err.kind(), format!("{} when getting metadata for {}", err, path.display()), ) })? } else { fs::symlink_metadata(path).map_err(|err| { io::Error::new( err.kind(), format!("{} when getting metadata for {}", err, path.display()), ) })? }; let ar_name = name.unwrap_or(path); if stat.is_file() { append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None) } else if stat.is_dir() { append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None) } else if stat.file_type().is_symlink() { let link_name = fs::read_link(path)?; append_fs( dst, ar_name, &stat, &mut io::empty(), mode, Some(&link_name), ) } else { #[cfg(unix)] { append_special(dst, path, &stat, mode) } #[cfg(not(unix))] { Err(other(&format!("{} has unknown file type", path.display()))) } } } #[cfg(unix)] fn append_special( dst: &mut dyn Write, path: &Path, stat: &fs::Metadata, mode: HeaderMode, ) -> io::Result<()> { use ::std::os::unix::fs::{FileTypeExt, MetadataExt}; let file_type = stat.file_type(); let entry_type; if file_type.is_socket() { // sockets can't be archived return Err(other(&format!( "{}: socket can not be archived", path.display() ))); } else if file_type.is_fifo() { entry_type = EntryType::Fifo; } else if file_type.is_char_device() { entry_type = EntryType::Char; } else if file_type.is_block_device() { entry_type = EntryType::Block; } else { return Err(other(&format!("{} has unknown file type", path.display()))); } let mut header = Header::new_gnu(); header.set_metadata_in_mode(stat, mode); prepare_header_path(dst, &mut header, path)?; header.set_entry_type(entry_type); let dev_id = stat.rdev(); let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff); let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff); header.set_device_major(dev_major as u32)?; header.set_device_minor(dev_minor as u32)?; header.set_cksum(); dst.write_all(header.as_bytes())?; Ok(()) } fn append_file( dst: &mut dyn Write, path: &Path, file: &mut fs::File, mode: HeaderMode, ) -> io::Result<()> { let stat = file.metadata()?; append_fs(dst, path, &stat, file, mode, None) } fn append_dir( dst: &mut dyn Write, path: &Path, src_path: &Path, mode: HeaderMode, ) -> io::Result<()> { let stat = fs::metadata(src_path)?; append_fs(dst, path, &stat, &mut io::empty(), mode, None) } fn prepare_header(size: u64, entry_type: u8) -> Header { let mut header = Header::new_gnu(); let name = b"././@LongLink"; header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]); header.set_mode(0o644); header.set_uid(0); header.set_gid(0); header.set_mtime(0); // + 1 to be compliant with GNU tar header.set_size(size + 1); header.set_entry_type(EntryType::new(entry_type)); header.set_cksum(); header } fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> { // Try to encode the path directly in the header, but if it ends up not // working (probably because it's too long) then try to use the GNU-specific // long name extension by emitting an entry which indicates that it's the // filename. if let Err(e) = header.set_path(path) { let data = path2bytes(&path)?; let max = header.as_old().name.len(); // Since `e` isn't specific enough to let us know the path is indeed too // long, verify it first before using the extension. if data.len() < max { return Err(e); } let header2 = prepare_header(data.len() as u64, b'L'); // null-terminated string let mut data2 = data.chain(io::repeat(0).take(1)); append(dst, &header2, &mut data2)?; // Truncate the path to store in the header we're about to emit to // ensure we've got something at least mentioned. Note that we use // `str`-encoding to be compatible with Windows, but in general the // entry in the header itself shouldn't matter too much since extraction // doesn't look at it. let truncated = match str::from_utf8(&data[..max]) { Ok(s) => s, Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(), }; header.set_path(truncated)?; } Ok(()) } fn prepare_header_link( dst: &mut dyn Write, header: &mut Header, link_name: &Path, ) -> io::Result<()> { // Same as previous function but for linkname if let Err(e) = header.set_link_name(&link_name) { let data = path2bytes(&link_name)?; if data.len() < header.as_old().linkname.len() { return Err(e); } let header2 = prepare_header(data.len() as u64, b'K'); let mut data2 = data.chain(io::repeat(0).take(1)); append(dst, &header2, &mut data2)?; } Ok(()) } fn append_fs( dst: &mut dyn Write, path: &Path, meta: &fs::Metadata, read: &mut dyn Read, mode: HeaderMode, link_name: Option<&Path>, ) -> io::Result<()> { let mut header = Header::new_gnu(); prepare_header_path(dst, &mut header, path)?; header.set_metadata_in_mode(meta, mode); if let Some(link_name) = link_name { prepare_header_link(dst, &mut header, link_name)?; } header.set_cksum(); append(dst, &header, read) } fn append_dir_all( dst: &mut dyn Write, path: &Path, src_path: &Path, mode: HeaderMode, follow: bool, ) -> io::Result<()> { let mut stack = vec![(src_path.to_path_buf(), true, false)]; while let Some((src, is_dir, is_symlink)) = stack.pop() { let dest = path.join(src.strip_prefix(&src_path).unwrap()); // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true if is_dir || (is_symlink && follow && src.is_dir()) { for entry in fs::read_dir(&src)? { let entry = entry?; let file_type = entry.file_type()?; stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink())); } if dest != Path::new("") { append_dir(dst, &dest, &src, mode)?; } } else if !follow && is_symlink { let stat = fs::symlink_metadata(&src)?; let link_name = fs::read_link(&src)?; append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?; } else { #[cfg(unix)] { let stat = fs::metadata(&src)?; if !stat.is_file() { append_special(dst, &dest, &stat, mode)?; continue; } } append_file(dst, &dest, &mut fs::File::open(src)?, mode)?; } } Ok(()) } impl Drop for Builder { fn drop(&mut self) { let _ = self.finish(); } }