diff options
Diffstat (limited to 'third_party/rust/zip/src/read.rs')
-rw-r--r-- | third_party/rust/zip/src/read.rs | 1312 |
1 files changed, 1312 insertions, 0 deletions
diff --git a/third_party/rust/zip/src/read.rs b/third_party/rust/zip/src/read.rs new file mode 100644 index 0000000000..dad20c260b --- /dev/null +++ b/third_party/rust/zip/src/read.rs @@ -0,0 +1,1312 @@ +//! Types for reading ZIP archives + +#[cfg(feature = "aes-crypto")] +use crate::aes::{AesReader, AesReaderValid}; +use crate::compression::CompressionMethod; +use crate::cp437::FromCp437; +use crate::crc32::Crc32Reader; +use crate::result::{InvalidPassword, ZipError, ZipResult}; +use crate::spec; +use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData}; +use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; +use byteorder::{LittleEndian, ReadBytesExt}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::io::{self, prelude::*}; +use std::path::{Component, Path}; +use std::sync::Arc; + +#[cfg(any( + feature = "deflate", + feature = "deflate-miniz", + feature = "deflate-zlib" +))] +use flate2::read::DeflateDecoder; + +#[cfg(feature = "bzip2")] +use bzip2::read::BzDecoder; + +#[cfg(feature = "zstd")] +use zstd::stream::read::Decoder as ZstdDecoder; + +mod ffi { + pub const S_IFDIR: u32 = 0o0040000; + pub const S_IFREG: u32 = 0o0100000; +} + +// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely +pub(crate) mod zip_archive { + /// Extract immutable data from `ZipArchive` to make it cheap to clone + #[derive(Debug)] + pub(crate) struct Shared { + pub(super) files: Vec<super::ZipFileData>, + pub(super) names_map: super::HashMap<String, usize>, + pub(super) offset: u64, + pub(super) comment: Vec<u8>, + } + + /// ZIP archive reader + /// + /// At the moment, this type is cheap to clone if this is the case for the + /// reader it uses. However, this is not guaranteed by this crate and it may + /// change in the future. + /// + /// ```no_run + /// use std::io::prelude::*; + /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> { + /// let mut zip = zip::ZipArchive::new(reader)?; + /// + /// for i in 0..zip.len() { + /// let mut file = zip.by_index(i)?; + /// println!("Filename: {}", file.name()); + /// std::io::copy(&mut file, &mut std::io::stdout()); + /// } + /// + /// Ok(()) + /// } + /// ``` + #[derive(Clone, Debug)] + pub struct ZipArchive<R> { + pub(super) reader: R, + pub(super) shared: super::Arc<Shared>, + } +} + +pub use zip_archive::ZipArchive; +#[allow(clippy::large_enum_variant)] +enum CryptoReader<'a> { + Plaintext(io::Take<&'a mut dyn Read>), + ZipCrypto(ZipCryptoReaderValid<io::Take<&'a mut dyn Read>>), + #[cfg(feature = "aes-crypto")] + Aes { + reader: AesReaderValid<io::Take<&'a mut dyn Read>>, + vendor_version: AesVendorVersion, + }, +} + +impl<'a> Read for CryptoReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + match self { + CryptoReader::Plaintext(r) => r.read(buf), + CryptoReader::ZipCrypto(r) => r.read(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read(buf), + } + } +} + +impl<'a> CryptoReader<'a> { + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { + match self { + CryptoReader::Plaintext(r) => r, + CryptoReader::ZipCrypto(r) => r.into_inner(), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.into_inner(), + } + } + + /// Returns `true` if the data is encrypted using AE2. + pub fn is_ae2_encrypted(&self) -> bool { + #[cfg(feature = "aes-crypto")] + return matches!( + self, + CryptoReader::Aes { + vendor_version: AesVendorVersion::Ae2, + .. + } + ); + #[cfg(not(feature = "aes-crypto"))] + false + } +} + +enum ZipFileReader<'a> { + NoReader, + Raw(io::Take<&'a mut dyn io::Read>), + Stored(Crc32Reader<CryptoReader<'a>>), + #[cfg(any( + feature = "deflate", + feature = "deflate-miniz", + feature = "deflate-zlib" + ))] + Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>), + #[cfg(feature = "bzip2")] + Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>), + #[cfg(feature = "zstd")] + Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>), +} + +impl<'a> Read for ZipFileReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read(buf), + ZipFileReader::Stored(r) => r.read(buf), + #[cfg(any( + feature = "deflate", + feature = "deflate-miniz", + feature = "deflate-zlib" + ))] + ZipFileReader::Deflated(r) => r.read(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read(buf), + } + } +} + +impl<'a> ZipFileReader<'a> { + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r, + ZipFileReader::Stored(r) => r.into_inner().into_inner(), + #[cfg(any( + feature = "deflate", + feature = "deflate-miniz", + feature = "deflate-zlib" + ))] + ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(), + } + } +} + +/// A struct for reading a zip file +pub struct ZipFile<'a> { + data: Cow<'a, ZipFileData>, + crypto_reader: Option<CryptoReader<'a>>, + reader: ZipFileReader<'a>, +} + +fn find_content<'a>( + data: &ZipFileData, + reader: &'a mut (impl Read + Seek), +) -> ZipResult<io::Take<&'a mut dyn Read>> { + // Parse local header + reader.seek(io::SeekFrom::Start(data.header_start))?; + let signature = reader.read_u32::<LittleEndian>()?; + if signature != spec::LOCAL_FILE_HEADER_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid local file header")); + } + + reader.seek(io::SeekFrom::Current(22))?; + let file_name_length = reader.read_u16::<LittleEndian>()? as u64; + let extra_field_length = reader.read_u16::<LittleEndian>()? as u64; + let magic_and_header = 4 + 22 + 2 + 2; + let data_start = data.header_start + magic_and_header + file_name_length + extra_field_length; + data.data_start.store(data_start); + + reader.seek(io::SeekFrom::Start(data_start))?; + Ok((reader as &mut dyn Read).take(data.compressed_size)) +} + +#[allow(clippy::too_many_arguments)] +fn make_crypto_reader<'a>( + compression_method: crate::compression::CompressionMethod, + crc32: u32, + last_modified_time: DateTime, + using_data_descriptor: bool, + reader: io::Take<&'a mut dyn io::Read>, + password: Option<&[u8]>, + aes_info: Option<(AesMode, AesVendorVersion)>, + #[cfg(feature = "aes-crypto")] compressed_size: u64, +) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> { + #[allow(deprecated)] + { + if let CompressionMethod::Unsupported(_) = compression_method { + return unsupported_zip_error("Compression method not supported"); + } + } + + let reader = match (password, aes_info) { + #[cfg(not(feature = "aes-crypto"))] + (Some(_), Some(_)) => { + return Err(ZipError::UnsupportedArchive( + "AES encrypted files cannot be decrypted without the aes-crypto feature.", + )) + } + #[cfg(feature = "aes-crypto")] + (Some(password), Some((aes_mode, vendor_version))) => { + match AesReader::new(reader, aes_mode, compressed_size).validate(password)? { + None => return Ok(Err(InvalidPassword)), + Some(r) => CryptoReader::Aes { + reader: r, + vendor_version, + }, + } + } + (Some(password), None) => { + let validator = if using_data_descriptor { + ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart()) + } else { + ZipCryptoValidator::PkzipCrc32(crc32) + }; + match ZipCryptoReader::new(reader, password).validate(validator)? { + None => return Ok(Err(InvalidPassword)), + Some(r) => CryptoReader::ZipCrypto(r), + } + } + (None, Some(_)) => return Ok(Err(InvalidPassword)), + (None, None) => CryptoReader::Plaintext(reader), + }; + Ok(Ok(reader)) +} + +fn make_reader( + compression_method: CompressionMethod, + crc32: u32, + reader: CryptoReader, +) -> ZipFileReader { + let ae2_encrypted = reader.is_ae2_encrypted(); + + match compression_method { + CompressionMethod::Stored => { + ZipFileReader::Stored(Crc32Reader::new(reader, crc32, ae2_encrypted)) + } + #[cfg(any( + feature = "deflate", + feature = "deflate-miniz", + feature = "deflate-zlib" + ))] + CompressionMethod::Deflated => { + let deflate_reader = DeflateDecoder::new(reader); + ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32, ae2_encrypted)) + } + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => { + let bzip2_reader = BzDecoder::new(reader); + ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32, ae2_encrypted)) + } + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => { + let zstd_reader = ZstdDecoder::new(reader).unwrap(); + ZipFileReader::Zstd(Crc32Reader::new(zstd_reader, crc32, ae2_encrypted)) + } + _ => panic!("Compression method not supported"), + } +} + +impl<R: Read + io::Seek> ZipArchive<R> { + /// Get the directory start offset and number of files. This is done in a + /// separate function to ease the control flow design. + pub(crate) fn get_directory_counts( + reader: &mut R, + footer: &spec::CentralDirectoryEnd, + cde_start_pos: u64, + ) -> ZipResult<(u64, u64, usize)> { + // See if there's a ZIP64 footer. The ZIP64 locator if present will + // have its signature 20 bytes in front of the standard footer. The + // standard footer, in turn, is 22+N bytes large, where N is the + // comment length. Therefore: + let zip64locator = if reader + .seek(io::SeekFrom::End( + -(20 + 22 + footer.zip_file_comment.len() as i64), + )) + .is_ok() + { + match spec::Zip64CentralDirectoryEndLocator::parse(reader) { + Ok(loc) => Some(loc), + Err(ZipError::InvalidArchive(_)) => { + // No ZIP64 header; that's actually fine. We're done here. + None + } + Err(e) => { + // Yikes, a real problem + return Err(e); + } + } + } else { + // Empty Zip files will have nothing else so this error might be fine. If + // not, we'll find out soon. + None + }; + + match zip64locator { + None => { + // Some zip files have data prepended to them, resulting in the + // offsets all being too small. Get the amount of error by comparing + // the actual file position we found the CDE at with the offset + // recorded in the CDE. + let archive_offset = cde_start_pos + .checked_sub(footer.central_directory_size as u64) + .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) + .ok_or(ZipError::InvalidArchive( + "Invalid central directory size or offset", + ))?; + + let directory_start = footer.central_directory_offset as u64 + archive_offset; + let number_of_files = footer.number_of_files_on_this_disk as usize; + Ok((archive_offset, directory_start, number_of_files)) + } + Some(locator64) => { + // If we got here, this is indeed a ZIP64 file. + + if !footer.record_too_small() + && footer.disk_number as u32 != locator64.disk_with_central_directory + { + return unsupported_zip_error( + "Support for multi-disk files is not implemented", + ); + } + + // We need to reassess `archive_offset`. We know where the ZIP64 + // central-directory-end structure *should* be, but unfortunately we + // don't know how to precisely relate that location to our current + // actual offset in the file, since there may be junk at its + // beginning. Therefore we need to perform another search, as in + // read::CentralDirectoryEnd::find_and_parse, except now we search + // forward. + + let search_upper_bound = cde_start_pos + .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + .ok_or(ZipError::InvalidArchive( + "File cannot contain ZIP64 central directory end", + ))?; + let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse( + reader, + locator64.end_of_central_directory_offset, + search_upper_bound, + )?; + + if footer.disk_number != footer.disk_with_central_directory { + return unsupported_zip_error( + "Support for multi-disk files is not implemented", + ); + } + + let directory_start = footer + .central_directory_offset + .checked_add(archive_offset) + .ok_or({ + ZipError::InvalidArchive("Invalid central directory size or offset") + })?; + + Ok(( + archive_offset, + directory_start, + footer.number_of_files as usize, + )) + } + } + } + + /// Read a ZIP archive, collecting the files it contains + /// + /// This uses the central directory record of the ZIP file, and ignores local file headers + pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> { + let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?; + + if !footer.record_too_small() && footer.disk_number != footer.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented"); + } + + let (archive_offset, directory_start, number_of_files) = + Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?; + + // If the parsed number of files is greater than the offset then + // something fishy is going on and we shouldn't trust number_of_files. + let file_capacity = if number_of_files > cde_start_pos as usize { + 0 + } else { + number_of_files + }; + + let mut files = Vec::with_capacity(file_capacity); + let mut names_map = HashMap::with_capacity(file_capacity); + + if reader.seek(io::SeekFrom::Start(directory_start)).is_err() { + return Err(ZipError::InvalidArchive( + "Could not seek to start of central directory", + )); + } + + for _ in 0..number_of_files { + let file = central_header_to_zip_file(&mut reader, archive_offset)?; + names_map.insert(file.file_name.clone(), files.len()); + files.push(file); + } + + let shared = Arc::new(zip_archive::Shared { + files, + names_map, + offset: archive_offset, + comment: footer.zip_file_comment, + }); + + Ok(ZipArchive { reader, shared }) + } + /// Extract a Zip archive into a directory, overwriting files if they + /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. + /// + /// Extraction is not atomic; If an error is encountered, some of the files + /// may be left on disk. + pub fn extract<P: AsRef<Path>>(&mut self, directory: P) -> ZipResult<()> { + use std::fs; + + for i in 0..self.len() { + let mut file = self.by_index(i)?; + let filepath = file + .enclosed_name() + .ok_or(ZipError::InvalidArchive("Invalid file path"))?; + + let outpath = directory.as_ref().join(filepath); + + if file.name().ends_with('/') { + fs::create_dir_all(&outpath)?; + } else { + if let Some(p) = outpath.parent() { + if !p.exists() { + fs::create_dir_all(p)?; + } + } + let mut outfile = fs::File::create(&outpath)?; + io::copy(&mut file, &mut outfile)?; + } + // Get and Set permissions + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Some(mode) = file.unix_mode() { + fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?; + } + } + } + Ok(()) + } + + /// Number of files contained in this zip. + pub fn len(&self) -> usize { + self.shared.files.len() + } + + /// Whether this zip archive contains no files + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get the offset from the beginning of the underlying reader that this zip begins at, in bytes. + /// + /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size + /// of that prepended data. + pub fn offset(&self) -> u64 { + self.shared.offset + } + + /// Get the comment of the zip archive. + pub fn comment(&self) -> &[u8] { + &self.shared.comment + } + + /// Returns an iterator over all the file and directory names in this archive. + pub fn file_names(&self) -> impl Iterator<Item = &str> { + self.shared.names_map.keys().map(|s| s.as_str()) + } + + /// Search for a file entry by name, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. + pub fn by_name_decrypt<'a>( + &'a mut self, + name: &str, + password: &[u8], + ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> { + self.by_name_with_optional_password(name, Some(password)) + } + + /// Search for a file entry by name + pub fn by_name<'a>(&'a mut self, name: &str) -> ZipResult<ZipFile<'a>> { + Ok(self.by_name_with_optional_password(name, None)?.unwrap()) + } + + fn by_name_with_optional_password<'a>( + &'a mut self, + name: &str, + password: Option<&[u8]>, + ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> { + let index = match self.shared.names_map.get(name) { + Some(index) => *index, + None => { + return Err(ZipError::FileNotFound); + } + }; + self.by_index_with_optional_password(index, password) + } + + /// Get a contained file by index, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. + pub fn by_index_decrypt<'a>( + &'a mut self, + file_number: usize, + password: &[u8], + ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> { + self.by_index_with_optional_password(file_number, Some(password)) + } + + /// Get a contained file by index + pub fn by_index(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> { + Ok(self + .by_index_with_optional_password(file_number, None)? + .unwrap()) + } + + /// Get a contained file by index without decompressing it + pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> { + let reader = &mut self.reader; + self.shared + .files + .get(file_number) + .ok_or(ZipError::FileNotFound) + .and_then(move |data| { + Ok(ZipFile { + crypto_reader: None, + reader: ZipFileReader::Raw(find_content(data, reader)?), + data: Cow::Borrowed(data), + }) + }) + } + + fn by_index_with_optional_password<'a>( + &'a mut self, + file_number: usize, + mut password: Option<&[u8]>, + ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> { + let data = self + .shared + .files + .get(file_number) + .ok_or(ZipError::FileNotFound)?; + + match (password, data.encrypted) { + (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)), + (Some(_), false) => password = None, //Password supplied, but none needed! Discard. + _ => {} + } + let limit_reader = find_content(data, &mut self.reader)?; + + match make_crypto_reader( + data.compression_method, + data.crc32, + data.last_modified_time, + data.using_data_descriptor, + limit_reader, + password, + data.aes_mode, + #[cfg(feature = "aes-crypto")] + data.compressed_size, + ) { + Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile { + crypto_reader: Some(crypto_reader), + reader: ZipFileReader::NoReader, + data: Cow::Borrowed(data), + })), + Err(e) => Err(e), + Ok(Err(e)) => Ok(Err(e)), + } + } + + /// Unwrap and return the inner reader object + /// + /// The position of the reader is undefined. + pub fn into_inner(self) -> R { + self.reader + } +} + +fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> { + Err(ZipError::UnsupportedArchive(detail)) +} + +/// Parse a central directory entry to collect the information for the file. +pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>( + reader: &mut R, + archive_offset: u64, +) -> ZipResult<ZipFileData> { + let central_header_start = reader.stream_position()?; + // Parse central header + let signature = reader.read_u32::<LittleEndian>()?; + if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid Central Directory header")); + } + + let version_made_by = reader.read_u16::<LittleEndian>()?; + let _version_to_extract = reader.read_u16::<LittleEndian>()?; + let flags = reader.read_u16::<LittleEndian>()?; + let encrypted = flags & 1 == 1; + let is_utf8 = flags & (1 << 11) != 0; + let using_data_descriptor = flags & (1 << 3) != 0; + let compression_method = reader.read_u16::<LittleEndian>()?; + let last_mod_time = reader.read_u16::<LittleEndian>()?; + let last_mod_date = reader.read_u16::<LittleEndian>()?; + let crc32 = reader.read_u32::<LittleEndian>()?; + let compressed_size = reader.read_u32::<LittleEndian>()?; + let uncompressed_size = reader.read_u32::<LittleEndian>()?; + let file_name_length = reader.read_u16::<LittleEndian>()? as usize; + let extra_field_length = reader.read_u16::<LittleEndian>()? as usize; + let file_comment_length = reader.read_u16::<LittleEndian>()? as usize; + let _disk_number = reader.read_u16::<LittleEndian>()?; + let _internal_file_attributes = reader.read_u16::<LittleEndian>()?; + let external_file_attributes = reader.read_u32::<LittleEndian>()?; + let offset = reader.read_u32::<LittleEndian>()? as u64; + let mut file_name_raw = vec![0; file_name_length]; + reader.read_exact(&mut file_name_raw)?; + let mut extra_field = vec![0; extra_field_length]; + reader.read_exact(&mut extra_field)?; + let mut file_comment_raw = vec![0; file_comment_length]; + reader.read_exact(&mut file_comment_raw)?; + + let file_name = match is_utf8 { + true => String::from_utf8_lossy(&file_name_raw).into_owned(), + false => file_name_raw.clone().from_cp437(), + }; + let file_comment = match is_utf8 { + true => String::from_utf8_lossy(&file_comment_raw).into_owned(), + false => file_comment_raw.from_cp437(), + }; + + // Construct the result + let mut result = ZipFileData { + system: System::from_u8((version_made_by >> 8) as u8), + version_made_by: version_made_by as u8, + encrypted, + using_data_descriptor, + compression_method: { + #[allow(deprecated)] + CompressionMethod::from_u16(compression_method) + }, + compression_level: None, + last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time), + crc32, + compressed_size: compressed_size as u64, + uncompressed_size: uncompressed_size as u64, + file_name, + file_name_raw, + extra_field, + file_comment, + header_start: offset, + central_header_start, + data_start: AtomicU64::new(0), + external_attributes: external_file_attributes, + large_file: false, + aes_mode: None, + }; + + match parse_extra_field(&mut result) { + Ok(..) | Err(ZipError::Io(..)) => {} + Err(e) => return Err(e), + } + + let aes_enabled = result.compression_method == CompressionMethod::AES; + if aes_enabled && result.aes_mode.is_none() { + return Err(ZipError::InvalidArchive( + "AES encryption without AES extra data field", + )); + } + + // Account for shifted zip offsets. + result.header_start = result + .header_start + .checked_add(archive_offset) + .ok_or(ZipError::InvalidArchive("Archive header is too large"))?; + + Ok(result) +} + +fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { + let mut reader = io::Cursor::new(&file.extra_field); + + while (reader.position() as usize) < file.extra_field.len() { + let kind = reader.read_u16::<LittleEndian>()?; + let len = reader.read_u16::<LittleEndian>()?; + let mut len_left = len as i64; + match kind { + // Zip64 extended information extra field + 0x0001 => { + if file.uncompressed_size == spec::ZIP64_BYTES_THR { + file.large_file = true; + file.uncompressed_size = reader.read_u64::<LittleEndian>()?; + len_left -= 8; + } + if file.compressed_size == spec::ZIP64_BYTES_THR { + file.large_file = true; + file.compressed_size = reader.read_u64::<LittleEndian>()?; + len_left -= 8; + } + if file.header_start == spec::ZIP64_BYTES_THR { + file.header_start = reader.read_u64::<LittleEndian>()?; + len_left -= 8; + } + } + 0x9901 => { + // AES + if len != 7 { + return Err(ZipError::UnsupportedArchive( + "AES extra data field has an unsupported length", + )); + } + let vendor_version = reader.read_u16::<LittleEndian>()?; + let vendor_id = reader.read_u16::<LittleEndian>()?; + let aes_mode = reader.read_u8()?; + let compression_method = reader.read_u16::<LittleEndian>()?; + + if vendor_id != 0x4541 { + return Err(ZipError::InvalidArchive("Invalid AES vendor")); + } + let vendor_version = match vendor_version { + 0x0001 => AesVendorVersion::Ae1, + 0x0002 => AesVendorVersion::Ae2, + _ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")), + }; + match aes_mode { + 0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version)), + 0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version)), + 0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version)), + _ => return Err(ZipError::InvalidArchive("Invalid AES encryption strength")), + }; + file.compression_method = { + #[allow(deprecated)] + CompressionMethod::from_u16(compression_method) + }; + } + _ => { + // Other fields are ignored + } + } + + // We could also check for < 0 to check for errors + if len_left > 0 { + reader.seek(io::SeekFrom::Current(len_left))?; + } + } + Ok(()) +} + +/// Methods for retrieving information on zip files +impl<'a> ZipFile<'a> { + fn get_reader(&mut self) -> &mut ZipFileReader<'a> { + if let ZipFileReader::NoReader = self.reader { + let data = &self.data; + let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); + self.reader = make_reader(data.compression_method, data.crc32, crypto_reader) + } + &mut self.reader + } + + pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read { + if let ZipFileReader::NoReader = self.reader { + let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); + self.reader = ZipFileReader::Raw(crypto_reader.into_inner()) + } + &mut self.reader + } + + /// Get the version of the file + pub fn version_made_by(&self) -> (u8, u8) { + ( + self.data.version_made_by / 10, + self.data.version_made_by % 10, + ) + } + + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + pub fn name(&self) -> &str { + &self.data.file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + pub fn name_raw(&self) -> &[u8] { + &self.data.file_name_raw + } + + /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte, + /// removes a leading '/' and removes '..' parts. + #[deprecated( + since = "0.5.7", + note = "by stripping `..`s from the path, the meaning of paths can change. + `mangled_name` can be used if this behaviour is desirable" + )] + pub fn sanitized_name(&self) -> ::std::path::PathBuf { + self.mangled_name() + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [`ParentDir`]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. + /// + /// [`ParentDir`]: `Component::ParentDir` + pub fn mangled_name(&self) -> ::std::path::PathBuf { + self.data.file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + pub fn enclosed_name(&self) -> Option<&Path> { + if self.data.file_name.contains('\0') { + return None; + } + let path = Path::new(&self.data.file_name); + let mut depth = 0usize; + for component in path.components() { + match component { + Component::Prefix(_) | Component::RootDir => return None, + Component::ParentDir => depth = depth.checked_sub(1)?, + Component::Normal(_) => depth += 1, + Component::CurDir => (), + } + } + Some(path) + } + + /// Get the comment of the file + pub fn comment(&self) -> &str { + &self.data.file_comment + } + + /// Get the compression method used to store the file + pub fn compression(&self) -> CompressionMethod { + self.data.compression_method + } + + /// Get the size of the file, in bytes, in the archive + pub fn compressed_size(&self) -> u64 { + self.data.compressed_size + } + + /// Get the size of the file, in bytes, when uncompressed + pub fn size(&self) -> u64 { + self.data.uncompressed_size + } + + /// Get the time the file was last modified + pub fn last_modified(&self) -> DateTime { + self.data.last_modified_time + } + /// Returns whether the file is actually a directory + pub fn is_dir(&self) -> bool { + self.name() + .chars() + .rev() + .next() + .map_or(false, |c| c == '/' || c == '\\') + } + + /// Returns whether the file is a regular file + pub fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Get unix mode for the file + pub fn unix_mode(&self) -> Option<u32> { + if self.data.external_attributes == 0 { + return None; + } + + match self.data.system { + System::Unix => Some(self.data.external_attributes >> 16), + System::Dos => { + // Interpret MS-DOS directory bit + let mut mode = if 0x10 == (self.data.external_attributes & 0x10) { + ffi::S_IFDIR | 0o0775 + } else { + ffi::S_IFREG | 0o0664 + }; + if 0x01 == (self.data.external_attributes & 0x01) { + // Read-only bit; strip write permissions + mode &= 0o0555; + } + Some(mode) + } + _ => None, + } + } + + /// Get the CRC32 hash of the original file + pub fn crc32(&self) -> u32 { + self.data.crc32 + } + + /// Get the extra data of the zip header for this file + pub fn extra_data(&self) -> &[u8] { + &self.data.extra_field + } + + /// Get the starting offset of the data of the compressed file + pub fn data_start(&self) -> u64 { + self.data.data_start.load() + } + + /// Get the starting offset of the zip header for this file + pub fn header_start(&self) -> u64 { + self.data.header_start + } + /// Get the starting offset of the zip header in the central directory for this file + pub fn central_header_start(&self) -> u64 { + self.data.central_header_start + } +} + +impl<'a> Read for ZipFile<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.get_reader().read(buf) + } +} + +impl<'a> Drop for ZipFile<'a> { + fn drop(&mut self) { + // self.data is Owned, this reader is constructed by a streaming reader. + // In this case, we want to exhaust the reader so that the next file is accessible. + if let Cow::Owned(_) = self.data { + let mut buffer = [0; 1 << 16]; + + // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. + let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader { + ZipFileReader::NoReader => { + let innerreader = ::std::mem::replace(&mut self.crypto_reader, None); + innerreader.expect("Invalid reader state").into_inner() + } + reader => { + let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader); + innerreader.into_inner() + } + }; + + loop { + match reader.read(&mut buffer) { + Ok(0) => break, + Ok(_) => (), + Err(e) => panic!( + "Could not consume all of the output of the current ZipFile: {:?}", + e + ), + } + } + } + } +} + +/// Read ZipFile structures from a non-seekable reader. +/// +/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions +/// as some information will be missing when reading this manner. +/// +/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is +/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory +/// is encountered. No more files should be read after this. +/// +/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after +/// the structure is done. +/// +/// Missing fields are: +/// * `comment`: set to an empty string +/// * `data_start`: set to 0 +/// * `external_attributes`: `unix_mode()`: will return None +pub fn read_zipfile_from_stream<'a, R: io::Read>( + reader: &'a mut R, +) -> ZipResult<Option<ZipFile<'_>>> { + let signature = reader.read_u32::<LittleEndian>()?; + + match signature { + spec::LOCAL_FILE_HEADER_SIGNATURE => (), + spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), + _ => return Err(ZipError::InvalidArchive("Invalid local file header")), + } + + let version_made_by = reader.read_u16::<LittleEndian>()?; + let flags = reader.read_u16::<LittleEndian>()?; + let encrypted = flags & 1 == 1; + let is_utf8 = flags & (1 << 11) != 0; + let using_data_descriptor = flags & (1 << 3) != 0; + #[allow(deprecated)] + let compression_method = CompressionMethod::from_u16(reader.read_u16::<LittleEndian>()?); + let last_mod_time = reader.read_u16::<LittleEndian>()?; + let last_mod_date = reader.read_u16::<LittleEndian>()?; + let crc32 = reader.read_u32::<LittleEndian>()?; + let compressed_size = reader.read_u32::<LittleEndian>()?; + let uncompressed_size = reader.read_u32::<LittleEndian>()?; + let file_name_length = reader.read_u16::<LittleEndian>()? as usize; + let extra_field_length = reader.read_u16::<LittleEndian>()? as usize; + + let mut file_name_raw = vec![0; file_name_length]; + reader.read_exact(&mut file_name_raw)?; + let mut extra_field = vec![0; extra_field_length]; + reader.read_exact(&mut extra_field)?; + + let file_name = match is_utf8 { + true => String::from_utf8_lossy(&file_name_raw).into_owned(), + false => file_name_raw.clone().from_cp437(), + }; + + let mut result = ZipFileData { + system: System::from_u8((version_made_by >> 8) as u8), + version_made_by: version_made_by as u8, + encrypted, + using_data_descriptor, + compression_method, + compression_level: None, + last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time), + crc32, + compressed_size: compressed_size as u64, + uncompressed_size: uncompressed_size as u64, + file_name, + file_name_raw, + extra_field, + file_comment: String::new(), // file comment is only available in the central directory + // header_start and data start are not available, but also don't matter, since seeking is + // not available. + header_start: 0, + data_start: AtomicU64::new(0), + central_header_start: 0, + // The external_attributes field is only available in the central directory. + // We set this to zero, which should be valid as the docs state 'If input came + // from standard input, this field is set to zero.' + external_attributes: 0, + large_file: false, + aes_mode: None, + }; + + match parse_extra_field(&mut result) { + Ok(..) | Err(ZipError::Io(..)) => {} + Err(e) => return Err(e), + } + + if encrypted { + return unsupported_zip_error("Encrypted files are not supported"); + } + if using_data_descriptor { + return unsupported_zip_error("The file length is not available in the local header"); + } + + let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size); + + let result_crc32 = result.crc32; + let result_compression_method = result.compression_method; + let crypto_reader = make_crypto_reader( + result_compression_method, + result_crc32, + result.last_modified_time, + result.using_data_descriptor, + limit_reader, + None, + None, + #[cfg(feature = "aes-crypto")] + result.compressed_size, + )? + .unwrap(); + + Ok(Some(ZipFile { + data: Cow::Owned(result), + crypto_reader: None, + reader: make_reader(result_compression_method, result_crc32, crypto_reader), + })) +} + +#[cfg(test)] +mod test { + #[test] + fn invalid_offset() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/invalid_offset.zip")); + let reader = ZipArchive::new(io::Cursor::new(v)); + assert!(reader.is_err()); + } + + #[test] + fn invalid_offset2() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/invalid_offset2.zip")); + let reader = ZipArchive::new(io::Cursor::new(v)); + assert!(reader.is_err()); + } + + #[test] + fn zip64_with_leading_junk() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/zip64_demo.zip")); + let reader = ZipArchive::new(io::Cursor::new(v)).unwrap(); + assert_eq!(reader.len(), 1); + } + + #[test] + fn zip_contents() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); + let mut reader = ZipArchive::new(io::Cursor::new(v)).unwrap(); + assert_eq!(reader.comment(), b""); + assert_eq!(reader.by_index(0).unwrap().central_header_start(), 77); + } + + #[test] + fn zip_read_streaming() { + use super::read_zipfile_from_stream; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); + let mut reader = io::Cursor::new(v); + loop { + if read_zipfile_from_stream(&mut reader).unwrap().is_none() { + break; + } + } + } + + #[test] + fn zip_clone() { + use super::ZipArchive; + use std::io::{self, Read}; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); + let mut reader1 = ZipArchive::new(io::Cursor::new(v)).unwrap(); + let mut reader2 = reader1.clone(); + + let mut file1 = reader1.by_index(0).unwrap(); + let mut file2 = reader2.by_index(0).unwrap(); + + let t = file1.last_modified(); + assert_eq!( + ( + t.year(), + t.month(), + t.day(), + t.hour(), + t.minute(), + t.second() + ), + (1980, 1, 1, 0, 0, 0) + ); + + let mut buf1 = [0; 5]; + let mut buf2 = [0; 5]; + let mut buf3 = [0; 5]; + let mut buf4 = [0; 5]; + + file1.read_exact(&mut buf1).unwrap(); + file2.read_exact(&mut buf2).unwrap(); + file1.read_exact(&mut buf3).unwrap(); + file2.read_exact(&mut buf4).unwrap(); + + assert_eq!(buf1, buf2); + assert_eq!(buf3, buf4); + assert_ne!(buf1, buf3); + } + + #[test] + fn file_and_dir_predicates() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/files_and_dirs.zip")); + let mut zip = ZipArchive::new(io::Cursor::new(v)).unwrap(); + + for i in 0..zip.len() { + let zip_file = zip.by_index(i).unwrap(); + let full_name = zip_file.enclosed_name().unwrap(); + let file_name = full_name.file_name().unwrap().to_str().unwrap(); + assert!( + (file_name.starts_with("dir") && zip_file.is_dir()) + || (file_name.starts_with("file") && zip_file.is_file()) + ); + } + } + + /// test case to ensure we don't preemptively over allocate based on the + /// declared number of files in the CDE of an invalid zip when the number of + /// files declared is more than the alleged offset in the CDE + #[test] + fn invalid_cde_number_of_files_allocation_smaller_offset() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!( + "../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip" + )); + let reader = ZipArchive::new(io::Cursor::new(v)); + assert!(reader.is_err()); + } + + /// test case to ensure we don't preemptively over allocate based on the + /// declared number of files in the CDE of an invalid zip when the number of + /// files declared is less than the alleged offset in the CDE + #[test] + fn invalid_cde_number_of_files_allocation_greater_offset() { + use super::ZipArchive; + use std::io; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!( + "../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip" + )); + let reader = ZipArchive::new(io::Cursor::new(v)); + assert!(reader.is_err()); + } +} |