diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 18:31:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 18:31:44 +0000 |
commit | c23a457e72abe608715ac76f076f47dc42af07a5 (patch) | |
tree | 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/flate2/src | |
parent | Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip |
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/flate2/src')
-rw-r--r-- | vendor/flate2/src/deflate/read.rs | 6 | ||||
-rw-r--r-- | vendor/flate2/src/ffi/mod.rs | 4 | ||||
-rw-r--r-- | vendor/flate2/src/gz/bufread.rs | 591 | ||||
-rw-r--r-- | vendor/flate2/src/gz/mod.rs | 320 | ||||
-rw-r--r-- | vendor/flate2/src/gz/read.rs | 134 | ||||
-rw-r--r-- | vendor/flate2/src/gz/write.rs | 169 | ||||
-rw-r--r-- | vendor/flate2/src/lib.rs | 30 | ||||
-rw-r--r-- | vendor/flate2/src/zlib/bufread.rs | 18 | ||||
-rw-r--r-- | vendor/flate2/src/zlib/read.rs | 43 | ||||
-rw-r--r-- | vendor/flate2/src/zlib/write.rs | 19 |
10 files changed, 740 insertions, 594 deletions
diff --git a/vendor/flate2/src/deflate/read.rs b/vendor/flate2/src/deflate/read.rs index e6af130a3..5937e6f64 100644 --- a/vendor/flate2/src/deflate/read.rs +++ b/vendor/flate2/src/deflate/read.rs @@ -25,11 +25,11 @@ use crate::bufreader::BufReader; /// # /// // Return a vector containing the Deflate compressed version of hello world /// fn deflateencoder_read_hello_world() -> io::Result<Vec<u8>> { -/// let mut ret_vec = [0;100]; +/// let mut ret_vec = Vec::new(); /// let c = b"hello world"; /// let mut deflater = DeflateEncoder::new(&c[..], Compression::fast()); -/// let count = deflater.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) +/// deflater.read_to_end(&mut ret_vec)?; +/// Ok(ret_vec) /// } /// ``` #[derive(Debug)] diff --git a/vendor/flate2/src/ffi/mod.rs b/vendor/flate2/src/ffi/mod.rs index 8bac6e423..20b3cae6f 100644 --- a/vendor/flate2/src/ffi/mod.rs +++ b/vendor/flate2/src/ffi/mod.rs @@ -40,9 +40,9 @@ mod c; #[cfg(feature = "any_zlib")] pub use self::c::*; -#[cfg(not(feature = "any_zlib"))] +#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))] mod rust; -#[cfg(not(feature = "any_zlib"))] +#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))] pub use self::rust::*; impl std::fmt::Debug for ErrorMessage { diff --git a/vendor/flate2/src/gz/bufread.rs b/vendor/flate2/src/gz/bufread.rs index c6ac5a98b..6fc48bcdd 100644 --- a/vendor/flate2/src/gz/bufread.rs +++ b/vendor/flate2/src/gz/bufread.rs @@ -3,9 +3,8 @@ use std::io; use std::io::prelude::*; use std::mem; -use super::{GzBuilder, GzHeader}; -use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}; -use crate::crc::{Crc, CrcReader}; +use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; +use crate::crc::CrcReader; use crate::deflate; use crate::Compression; @@ -18,112 +17,6 @@ fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize { min } -pub(crate) fn corrupt() -> io::Error { - io::Error::new( - io::ErrorKind::InvalidInput, - "corrupt gzip stream does not have a matching checksum", - ) -} - -fn bad_header() -> io::Error { - io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header") -} - -fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> { - let mut b = [0; 2]; - r.read_and_forget(&mut b)?; - Ok((b[0] as u16) | ((b[1] as u16) << 8)) -} - -fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { - loop { - match r.part.state { - GzHeaderParsingState::Start => { - let mut header = [0; 10]; - r.read_and_forget(&mut header)?; - - if header[0] != 0x1f || header[1] != 0x8b { - return Err(bad_header()); - } - if header[2] != 8 { - return Err(bad_header()); - } - - r.part.flg = header[3]; - r.part.header.mtime = ((header[4] as u32) << 0) - | ((header[5] as u32) << 8) - | ((header[6] as u32) << 16) - | ((header[7] as u32) << 24); - let _xfl = header[8]; - r.part.header.operating_system = header[9]; - r.part.state = GzHeaderParsingState::Xlen; - } - GzHeaderParsingState::Xlen => { - if r.part.flg & FEXTRA != 0 { - r.part.xlen = read_le_u16(r)?; - } - r.part.state = GzHeaderParsingState::Extra; - } - GzHeaderParsingState::Extra => { - if r.part.flg & FEXTRA != 0 { - let mut extra = vec![0; r.part.xlen as usize]; - r.read_and_forget(&mut extra)?; - r.part.header.extra = Some(extra); - } - r.part.state = GzHeaderParsingState::Filename; - } - GzHeaderParsingState::Filename => { - if r.part.flg & FNAME != 0 { - if r.part.header.filename.is_none() { - r.part.header.filename = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; - } - } - } - r.part.state = GzHeaderParsingState::Comment; - } - GzHeaderParsingState::Comment => { - if r.part.flg & FCOMMENT != 0 { - if r.part.header.comment.is_none() { - r.part.header.comment = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; - } - } - } - r.part.state = GzHeaderParsingState::Crc; - } - GzHeaderParsingState::Crc => { - if r.part.flg & FHCRC != 0 { - let stored_crc = read_le_u16(r)?; - let calced_crc = r.part.crc.sum() as u16; - if stored_crc != calced_crc { - return Err(corrupt()); - } - } - return Ok(()); - } - } - } -} - -pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> { - let mut part = GzHeaderPartial::new(); - - let result = { - let mut reader = Buffer::new(&mut part, r); - read_gz_header_part(&mut reader) - }; - result.map(|()| part.take_header()) -} - /// A gzip streaming encoder /// /// This structure exposes a [`BufRead`] interface that will read uncompressed data @@ -270,11 +163,21 @@ impl<R: BufRead + Write> Write for GzEncoder<R> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure consumes a [`BufRead`] interface, reading compressed data +/// This structure exposes a [`BufRead`] interface, reading compressed data /// from the underlying reader, and emitting uncompressed data. /// +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// If you need the following bytes, call `into_inner()` after Ok(0) to +/// recover the underlying reader. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -305,161 +208,38 @@ impl<R: BufRead + Write> Write for GzEncoder<R> { /// ``` #[derive(Debug)] pub struct GzDecoder<R> { - inner: GzState, - header: Option<GzHeader>, + state: GzState, reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, multi: bool, } #[derive(Debug)] -pub enum GzHeaderParsingState { - Start, - Xlen, - Extra, - Filename, - Comment, - Crc, -} - -#[derive(Debug)] -pub struct GzHeaderPartial { - buf: Vec<u8>, - state: GzHeaderParsingState, - flg: u8, - xlen: u16, - crc: Crc, - header: GzHeader, -} - -impl GzHeaderPartial { - fn new() -> GzHeaderPartial { - GzHeaderPartial { - buf: Vec::with_capacity(10), // minimum header length - state: GzHeaderParsingState::Start, - flg: 0, - xlen: 0, - crc: Crc::new(), - header: GzHeader { - extra: None, - filename: None, - comment: None, - operating_system: 0, - mtime: 0, - }, - } - } - - pub fn take_header(self) -> GzHeader { - self.header - } -} - -#[derive(Debug)] enum GzState { - Header(GzHeaderPartial), - Body, - Finished(usize, [u8; 8]), + Header(GzHeaderParser), + Body(GzHeader), + Finished(GzHeader, usize, [u8; 8]), Err(io::Error), - End, -} - -/// A small adapter which reads data originally from `buf` and then reads all -/// further data from `reader`. This will also buffer all data read from -/// `reader` into `buf` for reuse on a further call. -struct Buffer<'a, T: 'a> { - part: &'a mut GzHeaderPartial, - buf_cur: usize, - buf_max: usize, - reader: &'a mut T, -} - -impl<'a, T> Buffer<'a, T> { - fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { - Buffer { - reader, - buf_cur: 0, - buf_max: part.buf.len(), - part, - } - } -} - -impl<'a, T: Read> Read for Buffer<'a, T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let mut bufref = match self.part.state { - GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), - GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), - _ => None, - }; - if let Some(ref mut b) = bufref { - // we have a direct reference to a buffer where to write - let len = self.reader.read(buf)?; - if len > 0 && buf[len - 1] == 0 { - // we do not append the final 0 - b.extend_from_slice(&buf[..len - 1]); - } else { - b.extend_from_slice(&buf[..len]); - } - self.part.crc.update(&buf[..len]); - Ok(len) - } else if self.buf_cur == self.buf_max { - // we read new bytes and also save them in self.part.buf - let len = self.reader.read(buf)?; - self.part.buf.extend_from_slice(&buf[..len]); - self.part.crc.update(&buf[..len]); - Ok(len) - } else { - // we first read the previously saved bytes - let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; - self.buf_cur += len; - Ok(len) - } - } -} - -impl<'a, T> Buffer<'a, T> -where - T: std::io::Read, -{ - // If we manage to read all the bytes, we reset the buffer - fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> { - self.read_exact(buf)?; - // we managed to read the whole buf - // we will no longer need the previously saved bytes in self.part.buf - let rlen = buf.len(); - self.part.buf.truncate(0); - self.buf_cur = 0; - self.buf_max = 0; - Ok(rlen) - } + End(Option<GzHeader>), } impl<R: BufRead> GzDecoder<R> { /// Creates a new decoder from the given reader, immediately parsing the /// gzip header. pub fn new(mut r: R) -> GzDecoder<R> { - let mut part = GzHeaderPartial::new(); - let mut header = None; - - let result = { - let mut reader = Buffer::new(&mut part, &mut r); - read_gz_header_part(&mut reader) - }; + let mut header_parser = GzHeaderParser::new(); - let state = match result { - Ok(()) => { - header = Some(part.take_header()); - GzState::Body + let state = match header_parser.parse(&mut r) { + Ok(_) => GzState::Body(GzHeader::from(header_parser)), + Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { + GzState::Header(header_parser) } - Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), Err(err) => GzState::Err(err), }; GzDecoder { - inner: state, + state, reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), multi: false, - header, } } @@ -472,7 +252,11 @@ impl<R: BufRead> GzDecoder<R> { impl<R> GzDecoder<R> { /// Returns the header associated with this stream, if it was valid pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() + match &self.state { + GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), + GzState::End(header) => header.as_ref(), + _ => None, + } } /// Acquires a reference to the underlying reader. @@ -496,111 +280,61 @@ impl<R> GzDecoder<R> { impl<R: BufRead> Read for GzDecoder<R> { fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { - let GzDecoder { - inner, - header, - reader, - multi, - } = self; - loop { - *inner = match mem::replace(inner, GzState::End) { - GzState::Header(mut part) => { - let result = { - let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); - read_gz_header_part(&mut reader) - }; - match result { - Ok(()) => { - *header = Some(part.take_header()); - GzState::Body - } - Err(err) if io::ErrorKind::WouldBlock == err.kind() => { - *inner = GzState::Header(part); - return Err(err); - } - Err(err) => return Err(err), - } + match &mut self.state { + GzState::Header(parser) => { + parser.parse(self.reader.get_mut().get_mut())?; + self.state = GzState::Body(GzHeader::from(mem::take(parser))); } - GzState::Body => { + GzState::Body(header) => { if into.is_empty() { - *inner = GzState::Body; return Ok(0); } - - let n = reader.read(into).map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Body; + match self.reader.read(into)? { + 0 => { + self.state = GzState::Finished(mem::take(header), 0, [0; 8]); } - - err - })?; - - match n { - 0 => GzState::Finished(0, [0; 8]), n => { - *inner = GzState::Body; return Ok(n); } } } - GzState::Finished(pos, mut buf) => { - if pos < buf.len() { - let n = reader - .get_mut() - .get_mut() - .read(&mut buf[pos..]) - .and_then(|n| { - if n == 0 { - Err(io::ErrorKind::UnexpectedEof.into()) - } else { - Ok(n) - } - }) - .map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Finished(pos, buf); - } - - err - })?; - - GzState::Finished(pos + n, buf) + GzState::Finished(header, pos, buf) => { + if *pos < buf.len() { + *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; } else { let (crc, amt) = finish(&buf); - if crc != reader.crc().sum() || amt != reader.crc().amount() { + if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { + self.state = GzState::End(Some(mem::take(header))); return Err(corrupt()); - } else if *multi { - let is_eof = reader + } else if self.multi { + let is_eof = self + .reader .get_mut() .get_mut() .fill_buf() - .map(|buf| buf.is_empty()) - .map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Finished(pos, buf); - } - - err - })?; + .map(|buf| buf.is_empty())?; if is_eof { - GzState::End + self.state = GzState::End(Some(mem::take(header))); } else { - reader.reset(); - reader.get_mut().reset_data(); - header.take(); - GzState::Header(GzHeaderPartial::new()) + self.reader.reset(); + self.reader.get_mut().reset_data(); + self.state = GzState::Header(GzHeaderParser::new()) } } else { - GzState::End + self.state = GzState::End(Some(mem::take(header))); } } } - GzState::Err(err) => return Err(err), - GzState::End => return Ok(0), - }; + GzState::Err(err) => { + let result = Err(mem::replace(err, io::ErrorKind::Other.into())); + self.state = GzState::End(None); + return result; + } + GzState::End(_) => return Ok(0), + } } } } @@ -615,18 +349,19 @@ impl<R: BufRead + Write> Write for GzDecoder<R> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. +/// +/// This structure exposes a [`BufRead`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the +/// underlying reader does. For a file, this reads to the end of the file. /// -/// This structure exposes a [`BufRead`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples @@ -699,154 +434,48 @@ impl<R: BufRead> Read for MultiGzDecoder<R> { } #[cfg(test)] -pub mod tests { - use crate::gz::bufread::*; - use std::io; - use std::io::{Cursor, Read, Write}; - - //a cursor turning EOF into blocking errors - #[derive(Debug)] - pub struct BlockingCursor { - pub cursor: Cursor<Vec<u8>>, - } - - impl BlockingCursor { - pub fn new() -> BlockingCursor { - BlockingCursor { - cursor: Cursor::new(Vec::new()), - } - } - - pub fn set_position(&mut self, pos: u64) { - self.cursor.set_position(pos) - } - - pub fn position(&mut self) -> u64 { - self.cursor.position() - } - } - - impl Write for BlockingCursor { - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - self.cursor.write(buf) - } - fn flush(&mut self) -> io::Result<()> { - self.cursor.flush() - } - } - - impl Read for BlockingCursor { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - //use the cursor, except it turns eof into blocking error - let r = self.cursor.read(buf); - match r { - Err(ref err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(io::ErrorKind::WouldBlock.into()); - } - } - Ok(0) => { - //regular EOF turned into blocking error - return Err(io::ErrorKind::WouldBlock.into()); - } - Ok(_n) => {} - } - r - } - } +mod test { + use crate::bufread::GzDecoder; + use crate::gz::write; + use crate::Compression; + use std::io::{Read, Write}; + + // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any + // additional data to be consumed by the caller. #[test] - // test function read_and_forget of Buffer - fn buffer_read_and_forget() { - // this is unused except for the buffering - let mut part = GzHeaderPartial::new(); - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - let mut out = Vec::with_capacity(7); - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // First read : successful for one byte - let mut reader = Buffer::new(&mut part, &mut r); - out.resize(1, 0); - match reader.read_and_forget(&mut out) { - Ok(1) => {} - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - - // Second read : incomplete for 7 bytes (we have only 2) - out.resize(7, 0); - match reader.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with incomplete"); - } - } - - // 3 more data bytes have arrived - let pos = r.position(); - let data2 = vec![4, 5, 6]; - match r.write_all(&data2) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos); - - // Third read : still incomplete for 7 bytes (we have 5) - let mut reader2 = Buffer::new(&mut part, &mut r); - match reader2.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with more incomplete"); - } - } - - // 3 more data bytes have arrived again - let pos2 = r.position(); - let data3 = vec![7, 8, 9]; - match r.write_all(&data3) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos2); - - // Fourth read : now successful for 7 bytes - let mut reader3 = Buffer::new(&mut part, &mut r); - match reader3.read_and_forget(&mut out) { - Ok(7) => { - assert_eq!(out[0], 2); - assert_eq!(out[6], 8); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } + fn decode_extra_data() { + let expected = "Hello World"; + + let compressed = { + let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); + e.write(expected.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; - // Fifth read : successful for one more byte - out.resize(1, 0); - match reader3.read_and_forget(&mut out) { - Ok(1) => { - assert_eq!(out[0], 9); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } + let mut output = Vec::new(); + let mut decoder = GzDecoder::new(compressed.as_slice()); + let decoded_bytes = decoder.read_to_end(&mut output).unwrap(); + assert_eq!(decoded_bytes, output.len()); + let actual = std::str::from_utf8(&output).expect("String parsing error"); + assert_eq!( + actual, expected, + "after decompression we obtain the original input" + ); + + output.clear(); + assert_eq!( + decoder.read(&mut output).unwrap(), + 0, + "subsequent read of decoder returns 0, but inner reader can return additional data" + ); + let mut reader = decoder.into_inner(); + assert_eq!( + reader.read_to_end(&mut output).unwrap(), + 1, + "extra data is accessible in underlying buf-read" + ); + assert_eq!(output, b"x"); } } diff --git a/vendor/flate2/src/gz/mod.rs b/vendor/flate2/src/gz/mod.rs index d31aa60be..e8e05c6eb 100644 --- a/vendor/flate2/src/gz/mod.rs +++ b/vendor/flate2/src/gz/mod.rs @@ -1,19 +1,24 @@ use std::ffi::CString; -use std::io::prelude::*; +use std::io::{BufRead, Error, ErrorKind, Read, Result, Write}; use std::time; use crate::bufreader::BufReader; -use crate::Compression; +use crate::{Compression, Crc}; pub static FHCRC: u8 = 1 << 1; pub static FEXTRA: u8 = 1 << 2; pub static FNAME: u8 = 1 << 3; pub static FCOMMENT: u8 = 1 << 4; +pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7; pub mod bufread; pub mod read; pub mod write; +// The maximum length of the header filename and comment fields. More than +// enough for these fields in reasonable use, but prevents possible attacks. +const MAX_HEADER_BUF: usize = 65535; + /// A structure representing the header of a gzip stream. /// /// The header can contain metadata about the file that was compressed, if @@ -82,6 +87,205 @@ impl GzHeader { } } +#[derive(Debug, Default)] +pub enum GzHeaderState { + Start(u8, [u8; 10]), + Xlen(Option<Box<Crc>>, u8, [u8; 2]), + Extra(Option<Box<Crc>>, u16), + Filename(Option<Box<Crc>>), + Comment(Option<Box<Crc>>), + Crc(Option<Box<Crc>>, u8, [u8; 2]), + #[default] + Complete, +} + +#[derive(Debug, Default)] +pub struct GzHeaderParser { + state: GzHeaderState, + flags: u8, + header: GzHeader, +} + +impl GzHeaderParser { + fn new() -> Self { + GzHeaderParser { + state: GzHeaderState::Start(0, [0; 10]), + flags: 0, + header: GzHeader::default(), + } + } + + fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> { + loop { + match &mut self.state { + GzHeaderState::Start(count, buffer) => { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + // Gzip identification bytes + if buffer[0] != 0x1f || buffer[1] != 0x8b { + return Err(bad_header()); + } + // Gzip compression method (8 = deflate) + if buffer[2] != 8 { + return Err(bad_header()); + } + self.flags = buffer[3]; + // RFC1952: "must give an error indication if any reserved bit is non-zero" + if self.flags & FRESERVED != 0 { + return Err(bad_header()); + } + self.header.mtime = ((buffer[4] as u32) << 0) + | ((buffer[5] as u32) << 8) + | ((buffer[6] as u32) << 16) + | ((buffer[7] as u32) << 24); + let _xfl = buffer[8]; + self.header.operating_system = buffer[9]; + let crc = if self.flags & FHCRC != 0 { + let mut crc = Box::new(Crc::new()); + crc.update(buffer); + Some(crc) + } else { + None + }; + self.state = GzHeaderState::Xlen(crc, 0, [0; 2]); + } + GzHeaderState::Xlen(crc, count, buffer) => { + if self.flags & FEXTRA != 0 { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + if let Some(crc) = crc { + crc.update(buffer); + } + let xlen = parse_le_u16(&buffer); + self.header.extra = Some(vec![0; xlen as usize]); + self.state = GzHeaderState::Extra(crc.take(), 0); + } else { + self.state = GzHeaderState::Filename(crc.take()); + } + } + GzHeaderState::Extra(crc, count) => { + debug_assert!(self.header.extra.is_some()); + let extra = self.header.extra.as_mut().unwrap(); + while (*count as usize) < extra.len() { + *count += read_into(r, &mut extra[*count as usize..])? as u16; + } + if let Some(crc) = crc { + crc.update(extra); + } + self.state = GzHeaderState::Filename(crc.take()); + } + GzHeaderState::Filename(crc) => { + if self.flags & FNAME != 0 { + let filename = self.header.filename.get_or_insert_with(Vec::new); + read_to_nul(r, filename)?; + if let Some(crc) = crc { + crc.update(filename); + crc.update(b"\0"); + } + } + self.state = GzHeaderState::Comment(crc.take()); + } + GzHeaderState::Comment(crc) => { + if self.flags & FCOMMENT != 0 { + let comment = self.header.comment.get_or_insert_with(Vec::new); + read_to_nul(r, comment)?; + if let Some(crc) = crc { + crc.update(comment); + crc.update(b"\0"); + } + } + self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]); + } + GzHeaderState::Crc(crc, count, buffer) => { + if let Some(crc) = crc { + debug_assert!(self.flags & FHCRC != 0); + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + let stored_crc = parse_le_u16(&buffer); + let calced_crc = crc.sum() as u16; + if stored_crc != calced_crc { + return Err(corrupt()); + } + } + self.state = GzHeaderState::Complete; + } + GzHeaderState::Complete => { + return Ok(()); + } + } + } + } + + fn header(&self) -> Option<&GzHeader> { + match self.state { + GzHeaderState::Complete => Some(&self.header), + _ => None, + } + } +} + +impl From<GzHeaderParser> for GzHeader { + fn from(parser: GzHeaderParser) -> Self { + debug_assert!(matches!(parser.state, GzHeaderState::Complete)); + parser.header + } +} + +// Attempt to fill the `buffer` from `r`. Return the number of bytes read. +// Return an error if EOF is read before the buffer is full. This differs +// from `read` in that Ok(0) means that more data may be available. +fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> { + debug_assert!(!buffer.is_empty()); + match r.read(buffer) { + Ok(0) => Err(ErrorKind::UnexpectedEof.into()), + Ok(n) => Ok(n), + Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0), + Err(e) => Err(e), + } +} + +// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`. +fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> { + let mut bytes = r.bytes(); + loop { + match bytes.next().transpose()? { + Some(byte) if byte == 0 => { + return Ok(()); + } + Some(_) if buffer.len() == MAX_HEADER_BUF => { + return Err(Error::new( + ErrorKind::InvalidInput, + "gzip header field too long", + )); + } + Some(byte) => { + buffer.push(byte); + } + None => { + return Err(ErrorKind::UnexpectedEof.into()); + } + } + } +} + +fn parse_le_u16(buffer: &[u8; 2]) -> u16 { + (buffer[0] as u16) | ((buffer[1] as u16) << 8) +} + +fn bad_header() -> Error { + Error::new(ErrorKind::InvalidInput, "invalid gzip header") +} + +fn corrupt() -> Error { + Error::new( + ErrorKind::InvalidInput, + "corrupt gzip stream does not have a matching checksum", + ) +} + /// A builder structure to create a new gzip Encoder. /// /// This structure controls header configuration options such as the filename. @@ -253,8 +457,8 @@ impl GzBuilder { mod tests { use std::io::prelude::*; - use super::{read, write, GzBuilder}; - use crate::Compression; + use super::{read, write, GzBuilder, GzHeaderParser}; + use crate::{Compression, GzHeader}; use rand::{thread_rng, Rng}; #[test] @@ -304,6 +508,85 @@ mod tests { assert_eq!(res, v); } + // A Rust implementation of CRC that closely matches the C code in RFC1952. + // Only use this to create CRCs for tests. + struct Rfc1952Crc { + /* Table of CRCs of all 8-bit messages. */ + crc_table: [u32; 256], + } + + impl Rfc1952Crc { + fn new() -> Self { + let mut crc = Rfc1952Crc { + crc_table: [0; 256], + }; + /* Make the table for a fast CRC. */ + for n in 0usize..256 { + let mut c = n as u32; + for _k in 0..8 { + if c & 1 != 0 { + c = 0xedb88320 ^ (c >> 1); + } else { + c = c >> 1; + } + } + crc.crc_table[n] = c; + } + crc + } + + /* + Update a running crc with the bytes buf and return + the updated crc. The crc should be initialized to zero. Pre- and + post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the caller. + */ + fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 { + let mut c = crc ^ 0xffffffff; + + for b in buf { + c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8); + } + c ^ 0xffffffff + } + + /* Return the CRC of the bytes buf. */ + fn crc(&self, buf: &[u8]) -> u32 { + self.update_crc(0, buf) + } + } + + #[test] + fn roundtrip_header() { + let mut header = GzBuilder::new() + .mtime(1234) + .operating_system(57) + .filename("filename") + .comment("comment") + .into_header(Compression::fast()); + + // Add a CRC to the header + header[3] = header[3] ^ super::FHCRC; + let rfc1952_crc = Rfc1952Crc::new(); + let crc32 = rfc1952_crc.crc(&header); + let crc16 = crc32 as u16; + header.extend(&crc16.to_le_bytes()); + + let mut parser = GzHeaderParser::new(); + parser.parse(&mut header.as_slice()).unwrap(); + let actual = parser.header().unwrap(); + assert_eq!( + actual, + &GzHeader { + extra: None, + filename: Some("filename".as_bytes().to_vec()), + comment: Some("comment".as_bytes().to_vec()), + operating_system: 57, + mtime: 1234 + } + ) + } + #[test] fn fields() { let r = vec![0, 2, 4, 6]; @@ -353,33 +636,4 @@ mod tests { write!(f, "Hello world").unwrap(); f.flush().unwrap(); } - - use crate::gz::bufread::tests::BlockingCursor; - #[test] - // test function read_and_forget of Buffer - fn blocked_partial_header_read() { - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // this is unused except for the buffering - let mut decoder = read::GzDecoder::new(r); - let mut out = Vec::with_capacity(7); - match decoder.read(&mut out) { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock); - } - _ => { - panic!("Unexpected result for decoder.read"); - } - } - } } diff --git a/vendor/flate2/src/gz/read.rs b/vendor/flate2/src/gz/read.rs index cfeb992e8..5a65526ce 100644 --- a/vendor/flate2/src/gz/read.rs +++ b/vendor/flate2/src/gz/read.rs @@ -25,11 +25,11 @@ use crate::Compression; /// // Return a vector containing the GZ compressed version of hello world /// /// fn gzencode_hello_world() -> io::Result<Vec<u8>> { -/// let mut ret_vec = [0;100]; +/// let mut ret_vec = Vec::new(); /// let bytestring = b"hello world"; /// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast()); -/// let count = gz.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) +/// gz.read_to_end(&mut ret_vec)?; +/// Ok(ret_vec) /// } /// ``` #[derive(Debug)] @@ -90,17 +90,26 @@ impl<R: Read + Write> Write for GzEncoder<R> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// /// This structure exposes a [`Read`] interface that will consume compressed /// data from the underlying reader and emit uncompressed data. /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// After reading a single member of the gzip data this reader will return +/// Ok(0) even if there are more bytes available in the underlying reader. +/// `GzDecoder` may have read additional bytes past the end of the gzip data. +/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader` +/// and use `bufread::GzDecoder` instead. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// /// ``` -/// /// use std::io::prelude::*; /// use std::io; /// # use flate2::Compression; @@ -146,6 +155,9 @@ impl<R> GzDecoder<R> { } /// Acquires a reference to the underlying reader. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// To prevent this use [`bufread::GzDecoder`] instead. pub fn get_ref(&self) -> &R { self.inner.get_ref().get_ref() } @@ -153,12 +165,19 @@ impl<R> GzDecoder<R> { /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if - /// this decoder is continued to be used. + /// this decoder continues to be used. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// To prevent this use [`bufread::GzDecoder`] instead. pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } /// Consumes this decoder, returning the underlying reader. + /// + /// Note that the decoder may have read past the end of the gzip data. + /// Subsequent reads will skip those bytes. To prevent this use + /// [`bufread::GzDecoder`] instead. pub fn into_inner(self) -> R { self.inner.into_inner().into_inner() } @@ -180,19 +199,19 @@ impl<R: Read + Write> Write for GzDecoder<R> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. +/// +/// This structure exposes a [`Read`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress the -/// first gzip member. The multistream format is commonly used in bioinformatics, -/// for example when using the BGZF compressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the +/// underlying reader does. /// -/// This structure exposes a [`Read`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. +/// To handle members seperately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// /// # Examples /// @@ -276,3 +295,84 @@ impl<R: Read + Write> Write for MultiGzDecoder<R> { self.get_mut().flush() } } + +#[cfg(test)] +mod tests { + use std::io::{Cursor, ErrorKind, Read, Result, Write}; + + use super::GzDecoder; + + //a cursor turning EOF into blocking errors + #[derive(Debug)] + pub struct BlockingCursor { + pub cursor: Cursor<Vec<u8>>, + } + + impl BlockingCursor { + pub fn new() -> BlockingCursor { + BlockingCursor { + cursor: Cursor::new(Vec::new()), + } + } + + pub fn set_position(&mut self, pos: u64) { + return self.cursor.set_position(pos); + } + } + + impl Write for BlockingCursor { + fn write(&mut self, buf: &[u8]) -> Result<usize> { + return self.cursor.write(buf); + } + fn flush(&mut self) -> Result<()> { + return self.cursor.flush(); + } + } + + impl Read for BlockingCursor { + fn read(&mut self, buf: &mut [u8]) -> Result<usize> { + //use the cursor, except it turns eof into blocking error + let r = self.cursor.read(buf); + match r { + Err(ref err) => { + if err.kind() == ErrorKind::UnexpectedEof { + return Err(ErrorKind::WouldBlock.into()); + } + } + Ok(0) => { + //regular EOF turned into blocking error + return Err(ErrorKind::WouldBlock.into()); + } + Ok(_n) => {} + } + return r; + } + } + + #[test] + fn blocked_partial_header_read() { + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // this is unused except for the buffering + let mut decoder = GzDecoder::new(r); + let mut out = Vec::with_capacity(7); + match decoder.read(&mut out) { + Err(e) => { + assert_eq!(e.kind(), ErrorKind::WouldBlock); + } + _ => { + panic!("Unexpected result for decoder.read"); + } + } + } +} diff --git a/vendor/flate2/src/gz/write.rs b/vendor/flate2/src/gz/write.rs index 83eebb757..74d6c5acf 100644 --- a/vendor/flate2/src/gz/write.rs +++ b/vendor/flate2/src/gz/write.rs @@ -2,8 +2,7 @@ use std::cmp; use std::io; use std::io::prelude::*; -use super::bufread::{corrupt, read_gz_header}; -use super::{GzBuilder, GzHeader}; +use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser}; use crate::crc::{Crc, CrcWriter}; use crate::zio; use crate::{Compress, Compression, Decompress, Status}; @@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure exposes a [`Write`] interface that will emit uncompressed data -/// to the underlying writer `W`. +/// This structure exposes a [`Write`] interface, receiving compressed data and +/// writing uncompressed data to the underlying writer. +/// +/// After decoding a single member of the gzip data this writer will return the number of bytes up to +/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to +/// handle any data following the gzip member. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// /// # Examples @@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> { pub struct GzDecoder<W: Write> { inner: zio::Writer<CrcWriter<W>, Decompress>, crc_bytes: Vec<u8>, - header: Option<GzHeader>, - header_buf: Vec<u8>, + header_parser: GzHeaderParser, } const CRC_BYTES_LEN: usize = 8; @@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> { GzDecoder { inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), - header: None, - header_buf: Vec::new(), + header_parser: GzHeaderParser::new(), } } /// Returns the header associated with this stream. pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() + self.header_parser.header() } /// Acquires a reference to the underlying writer. @@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> { } } -struct Counter<T: Read> { - inner: T, - pos: usize, -} - -impl<T: Read> Read for Counter<T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let pos = self.inner.read(buf)?; - self.pos += pos; - Ok(pos) - } -} - impl<W: Write> Write for GzDecoder<W> { - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - if self.header.is_none() { - // trying to avoid buffer usage - let (res, pos) = { - let mut counter = Counter { - inner: self.header_buf.chain(buf), - pos: 0, - }; - let res = read_gz_header(&mut counter); - (res, counter.pos) - }; - - match res { + fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> { + let buflen = buf.len(); + if self.header().is_none() { + match self.header_parser.parse(&mut buf) { Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - // not enough data for header, save to the buffer - self.header_buf.extend(buf); - Ok(buf.len()) + // all data read but header still not complete + Ok(buflen) } else { Err(err) } } - Ok(header) => { - self.header = Some(header); - let pos = pos - self.header_buf.len(); - self.header_buf.truncate(0); - Ok(pos) + Ok(_) => { + debug_assert!(self.header().is_some()); + // buf now contains the unread part of the original buf + let n = buflen - buf.len(); + Ok(n) } } } else { @@ -373,17 +356,19 @@ impl<W: Read + Write> Read for GzDecoder<W> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. +/// +/// This structure exposes a [`Write`] interface that will consume compressed data and +/// write uncompressed data to the underlying writer. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// `MultiGzDecoder` decodes all members of a file and writes them to the +/// underlying writer one after another. /// -/// This structure exposes a [`Write`] interface that will consume all gzip members -/// from the written buffers and write uncompressed data to the writer. +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 #[derive(Debug)] pub struct MultiGzDecoder<W: Write> { inner: GzDecoder<W>, @@ -524,6 +509,56 @@ mod tests { } #[test] + fn decode_writer_partial_header_filename() { + let filename = "test.txt"; + let mut e = GzBuilder::new() + .filename(filename) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().filename().unwrap(), + filename.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_partial_header_comment() { + let comment = "test comment"; + let mut e = GzBuilder::new() + .comment(comment) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().comment().unwrap(), + comment.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] fn decode_writer_exact_header() { let mut e = GzEncoder::new(Vec::new(), Compression::default()); e.write(STR.as_ref()).unwrap(); @@ -575,4 +610,32 @@ mod tests { let expected = STR.repeat(2); assert_eq!(return_string, expected); } + + // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any + // additional data to be consumed by the caller. + #[test] + fn decode_extra_data() { + let compressed = { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + let mut consumed_bytes = 0; + loop { + let n = decoder.write(&compressed[consumed_bytes..]).unwrap(); + if n == 0 { + break; + } + consumed_bytes += n; + } + writer = decoder.finish().unwrap(); + let actual = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(actual, STR); + assert_eq!(&compressed[consumed_bytes..], b"x"); + } } diff --git a/vendor/flate2/src/lib.rs b/vendor/flate2/src/lib.rs index 6789c5b76..8c000b032 100644 --- a/vendor/flate2/src/lib.rs +++ b/vendor/flate2/src/lib.rs @@ -65,12 +65,30 @@ //! `Write` trait if `T: Write`. That is, the "dual trait" is forwarded directly //! to the underlying object if available. //! +//! # About multi-member Gzip files +//! +//! While most `gzip` files one encounters will have a single *member* that can be read +//! with the [`GzDecoder`], there may be some files which have multiple members. +//! +//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly +//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate +//! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder` +//! and `write::GzDecoder` also allow non-gzip data following gzip data to be handled. +//! +//! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file +//! into one consecutive stream of bytes, which hides the underlying *members* entirely. +//! If a file contains contains non-gzip data after the gzip data, MultiGzDecoder will +//! emit an error after decoding the gzip data. This behavior matches the `gzip`, +//! `gunzip`, and `zcat` command line tools. +//! //! [`read`]: read/index.html //! [`bufread`]: bufread/index.html //! [`write`]: write/index.html //! [read]: https://doc.rust-lang.org/std/io/trait.Read.html //! [write]: https://doc.rust-lang.org/std/io/trait.Write.html //! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html +//! [`GzDecoder`]: read/struct.GzDecoder.html +//! [`MultiGzDecoder`]: read/struct.MultiGzDecoder.html #![doc(html_root_url = "https://docs.rs/flate2/0.2")] #![deny(missing_docs)] #![deny(missing_debug_implementations)] @@ -78,6 +96,9 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] +#[cfg(not(feature = "any_impl",))] +compile_error!("You need to choose a zlib backend"); + pub use crate::crc::{Crc, CrcReader, CrcWriter}; pub use crate::gz::GzBuilder; pub use crate::gz::GzHeader; @@ -96,7 +117,14 @@ mod zlib; /// Types which operate over [`Read`] streams, both encoders and decoders for /// various formats. /// +/// Note that the `read` decoder types may read past the end of the compressed +/// data while decoding. If the caller requires subsequent reads to start +/// immediately following the compressed data wrap the `Read` type in a +/// [`BufReader`] and use the `BufReader` with the equivalent decoder from the +/// `bufread` module and also for the subsequent reads. +/// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html pub mod read { pub use crate::deflate::read::DeflateDecoder; pub use crate::deflate::read::DeflateEncoder; @@ -154,7 +182,7 @@ fn _assert_send_sync() { } /// When compressing data, the compression level can be specified by a value in -/// this enum. +/// this struct. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub struct Compression(u32); diff --git a/vendor/flate2/src/zlib/bufread.rs b/vendor/flate2/src/zlib/bufread.rs index 61d12525c..aa8af64f8 100644 --- a/vendor/flate2/src/zlib/bufread.rs +++ b/vendor/flate2/src/zlib/bufread.rs @@ -47,6 +47,15 @@ impl<R: BufRead> ZlibEncoder<R> { data: Compress::new(level, true), } } + + /// Creates a new encoder with the given `compression` settings which will + /// read uncompressed data from the given stream `r` and emit the compressed stream. + pub fn new_with_compress(r: R, compression: Compress) -> ZlibEncoder<R> { + ZlibEncoder { + obj: r, + data: compression, + } + } } pub fn reset_encoder_data<R>(zlib: &mut ZlibEncoder<R>) { @@ -165,6 +174,15 @@ impl<R: BufRead> ZlibDecoder<R> { data: Decompress::new(true), } } + + /// Creates a new decoder which will decompress data read from the given + /// stream, using the given `decompression` settings. + pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> { + ZlibDecoder { + obj: r, + data: decompression, + } + } } pub fn reset_decoder_data<R>(zlib: &mut ZlibDecoder<R>) { diff --git a/vendor/flate2/src/zlib/read.rs b/vendor/flate2/src/zlib/read.rs index 330213049..fbae74867 100644 --- a/vendor/flate2/src/zlib/read.rs +++ b/vendor/flate2/src/zlib/read.rs @@ -3,6 +3,7 @@ use std::io::prelude::*; use super::bufread; use crate::bufreader::BufReader; +use crate::Decompress; /// A ZLIB encoder, or compressor. /// @@ -24,9 +25,9 @@ use crate::bufreader::BufReader; /// # fn open_hello_world() -> std::io::Result<Vec<u8>> { /// let f = File::open("examples/hello_world.txt")?; /// let mut z = ZlibEncoder::new(f, Compression::fast()); -/// let mut buffer = [0;50]; -/// let byte_count = z.read(&mut buffer)?; -/// # Ok(buffer[0..byte_count].to_vec()) +/// let mut buffer = Vec::new(); +/// z.read_to_end(&mut buffer)?; +/// # Ok(buffer) /// # } /// ``` #[derive(Debug)] @@ -42,6 +43,14 @@ impl<R: Read> ZlibEncoder<R> { inner: bufread::ZlibEncoder::new(BufReader::new(r), level), } } + + /// Creates a new encoder with the given `compression` settings which will + /// read uncompressed data from the given stream `r` and emit the compressed stream. + pub fn new_with_compress(r: R, compression: crate::Compress) -> ZlibEncoder<R> { + ZlibEncoder { + inner: bufread::ZlibEncoder::new_with_compress(BufReader::new(r), compression), + } + } } impl<R> ZlibEncoder<R> { @@ -160,7 +169,8 @@ impl<R: Read> ZlibDecoder<R> { ZlibDecoder::new_with_buf(r, vec![0; 32 * 1024]) } - /// Same as `new`, but the intermediate buffer for data is specified. + /// Creates a new decoder which will decompress data read from the given + /// stream `r`, using `buf` as backing to speed up reading. /// /// Note that the specified buffer will only be used up to its current /// length. The buffer's capacity will also not grow over time. @@ -169,6 +179,31 @@ impl<R: Read> ZlibDecoder<R> { inner: bufread::ZlibDecoder::new(BufReader::with_buf(buf, r)), } } + + /// Creates a new decoder which will decompress data read from the given + /// stream `r`, along with `decompression` settings. + pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> { + ZlibDecoder::new_with_decompress_and_buf(r, vec![0; 32 * 1024], decompression) + } + + /// Creates a new decoder which will decompress data read from the given + /// stream `r`, using `buf` as backing to speed up reading, + /// along with `decompression` settings to configure decoder. + /// + /// Note that the specified buffer will only be used up to its current + /// length. The buffer's capacity will also not grow over time. + pub fn new_with_decompress_and_buf( + r: R, + buf: Vec<u8>, + decompression: Decompress, + ) -> ZlibDecoder<R> { + ZlibDecoder { + inner: bufread::ZlibDecoder::new_with_decompress( + BufReader::with_buf(buf, r), + decompression, + ), + } + } } impl<R> ZlibDecoder<R> { diff --git a/vendor/flate2/src/zlib/write.rs b/vendor/flate2/src/zlib/write.rs index c67181402..d8ad2f261 100644 --- a/vendor/flate2/src/zlib/write.rs +++ b/vendor/flate2/src/zlib/write.rs @@ -44,6 +44,14 @@ impl<W: Write> ZlibEncoder<W> { } } + /// Creates a new encoder which will write compressed data to the stream + /// `w` with the given `compression` settings. + pub fn new_with_compress(w: W, compression: Compress) -> ZlibEncoder<W> { + ZlibEncoder { + inner: zio::Writer::new(w, compression), + } + } + /// Acquires a reference to the underlying writer. pub fn get_ref(&self) -> &W { self.inner.get_ref() @@ -218,6 +226,17 @@ impl<W: Write> ZlibDecoder<W> { } } + /// Creates a new decoder which will write uncompressed data to the stream `w` + /// using the given `decompression` settings. + /// + /// When this decoder is dropped or unwrapped the final pieces of data will + /// be flushed. + pub fn new_with_decompress(w: W, decompression: Decompress) -> ZlibDecoder<W> { + ZlibDecoder { + inner: zio::Writer::new(w, decompression), + } + } + /// Acquires a reference to the underlying writer. pub fn get_ref(&self) -> &W { self.inner.get_ref() |