summaryrefslogtreecommitdiffstats
path: root/vendor/flate2/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
commitc23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/flate2/src
parentReleasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
downloadrustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/flate2/src')
-rw-r--r--vendor/flate2/src/deflate/read.rs6
-rw-r--r--vendor/flate2/src/ffi/mod.rs4
-rw-r--r--vendor/flate2/src/gz/bufread.rs591
-rw-r--r--vendor/flate2/src/gz/mod.rs320
-rw-r--r--vendor/flate2/src/gz/read.rs134
-rw-r--r--vendor/flate2/src/gz/write.rs169
-rw-r--r--vendor/flate2/src/lib.rs30
-rw-r--r--vendor/flate2/src/zlib/bufread.rs18
-rw-r--r--vendor/flate2/src/zlib/read.rs43
-rw-r--r--vendor/flate2/src/zlib/write.rs19
10 files changed, 740 insertions, 594 deletions
diff --git a/vendor/flate2/src/deflate/read.rs b/vendor/flate2/src/deflate/read.rs
index e6af130a3..5937e6f64 100644
--- a/vendor/flate2/src/deflate/read.rs
+++ b/vendor/flate2/src/deflate/read.rs
@@ -25,11 +25,11 @@ use crate::bufreader::BufReader;
/// #
/// // Return a vector containing the Deflate compressed version of hello world
/// fn deflateencoder_read_hello_world() -> io::Result<Vec<u8>> {
-/// let mut ret_vec = [0;100];
+/// let mut ret_vec = Vec::new();
/// let c = b"hello world";
/// let mut deflater = DeflateEncoder::new(&c[..], Compression::fast());
-/// let count = deflater.read(&mut ret_vec)?;
-/// Ok(ret_vec[0..count].to_vec())
+/// deflater.read_to_end(&mut ret_vec)?;
+/// Ok(ret_vec)
/// }
/// ```
#[derive(Debug)]
diff --git a/vendor/flate2/src/ffi/mod.rs b/vendor/flate2/src/ffi/mod.rs
index 8bac6e423..20b3cae6f 100644
--- a/vendor/flate2/src/ffi/mod.rs
+++ b/vendor/flate2/src/ffi/mod.rs
@@ -40,9 +40,9 @@ mod c;
#[cfg(feature = "any_zlib")]
pub use self::c::*;
-#[cfg(not(feature = "any_zlib"))]
+#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))]
mod rust;
-#[cfg(not(feature = "any_zlib"))]
+#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))]
pub use self::rust::*;
impl std::fmt::Debug for ErrorMessage {
diff --git a/vendor/flate2/src/gz/bufread.rs b/vendor/flate2/src/gz/bufread.rs
index c6ac5a98b..6fc48bcdd 100644
--- a/vendor/flate2/src/gz/bufread.rs
+++ b/vendor/flate2/src/gz/bufread.rs
@@ -3,9 +3,8 @@ use std::io;
use std::io::prelude::*;
use std::mem;
-use super::{GzBuilder, GzHeader};
-use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
-use crate::crc::{Crc, CrcReader};
+use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
+use crate::crc::CrcReader;
use crate::deflate;
use crate::Compression;
@@ -18,112 +17,6 @@ fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
min
}
-pub(crate) fn corrupt() -> io::Error {
- io::Error::new(
- io::ErrorKind::InvalidInput,
- "corrupt gzip stream does not have a matching checksum",
- )
-}
-
-fn bad_header() -> io::Error {
- io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header")
-}
-
-fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> {
- let mut b = [0; 2];
- r.read_and_forget(&mut b)?;
- Ok((b[0] as u16) | ((b[1] as u16) << 8))
-}
-
-fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> {
- loop {
- match r.part.state {
- GzHeaderParsingState::Start => {
- let mut header = [0; 10];
- r.read_and_forget(&mut header)?;
-
- if header[0] != 0x1f || header[1] != 0x8b {
- return Err(bad_header());
- }
- if header[2] != 8 {
- return Err(bad_header());
- }
-
- r.part.flg = header[3];
- r.part.header.mtime = ((header[4] as u32) << 0)
- | ((header[5] as u32) << 8)
- | ((header[6] as u32) << 16)
- | ((header[7] as u32) << 24);
- let _xfl = header[8];
- r.part.header.operating_system = header[9];
- r.part.state = GzHeaderParsingState::Xlen;
- }
- GzHeaderParsingState::Xlen => {
- if r.part.flg & FEXTRA != 0 {
- r.part.xlen = read_le_u16(r)?;
- }
- r.part.state = GzHeaderParsingState::Extra;
- }
- GzHeaderParsingState::Extra => {
- if r.part.flg & FEXTRA != 0 {
- let mut extra = vec![0; r.part.xlen as usize];
- r.read_and_forget(&mut extra)?;
- r.part.header.extra = Some(extra);
- }
- r.part.state = GzHeaderParsingState::Filename;
- }
- GzHeaderParsingState::Filename => {
- if r.part.flg & FNAME != 0 {
- if r.part.header.filename.is_none() {
- r.part.header.filename = Some(Vec::new());
- };
- for byte in r.bytes() {
- let byte = byte?;
- if byte == 0 {
- break;
- }
- }
- }
- r.part.state = GzHeaderParsingState::Comment;
- }
- GzHeaderParsingState::Comment => {
- if r.part.flg & FCOMMENT != 0 {
- if r.part.header.comment.is_none() {
- r.part.header.comment = Some(Vec::new());
- };
- for byte in r.bytes() {
- let byte = byte?;
- if byte == 0 {
- break;
- }
- }
- }
- r.part.state = GzHeaderParsingState::Crc;
- }
- GzHeaderParsingState::Crc => {
- if r.part.flg & FHCRC != 0 {
- let stored_crc = read_le_u16(r)?;
- let calced_crc = r.part.crc.sum() as u16;
- if stored_crc != calced_crc {
- return Err(corrupt());
- }
- }
- return Ok(());
- }
- }
- }
-}
-
-pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> {
- let mut part = GzHeaderPartial::new();
-
- let result = {
- let mut reader = Buffer::new(&mut part, r);
- read_gz_header_part(&mut reader)
- };
- result.map(|()| part.take_header())
-}
-
/// A gzip streaming encoder
///
/// This structure exposes a [`BufRead`] interface that will read uncompressed data
@@ -270,11 +163,21 @@ impl<R: BufRead + Write> Write for GzEncoder<R> {
}
}
-/// A gzip streaming decoder
+/// A decoder for a single member of a [gzip file].
///
-/// This structure consumes a [`BufRead`] interface, reading compressed data
+/// This structure exposes a [`BufRead`] interface, reading compressed data
/// from the underlying reader, and emitting uncompressed data.
///
+/// After reading a single member of the gzip data this reader will return
+/// Ok(0) even if there are more bytes available in the underlying reader.
+/// If you need the following bytes, call `into_inner()` after Ok(0) to
+/// recover the underlying reader.
+///
+/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
+/// or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
@@ -305,161 +208,38 @@ impl<R: BufRead + Write> Write for GzEncoder<R> {
/// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
- inner: GzState,
- header: Option<GzHeader>,
+ state: GzState,
reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
multi: bool,
}
#[derive(Debug)]
-pub enum GzHeaderParsingState {
- Start,
- Xlen,
- Extra,
- Filename,
- Comment,
- Crc,
-}
-
-#[derive(Debug)]
-pub struct GzHeaderPartial {
- buf: Vec<u8>,
- state: GzHeaderParsingState,
- flg: u8,
- xlen: u16,
- crc: Crc,
- header: GzHeader,
-}
-
-impl GzHeaderPartial {
- fn new() -> GzHeaderPartial {
- GzHeaderPartial {
- buf: Vec::with_capacity(10), // minimum header length
- state: GzHeaderParsingState::Start,
- flg: 0,
- xlen: 0,
- crc: Crc::new(),
- header: GzHeader {
- extra: None,
- filename: None,
- comment: None,
- operating_system: 0,
- mtime: 0,
- },
- }
- }
-
- pub fn take_header(self) -> GzHeader {
- self.header
- }
-}
-
-#[derive(Debug)]
enum GzState {
- Header(GzHeaderPartial),
- Body,
- Finished(usize, [u8; 8]),
+ Header(GzHeaderParser),
+ Body(GzHeader),
+ Finished(GzHeader, usize, [u8; 8]),
Err(io::Error),
- End,
-}
-
-/// A small adapter which reads data originally from `buf` and then reads all
-/// further data from `reader`. This will also buffer all data read from
-/// `reader` into `buf` for reuse on a further call.
-struct Buffer<'a, T: 'a> {
- part: &'a mut GzHeaderPartial,
- buf_cur: usize,
- buf_max: usize,
- reader: &'a mut T,
-}
-
-impl<'a, T> Buffer<'a, T> {
- fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> {
- Buffer {
- reader,
- buf_cur: 0,
- buf_max: part.buf.len(),
- part,
- }
- }
-}
-
-impl<'a, T: Read> Read for Buffer<'a, T> {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- let mut bufref = match self.part.state {
- GzHeaderParsingState::Filename => self.part.header.filename.as_mut(),
- GzHeaderParsingState::Comment => self.part.header.comment.as_mut(),
- _ => None,
- };
- if let Some(ref mut b) = bufref {
- // we have a direct reference to a buffer where to write
- let len = self.reader.read(buf)?;
- if len > 0 && buf[len - 1] == 0 {
- // we do not append the final 0
- b.extend_from_slice(&buf[..len - 1]);
- } else {
- b.extend_from_slice(&buf[..len]);
- }
- self.part.crc.update(&buf[..len]);
- Ok(len)
- } else if self.buf_cur == self.buf_max {
- // we read new bytes and also save them in self.part.buf
- let len = self.reader.read(buf)?;
- self.part.buf.extend_from_slice(&buf[..len]);
- self.part.crc.update(&buf[..len]);
- Ok(len)
- } else {
- // we first read the previously saved bytes
- let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?;
- self.buf_cur += len;
- Ok(len)
- }
- }
-}
-
-impl<'a, T> Buffer<'a, T>
-where
- T: std::io::Read,
-{
- // If we manage to read all the bytes, we reset the buffer
- fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- self.read_exact(buf)?;
- // we managed to read the whole buf
- // we will no longer need the previously saved bytes in self.part.buf
- let rlen = buf.len();
- self.part.buf.truncate(0);
- self.buf_cur = 0;
- self.buf_max = 0;
- Ok(rlen)
- }
+ End(Option<GzHeader>),
}
impl<R: BufRead> GzDecoder<R> {
/// Creates a new decoder from the given reader, immediately parsing the
/// gzip header.
pub fn new(mut r: R) -> GzDecoder<R> {
- let mut part = GzHeaderPartial::new();
- let mut header = None;
-
- let result = {
- let mut reader = Buffer::new(&mut part, &mut r);
- read_gz_header_part(&mut reader)
- };
+ let mut header_parser = GzHeaderParser::new();
- let state = match result {
- Ok(()) => {
- header = Some(part.take_header());
- GzState::Body
+ let state = match header_parser.parse(&mut r) {
+ Ok(_) => GzState::Body(GzHeader::from(header_parser)),
+ Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
+ GzState::Header(header_parser)
}
- Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part),
Err(err) => GzState::Err(err),
};
GzDecoder {
- inner: state,
+ state,
reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
multi: false,
- header,
}
}
@@ -472,7 +252,11 @@ impl<R: BufRead> GzDecoder<R> {
impl<R> GzDecoder<R> {
/// Returns the header associated with this stream, if it was valid
pub fn header(&self) -> Option<&GzHeader> {
- self.header.as_ref()
+ match &self.state {
+ GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
+ GzState::End(header) => header.as_ref(),
+ _ => None,
+ }
}
/// Acquires a reference to the underlying reader.
@@ -496,111 +280,61 @@ impl<R> GzDecoder<R> {
impl<R: BufRead> Read for GzDecoder<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
- let GzDecoder {
- inner,
- header,
- reader,
- multi,
- } = self;
-
loop {
- *inner = match mem::replace(inner, GzState::End) {
- GzState::Header(mut part) => {
- let result = {
- let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut());
- read_gz_header_part(&mut reader)
- };
- match result {
- Ok(()) => {
- *header = Some(part.take_header());
- GzState::Body
- }
- Err(err) if io::ErrorKind::WouldBlock == err.kind() => {
- *inner = GzState::Header(part);
- return Err(err);
- }
- Err(err) => return Err(err),
- }
+ match &mut self.state {
+ GzState::Header(parser) => {
+ parser.parse(self.reader.get_mut().get_mut())?;
+ self.state = GzState::Body(GzHeader::from(mem::take(parser)));
}
- GzState::Body => {
+ GzState::Body(header) => {
if into.is_empty() {
- *inner = GzState::Body;
return Ok(0);
}
-
- let n = reader.read(into).map_err(|err| {
- if io::ErrorKind::WouldBlock == err.kind() {
- *inner = GzState::Body;
+ match self.reader.read(into)? {
+ 0 => {
+ self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
}
-
- err
- })?;
-
- match n {
- 0 => GzState::Finished(0, [0; 8]),
n => {
- *inner = GzState::Body;
return Ok(n);
}
}
}
- GzState::Finished(pos, mut buf) => {
- if pos < buf.len() {
- let n = reader
- .get_mut()
- .get_mut()
- .read(&mut buf[pos..])
- .and_then(|n| {
- if n == 0 {
- Err(io::ErrorKind::UnexpectedEof.into())
- } else {
- Ok(n)
- }
- })
- .map_err(|err| {
- if io::ErrorKind::WouldBlock == err.kind() {
- *inner = GzState::Finished(pos, buf);
- }
-
- err
- })?;
-
- GzState::Finished(pos + n, buf)
+ GzState::Finished(header, pos, buf) => {
+ if *pos < buf.len() {
+ *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
} else {
let (crc, amt) = finish(&buf);
- if crc != reader.crc().sum() || amt != reader.crc().amount() {
+ if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
+ self.state = GzState::End(Some(mem::take(header)));
return Err(corrupt());
- } else if *multi {
- let is_eof = reader
+ } else if self.multi {
+ let is_eof = self
+ .reader
.get_mut()
.get_mut()
.fill_buf()
- .map(|buf| buf.is_empty())
- .map_err(|err| {
- if io::ErrorKind::WouldBlock == err.kind() {
- *inner = GzState::Finished(pos, buf);
- }
-
- err
- })?;
+ .map(|buf| buf.is_empty())?;
if is_eof {
- GzState::End
+ self.state = GzState::End(Some(mem::take(header)));
} else {
- reader.reset();
- reader.get_mut().reset_data();
- header.take();
- GzState::Header(GzHeaderPartial::new())
+ self.reader.reset();
+ self.reader.get_mut().reset_data();
+ self.state = GzState::Header(GzHeaderParser::new())
}
} else {
- GzState::End
+ self.state = GzState::End(Some(mem::take(header)));
}
}
}
- GzState::Err(err) => return Err(err),
- GzState::End => return Ok(0),
- };
+ GzState::Err(err) => {
+ let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
+ self.state = GzState::End(None);
+ return result;
+ }
+ GzState::End(_) => return Ok(0),
+ }
}
}
}
@@ -615,18 +349,19 @@ impl<R: BufRead + Write> Write for GzDecoder<R> {
}
}
-/// A gzip streaming decoder that decodes all members of a multistream
+/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
+///
+/// This structure exposes a [`BufRead`] interface that will consume compressed
+/// data from the underlying reader and emit uncompressed data.
///
-/// A gzip member consists of a header, compressed data and a trailer. The [gzip
-/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
-/// gzip members to be joined in a single stream. `MultiGzDecoder` will
-/// decode all consecutive members while `GzDecoder` will only decompress
-/// the first gzip member. The multistream format is commonly used in
-/// bioinformatics, for example when using the BGZF compressed data.
+/// A gzip file consists of a series of *members* concatenated one after another.
+/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
+/// underlying reader does. For a file, this reads to the end of the file.
///
-/// This structure exposes a [`BufRead`] interface that will consume all gzip members
-/// from the underlying reader and emit uncompressed data.
+/// To handle members separately, see [GzDecoder] or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
///
/// # Examples
@@ -699,154 +434,48 @@ impl<R: BufRead> Read for MultiGzDecoder<R> {
}
#[cfg(test)]
-pub mod tests {
- use crate::gz::bufread::*;
- use std::io;
- use std::io::{Cursor, Read, Write};
-
- //a cursor turning EOF into blocking errors
- #[derive(Debug)]
- pub struct BlockingCursor {
- pub cursor: Cursor<Vec<u8>>,
- }
-
- impl BlockingCursor {
- pub fn new() -> BlockingCursor {
- BlockingCursor {
- cursor: Cursor::new(Vec::new()),
- }
- }
-
- pub fn set_position(&mut self, pos: u64) {
- self.cursor.set_position(pos)
- }
-
- pub fn position(&mut self) -> u64 {
- self.cursor.position()
- }
- }
-
- impl Write for BlockingCursor {
- fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
- self.cursor.write(buf)
- }
- fn flush(&mut self) -> io::Result<()> {
- self.cursor.flush()
- }
- }
-
- impl Read for BlockingCursor {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- //use the cursor, except it turns eof into blocking error
- let r = self.cursor.read(buf);
- match r {
- Err(ref err) => {
- if err.kind() == io::ErrorKind::UnexpectedEof {
- return Err(io::ErrorKind::WouldBlock.into());
- }
- }
- Ok(0) => {
- //regular EOF turned into blocking error
- return Err(io::ErrorKind::WouldBlock.into());
- }
- Ok(_n) => {}
- }
- r
- }
- }
+mod test {
+ use crate::bufread::GzDecoder;
+ use crate::gz::write;
+ use crate::Compression;
+ use std::io::{Read, Write};
+
+ // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
+ // additional data to be consumed by the caller.
#[test]
- // test function read_and_forget of Buffer
- fn buffer_read_and_forget() {
- // this is unused except for the buffering
- let mut part = GzHeaderPartial::new();
- // this is a reader which receives data afterwards
- let mut r = BlockingCursor::new();
- let data = vec![1, 2, 3];
- let mut out = Vec::with_capacity(7);
-
- match r.write_all(&data) {
- Ok(()) => {}
- _ => {
- panic!("Unexpected result for write_all");
- }
- }
- r.set_position(0);
-
- // First read : successful for one byte
- let mut reader = Buffer::new(&mut part, &mut r);
- out.resize(1, 0);
- match reader.read_and_forget(&mut out) {
- Ok(1) => {}
- _ => {
- panic!("Unexpected result for read_and_forget with data");
- }
- }
-
- // Second read : incomplete for 7 bytes (we have only 2)
- out.resize(7, 0);
- match reader.read_and_forget(&mut out) {
- Err(ref err) => {
- assert_eq!(io::ErrorKind::WouldBlock, err.kind());
- }
- _ => {
- panic!("Unexpected result for read_and_forget with incomplete");
- }
- }
-
- // 3 more data bytes have arrived
- let pos = r.position();
- let data2 = vec![4, 5, 6];
- match r.write_all(&data2) {
- Ok(()) => {}
- _ => {
- panic!("Unexpected result for write_all");
- }
- }
- r.set_position(pos);
-
- // Third read : still incomplete for 7 bytes (we have 5)
- let mut reader2 = Buffer::new(&mut part, &mut r);
- match reader2.read_and_forget(&mut out) {
- Err(ref err) => {
- assert_eq!(io::ErrorKind::WouldBlock, err.kind());
- }
- _ => {
- panic!("Unexpected result for read_and_forget with more incomplete");
- }
- }
-
- // 3 more data bytes have arrived again
- let pos2 = r.position();
- let data3 = vec![7, 8, 9];
- match r.write_all(&data3) {
- Ok(()) => {}
- _ => {
- panic!("Unexpected result for write_all");
- }
- }
- r.set_position(pos2);
-
- // Fourth read : now successful for 7 bytes
- let mut reader3 = Buffer::new(&mut part, &mut r);
- match reader3.read_and_forget(&mut out) {
- Ok(7) => {
- assert_eq!(out[0], 2);
- assert_eq!(out[6], 8);
- }
- _ => {
- panic!("Unexpected result for read_and_forget with data");
- }
- }
+ fn decode_extra_data() {
+ let expected = "Hello World";
+
+ let compressed = {
+ let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
+ e.write(expected.as_ref()).unwrap();
+ let mut b = e.finish().unwrap();
+ b.push(b'x');
+ b
+ };
- // Fifth read : successful for one more byte
- out.resize(1, 0);
- match reader3.read_and_forget(&mut out) {
- Ok(1) => {
- assert_eq!(out[0], 9);
- }
- _ => {
- panic!("Unexpected result for read_and_forget with data");
- }
- }
+ let mut output = Vec::new();
+ let mut decoder = GzDecoder::new(compressed.as_slice());
+ let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
+ assert_eq!(decoded_bytes, output.len());
+ let actual = std::str::from_utf8(&output).expect("String parsing error");
+ assert_eq!(
+ actual, expected,
+ "after decompression we obtain the original input"
+ );
+
+ output.clear();
+ assert_eq!(
+ decoder.read(&mut output).unwrap(),
+ 0,
+ "subsequent read of decoder returns 0, but inner reader can return additional data"
+ );
+ let mut reader = decoder.into_inner();
+ assert_eq!(
+ reader.read_to_end(&mut output).unwrap(),
+ 1,
+ "extra data is accessible in underlying buf-read"
+ );
+ assert_eq!(output, b"x");
}
}
diff --git a/vendor/flate2/src/gz/mod.rs b/vendor/flate2/src/gz/mod.rs
index d31aa60be..e8e05c6eb 100644
--- a/vendor/flate2/src/gz/mod.rs
+++ b/vendor/flate2/src/gz/mod.rs
@@ -1,19 +1,24 @@
use std::ffi::CString;
-use std::io::prelude::*;
+use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
use std::time;
use crate::bufreader::BufReader;
-use crate::Compression;
+use crate::{Compression, Crc};
pub static FHCRC: u8 = 1 << 1;
pub static FEXTRA: u8 = 1 << 2;
pub static FNAME: u8 = 1 << 3;
pub static FCOMMENT: u8 = 1 << 4;
+pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
pub mod bufread;
pub mod read;
pub mod write;
+// The maximum length of the header filename and comment fields. More than
+// enough for these fields in reasonable use, but prevents possible attacks.
+const MAX_HEADER_BUF: usize = 65535;
+
/// A structure representing the header of a gzip stream.
///
/// The header can contain metadata about the file that was compressed, if
@@ -82,6 +87,205 @@ impl GzHeader {
}
}
+#[derive(Debug, Default)]
+pub enum GzHeaderState {
+ Start(u8, [u8; 10]),
+ Xlen(Option<Box<Crc>>, u8, [u8; 2]),
+ Extra(Option<Box<Crc>>, u16),
+ Filename(Option<Box<Crc>>),
+ Comment(Option<Box<Crc>>),
+ Crc(Option<Box<Crc>>, u8, [u8; 2]),
+ #[default]
+ Complete,
+}
+
+#[derive(Debug, Default)]
+pub struct GzHeaderParser {
+ state: GzHeaderState,
+ flags: u8,
+ header: GzHeader,
+}
+
+impl GzHeaderParser {
+ fn new() -> Self {
+ GzHeaderParser {
+ state: GzHeaderState::Start(0, [0; 10]),
+ flags: 0,
+ header: GzHeader::default(),
+ }
+ }
+
+ fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
+ loop {
+ match &mut self.state {
+ GzHeaderState::Start(count, buffer) => {
+ while (*count as usize) < buffer.len() {
+ *count += read_into(r, &mut buffer[*count as usize..])? as u8;
+ }
+ // Gzip identification bytes
+ if buffer[0] != 0x1f || buffer[1] != 0x8b {
+ return Err(bad_header());
+ }
+ // Gzip compression method (8 = deflate)
+ if buffer[2] != 8 {
+ return Err(bad_header());
+ }
+ self.flags = buffer[3];
+ // RFC1952: "must give an error indication if any reserved bit is non-zero"
+ if self.flags & FRESERVED != 0 {
+ return Err(bad_header());
+ }
+ self.header.mtime = ((buffer[4] as u32) << 0)
+ | ((buffer[5] as u32) << 8)
+ | ((buffer[6] as u32) << 16)
+ | ((buffer[7] as u32) << 24);
+ let _xfl = buffer[8];
+ self.header.operating_system = buffer[9];
+ let crc = if self.flags & FHCRC != 0 {
+ let mut crc = Box::new(Crc::new());
+ crc.update(buffer);
+ Some(crc)
+ } else {
+ None
+ };
+ self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
+ }
+ GzHeaderState::Xlen(crc, count, buffer) => {
+ if self.flags & FEXTRA != 0 {
+ while (*count as usize) < buffer.len() {
+ *count += read_into(r, &mut buffer[*count as usize..])? as u8;
+ }
+ if let Some(crc) = crc {
+ crc.update(buffer);
+ }
+ let xlen = parse_le_u16(&buffer);
+ self.header.extra = Some(vec![0; xlen as usize]);
+ self.state = GzHeaderState::Extra(crc.take(), 0);
+ } else {
+ self.state = GzHeaderState::Filename(crc.take());
+ }
+ }
+ GzHeaderState::Extra(crc, count) => {
+ debug_assert!(self.header.extra.is_some());
+ let extra = self.header.extra.as_mut().unwrap();
+ while (*count as usize) < extra.len() {
+ *count += read_into(r, &mut extra[*count as usize..])? as u16;
+ }
+ if let Some(crc) = crc {
+ crc.update(extra);
+ }
+ self.state = GzHeaderState::Filename(crc.take());
+ }
+ GzHeaderState::Filename(crc) => {
+ if self.flags & FNAME != 0 {
+ let filename = self.header.filename.get_or_insert_with(Vec::new);
+ read_to_nul(r, filename)?;
+ if let Some(crc) = crc {
+ crc.update(filename);
+ crc.update(b"\0");
+ }
+ }
+ self.state = GzHeaderState::Comment(crc.take());
+ }
+ GzHeaderState::Comment(crc) => {
+ if self.flags & FCOMMENT != 0 {
+ let comment = self.header.comment.get_or_insert_with(Vec::new);
+ read_to_nul(r, comment)?;
+ if let Some(crc) = crc {
+ crc.update(comment);
+ crc.update(b"\0");
+ }
+ }
+ self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
+ }
+ GzHeaderState::Crc(crc, count, buffer) => {
+ if let Some(crc) = crc {
+ debug_assert!(self.flags & FHCRC != 0);
+ while (*count as usize) < buffer.len() {
+ *count += read_into(r, &mut buffer[*count as usize..])? as u8;
+ }
+ let stored_crc = parse_le_u16(&buffer);
+ let calced_crc = crc.sum() as u16;
+ if stored_crc != calced_crc {
+ return Err(corrupt());
+ }
+ }
+ self.state = GzHeaderState::Complete;
+ }
+ GzHeaderState::Complete => {
+ return Ok(());
+ }
+ }
+ }
+ }
+
+ fn header(&self) -> Option<&GzHeader> {
+ match self.state {
+ GzHeaderState::Complete => Some(&self.header),
+ _ => None,
+ }
+ }
+}
+
+impl From<GzHeaderParser> for GzHeader {
+ fn from(parser: GzHeaderParser) -> Self {
+ debug_assert!(matches!(parser.state, GzHeaderState::Complete));
+ parser.header
+ }
+}
+
+// Attempt to fill the `buffer` from `r`. Return the number of bytes read.
+// Return an error if EOF is read before the buffer is full. This differs
+// from `read` in that Ok(0) means that more data may be available.
+fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
+ debug_assert!(!buffer.is_empty());
+ match r.read(buffer) {
+ Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
+ Ok(n) => Ok(n),
+ Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0),
+ Err(e) => Err(e),
+ }
+}
+
+// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
+fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
+ let mut bytes = r.bytes();
+ loop {
+ match bytes.next().transpose()? {
+ Some(byte) if byte == 0 => {
+ return Ok(());
+ }
+ Some(_) if buffer.len() == MAX_HEADER_BUF => {
+ return Err(Error::new(
+ ErrorKind::InvalidInput,
+ "gzip header field too long",
+ ));
+ }
+ Some(byte) => {
+ buffer.push(byte);
+ }
+ None => {
+ return Err(ErrorKind::UnexpectedEof.into());
+ }
+ }
+ }
+}
+
+fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
+ (buffer[0] as u16) | ((buffer[1] as u16) << 8)
+}
+
+fn bad_header() -> Error {
+ Error::new(ErrorKind::InvalidInput, "invalid gzip header")
+}
+
+fn corrupt() -> Error {
+ Error::new(
+ ErrorKind::InvalidInput,
+ "corrupt gzip stream does not have a matching checksum",
+ )
+}
+
/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
@@ -253,8 +457,8 @@ impl GzBuilder {
mod tests {
use std::io::prelude::*;
- use super::{read, write, GzBuilder};
- use crate::Compression;
+ use super::{read, write, GzBuilder, GzHeaderParser};
+ use crate::{Compression, GzHeader};
use rand::{thread_rng, Rng};
#[test]
@@ -304,6 +508,85 @@ mod tests {
assert_eq!(res, v);
}
+ // A Rust implementation of CRC that closely matches the C code in RFC1952.
+ // Only use this to create CRCs for tests.
+ struct Rfc1952Crc {
+ /* Table of CRCs of all 8-bit messages. */
+ crc_table: [u32; 256],
+ }
+
+ impl Rfc1952Crc {
+ fn new() -> Self {
+ let mut crc = Rfc1952Crc {
+ crc_table: [0; 256],
+ };
+ /* Make the table for a fast CRC. */
+ for n in 0usize..256 {
+ let mut c = n as u32;
+ for _k in 0..8 {
+ if c & 1 != 0 {
+ c = 0xedb88320 ^ (c >> 1);
+ } else {
+ c = c >> 1;
+ }
+ }
+ crc.crc_table[n] = c;
+ }
+ crc
+ }
+
+ /*
+ Update a running crc with the bytes buf and return
+ the updated crc. The crc should be initialized to zero. Pre- and
+ post-conditioning (one's complement) is performed within this
+ function so it shouldn't be done by the caller.
+ */
+ fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
+ let mut c = crc ^ 0xffffffff;
+
+ for b in buf {
+ c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
+ }
+ c ^ 0xffffffff
+ }
+
+ /* Return the CRC of the bytes buf. */
+ fn crc(&self, buf: &[u8]) -> u32 {
+ self.update_crc(0, buf)
+ }
+ }
+
+ #[test]
+ fn roundtrip_header() {
+ let mut header = GzBuilder::new()
+ .mtime(1234)
+ .operating_system(57)
+ .filename("filename")
+ .comment("comment")
+ .into_header(Compression::fast());
+
+ // Add a CRC to the header
+ header[3] = header[3] ^ super::FHCRC;
+ let rfc1952_crc = Rfc1952Crc::new();
+ let crc32 = rfc1952_crc.crc(&header);
+ let crc16 = crc32 as u16;
+ header.extend(&crc16.to_le_bytes());
+
+ let mut parser = GzHeaderParser::new();
+ parser.parse(&mut header.as_slice()).unwrap();
+ let actual = parser.header().unwrap();
+ assert_eq!(
+ actual,
+ &GzHeader {
+ extra: None,
+ filename: Some("filename".as_bytes().to_vec()),
+ comment: Some("comment".as_bytes().to_vec()),
+ operating_system: 57,
+ mtime: 1234
+ }
+ )
+ }
+
#[test]
fn fields() {
let r = vec![0, 2, 4, 6];
@@ -353,33 +636,4 @@ mod tests {
write!(f, "Hello world").unwrap();
f.flush().unwrap();
}
-
- use crate::gz::bufread::tests::BlockingCursor;
- #[test]
- // test function read_and_forget of Buffer
- fn blocked_partial_header_read() {
- // this is a reader which receives data afterwards
- let mut r = BlockingCursor::new();
- let data = vec![1, 2, 3];
-
- match r.write_all(&data) {
- Ok(()) => {}
- _ => {
- panic!("Unexpected result for write_all");
- }
- }
- r.set_position(0);
-
- // this is unused except for the buffering
- let mut decoder = read::GzDecoder::new(r);
- let mut out = Vec::with_capacity(7);
- match decoder.read(&mut out) {
- Err(e) => {
- assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock);
- }
- _ => {
- panic!("Unexpected result for decoder.read");
- }
- }
- }
}
diff --git a/vendor/flate2/src/gz/read.rs b/vendor/flate2/src/gz/read.rs
index cfeb992e8..5a65526ce 100644
--- a/vendor/flate2/src/gz/read.rs
+++ b/vendor/flate2/src/gz/read.rs
@@ -25,11 +25,11 @@ use crate::Compression;
/// // Return a vector containing the GZ compressed version of hello world
///
/// fn gzencode_hello_world() -> io::Result<Vec<u8>> {
-/// let mut ret_vec = [0;100];
+/// let mut ret_vec = Vec::new();
/// let bytestring = b"hello world";
/// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast());
-/// let count = gz.read(&mut ret_vec)?;
-/// Ok(ret_vec[0..count].to_vec())
+/// gz.read_to_end(&mut ret_vec)?;
+/// Ok(ret_vec)
/// }
/// ```
#[derive(Debug)]
@@ -90,17 +90,26 @@ impl<R: Read + Write> Write for GzEncoder<R> {
}
}
-/// A gzip streaming decoder
+/// A decoder for a single member of a [gzip file].
///
/// This structure exposes a [`Read`] interface that will consume compressed
/// data from the underlying reader and emit uncompressed data.
///
-/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
+/// After reading a single member of the gzip data this reader will return
+/// Ok(0) even if there are more bytes available in the underlying reader.
+/// `GzDecoder` may have read additional bytes past the end of the gzip data.
+/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader`
+/// and use `bufread::GzDecoder` instead.
+///
+/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
+/// or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
///
/// # Examples
///
/// ```
-///
/// use std::io::prelude::*;
/// use std::io;
/// # use flate2::Compression;
@@ -146,6 +155,9 @@ impl<R> GzDecoder<R> {
}
/// Acquires a reference to the underlying reader.
+ ///
+ /// Note that the decoder may have read past the end of the gzip data.
+ /// To prevent this use [`bufread::GzDecoder`] instead.
pub fn get_ref(&self) -> &R {
self.inner.get_ref().get_ref()
}
@@ -153,12 +165,19 @@ impl<R> GzDecoder<R> {
/// Acquires a mutable reference to the underlying stream.
///
/// Note that mutation of the stream may result in surprising results if
- /// this decoder is continued to be used.
+ /// this decoder continues to be used.
+ ///
+ /// Note that the decoder may have read past the end of the gzip data.
+ /// To prevent this use [`bufread::GzDecoder`] instead.
pub fn get_mut(&mut self) -> &mut R {
self.inner.get_mut().get_mut()
}
/// Consumes this decoder, returning the underlying reader.
+ ///
+ /// Note that the decoder may have read past the end of the gzip data.
+ /// Subsequent reads will skip those bytes. To prevent this use
+ /// [`bufread::GzDecoder`] instead.
pub fn into_inner(self) -> R {
self.inner.into_inner().into_inner()
}
@@ -180,19 +199,19 @@ impl<R: Read + Write> Write for GzDecoder<R> {
}
}
-/// A gzip streaming decoder that decodes all members of a multistream
+/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
+///
+/// This structure exposes a [`Read`] interface that will consume compressed
+/// data from the underlying reader and emit uncompressed data.
///
-/// A gzip member consists of a header, compressed data and a trailer. The [gzip
-/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
-/// gzip members to be joined in a single stream. `MultiGzDecoder` will
-/// decode all consecutive members while `GzDecoder` will only decompress the
-/// first gzip member. The multistream format is commonly used in bioinformatics,
-/// for example when using the BGZF compressed data.
+/// A gzip file consists of a series of *members* concatenated one after another.
+/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the
+/// underlying reader does.
///
-/// This structure exposes a [`Read`] interface that will consume all gzip members
-/// from the underlying reader and emit uncompressed data.
+/// To handle members separately, see [GzDecoder] or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
-/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
///
/// # Examples
///
@@ -276,3 +295,84 @@ impl<R: Read + Write> Write for MultiGzDecoder<R> {
self.get_mut().flush()
}
}
+
+#[cfg(test)]
+mod tests {
+ use std::io::{Cursor, ErrorKind, Read, Result, Write};
+
+ use super::GzDecoder;
+
+ //a cursor turning EOF into blocking errors
+ #[derive(Debug)]
+ pub struct BlockingCursor {
+ pub cursor: Cursor<Vec<u8>>,
+ }
+
+ impl BlockingCursor {
+ pub fn new() -> BlockingCursor {
+ BlockingCursor {
+ cursor: Cursor::new(Vec::new()),
+ }
+ }
+
+ pub fn set_position(&mut self, pos: u64) {
+ return self.cursor.set_position(pos);
+ }
+ }
+
+ impl Write for BlockingCursor {
+ fn write(&mut self, buf: &[u8]) -> Result<usize> {
+ return self.cursor.write(buf);
+ }
+ fn flush(&mut self) -> Result<()> {
+ return self.cursor.flush();
+ }
+ }
+
+ impl Read for BlockingCursor {
+ fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+ //use the cursor, except it turns eof into blocking error
+ let r = self.cursor.read(buf);
+ match r {
+ Err(ref err) => {
+ if err.kind() == ErrorKind::UnexpectedEof {
+ return Err(ErrorKind::WouldBlock.into());
+ }
+ }
+ Ok(0) => {
+ //regular EOF turned into blocking error
+ return Err(ErrorKind::WouldBlock.into());
+ }
+ Ok(_n) => {}
+ }
+ return r;
+ }
+ }
+
+ #[test]
+ fn blocked_partial_header_read() {
+ // this is a reader which receives data afterwards
+ let mut r = BlockingCursor::new();
+ let data = vec![1, 2, 3];
+
+ match r.write_all(&data) {
+ Ok(()) => {}
+ _ => {
+ panic!("Unexpected result for write_all");
+ }
+ }
+ r.set_position(0);
+
+ // this is unused except for the buffering
+ let mut decoder = GzDecoder::new(r);
+ let mut out = Vec::with_capacity(7);
+ match decoder.read(&mut out) {
+ Err(e) => {
+ assert_eq!(e.kind(), ErrorKind::WouldBlock);
+ }
+ _ => {
+ panic!("Unexpected result for decoder.read");
+ }
+ }
+ }
+}
diff --git a/vendor/flate2/src/gz/write.rs b/vendor/flate2/src/gz/write.rs
index 83eebb757..74d6c5acf 100644
--- a/vendor/flate2/src/gz/write.rs
+++ b/vendor/flate2/src/gz/write.rs
@@ -2,8 +2,7 @@ use std::cmp;
use std::io;
use std::io::prelude::*;
-use super::bufread::{corrupt, read_gz_header};
-use super::{GzBuilder, GzHeader};
+use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
use crate::crc::{Crc, CrcWriter};
use crate::zio;
use crate::{Compress, Compression, Decompress, Status};
@@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> {
}
}
-/// A gzip streaming decoder
+/// A decoder for a single member of a [gzip file].
///
-/// This structure exposes a [`Write`] interface that will emit uncompressed data
-/// to the underlying writer `W`.
+/// This structure exposes a [`Write`] interface, receiving compressed data and
+/// writing uncompressed data to the underlying writer.
+///
+/// After decoding a single member of the gzip data this writer will return the number of bytes up to
+/// the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to
+/// handle any data following the gzip member.
+///
+/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
+/// or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
///
/// # Examples
@@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> {
pub struct GzDecoder<W: Write> {
inner: zio::Writer<CrcWriter<W>, Decompress>,
crc_bytes: Vec<u8>,
- header: Option<GzHeader>,
- header_buf: Vec<u8>,
+ header_parser: GzHeaderParser,
}
const CRC_BYTES_LEN: usize = 8;
@@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> {
GzDecoder {
inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
- header: None,
- header_buf: Vec::new(),
+ header_parser: GzHeaderParser::new(),
}
}
/// Returns the header associated with this stream.
pub fn header(&self) -> Option<&GzHeader> {
- self.header.as_ref()
+ self.header_parser.header()
}
/// Acquires a reference to the underlying writer.
@@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> {
}
}
-struct Counter<T: Read> {
- inner: T,
- pos: usize,
-}
-
-impl<T: Read> Read for Counter<T> {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- let pos = self.inner.read(buf)?;
- self.pos += pos;
- Ok(pos)
- }
-}
-
impl<W: Write> Write for GzDecoder<W> {
- fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
- if self.header.is_none() {
- // trying to avoid buffer usage
- let (res, pos) = {
- let mut counter = Counter {
- inner: self.header_buf.chain(buf),
- pos: 0,
- };
- let res = read_gz_header(&mut counter);
- (res, counter.pos)
- };
-
- match res {
+ fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
+ let buflen = buf.len();
+ if self.header().is_none() {
+ match self.header_parser.parse(&mut buf) {
Err(err) => {
if err.kind() == io::ErrorKind::UnexpectedEof {
- // not enough data for header, save to the buffer
- self.header_buf.extend(buf);
- Ok(buf.len())
+ // all data read but header still not complete
+ Ok(buflen)
} else {
Err(err)
}
}
- Ok(header) => {
- self.header = Some(header);
- let pos = pos - self.header_buf.len();
- self.header_buf.truncate(0);
- Ok(pos)
+ Ok(_) => {
+ debug_assert!(self.header().is_some());
+ // buf now contains the unread part of the original buf
+ let n = buflen - buf.len();
+ Ok(n)
}
}
} else {
@@ -373,17 +356,19 @@ impl<W: Read + Write> Read for GzDecoder<W> {
}
}
-/// A gzip streaming decoder that decodes all members of a multistream
+/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
+///
+/// This structure exposes a [`Write`] interface that will consume compressed data and
+/// write uncompressed data to the underlying writer.
///
-/// A gzip member consists of a header, compressed data and a trailer. The [gzip
-/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
-/// gzip members to be joined in a single stream. `MultiGzDecoder` will
-/// decode all consecutive members while `GzDecoder` will only decompress
-/// the first gzip member. The multistream format is commonly used in
-/// bioinformatics, for example when using the BGZF compressed data.
+/// A gzip file consists of a series of *members* concatenated one after another.
+/// `MultiGzDecoder` decodes all members of a file and writes them to the
+/// underlying writer one after another.
///
-/// This structure exposes a [`Write`] interface that will consume all gzip members
-/// from the written buffers and write uncompressed data to the writer.
+/// To handle members separately, see [GzDecoder] or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
#[derive(Debug)]
pub struct MultiGzDecoder<W: Write> {
inner: GzDecoder<W>,
@@ -524,6 +509,56 @@ mod tests {
}
#[test]
+ fn decode_writer_partial_header_filename() {
+ let filename = "test.txt";
+ let mut e = GzBuilder::new()
+ .filename(filename)
+ .read(STR.as_bytes(), Compression::default());
+ let mut bytes = Vec::new();
+ e.read_to_end(&mut bytes).unwrap();
+
+ let mut writer = Vec::new();
+ let mut decoder = GzDecoder::new(writer);
+ assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
+ let n = decoder.write(&bytes[12..]).unwrap();
+ if n < bytes.len() - 12 {
+ decoder.write(&bytes[n + 12..]).unwrap();
+ }
+ assert_eq!(
+ decoder.header().unwrap().filename().unwrap(),
+ filename.as_bytes()
+ );
+ writer = decoder.finish().unwrap();
+ let return_string = String::from_utf8(writer).expect("String parsing error");
+ assert_eq!(return_string, STR);
+ }
+
+ #[test]
+ fn decode_writer_partial_header_comment() {
+ let comment = "test comment";
+ let mut e = GzBuilder::new()
+ .comment(comment)
+ .read(STR.as_bytes(), Compression::default());
+ let mut bytes = Vec::new();
+ e.read_to_end(&mut bytes).unwrap();
+
+ let mut writer = Vec::new();
+ let mut decoder = GzDecoder::new(writer);
+ assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
+ let n = decoder.write(&bytes[12..]).unwrap();
+ if n < bytes.len() - 12 {
+ decoder.write(&bytes[n + 12..]).unwrap();
+ }
+ assert_eq!(
+ decoder.header().unwrap().comment().unwrap(),
+ comment.as_bytes()
+ );
+ writer = decoder.finish().unwrap();
+ let return_string = String::from_utf8(writer).expect("String parsing error");
+ assert_eq!(return_string, STR);
+ }
+
+ #[test]
fn decode_writer_exact_header() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
@@ -575,4 +610,32 @@ mod tests {
let expected = STR.repeat(2);
assert_eq!(return_string, expected);
}
+
+ // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
+ // additional data to be consumed by the caller.
+ #[test]
+ fn decode_extra_data() {
+ let compressed = {
+ let mut e = GzEncoder::new(Vec::new(), Compression::default());
+ e.write(STR.as_ref()).unwrap();
+ let mut b = e.finish().unwrap();
+ b.push(b'x');
+ b
+ };
+
+ let mut writer = Vec::new();
+ let mut decoder = GzDecoder::new(writer);
+ let mut consumed_bytes = 0;
+ loop {
+ let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
+ if n == 0 {
+ break;
+ }
+ consumed_bytes += n;
+ }
+ writer = decoder.finish().unwrap();
+ let actual = String::from_utf8(writer).expect("String parsing error");
+ assert_eq!(actual, STR);
+ assert_eq!(&compressed[consumed_bytes..], b"x");
+ }
}
diff --git a/vendor/flate2/src/lib.rs b/vendor/flate2/src/lib.rs
index 6789c5b76..8c000b032 100644
--- a/vendor/flate2/src/lib.rs
+++ b/vendor/flate2/src/lib.rs
@@ -65,12 +65,30 @@
//! `Write` trait if `T: Write`. That is, the "dual trait" is forwarded directly
//! to the underlying object if available.
//!
+//! # About multi-member Gzip files
+//!
+//! While most `gzip` files one encounters will have a single *member* that can be read
+//! with the [`GzDecoder`], there may be some files which have multiple members.
+//!
+//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly
+//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate
+//! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder`
+//! and `write::GzDecoder` also allow non-gzip data following gzip data to be handled.
+//!
+//! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file
+//! into one consecutive stream of bytes, which hides the underlying *members* entirely.
+//! If a file contains non-gzip data after the gzip data, MultiGzDecoder will
+//! emit an error after decoding the gzip data. This behavior matches the `gzip`,
+//! `gunzip`, and `zcat` command line tools.
+//!
//! [`read`]: read/index.html
//! [`bufread`]: bufread/index.html
//! [`write`]: write/index.html
//! [read]: https://doc.rust-lang.org/std/io/trait.Read.html
//! [write]: https://doc.rust-lang.org/std/io/trait.Write.html
//! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html
+//! [`GzDecoder`]: read/struct.GzDecoder.html
+//! [`MultiGzDecoder`]: read/struct.MultiGzDecoder.html
#![doc(html_root_url = "https://docs.rs/flate2/0.2")]
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
@@ -78,6 +96,9 @@
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#[cfg(not(feature = "any_impl",))]
+compile_error!("You need to choose a zlib backend");
+
pub use crate::crc::{Crc, CrcReader, CrcWriter};
pub use crate::gz::GzBuilder;
pub use crate::gz::GzHeader;
@@ -96,7 +117,14 @@ mod zlib;
/// Types which operate over [`Read`] streams, both encoders and decoders for
/// various formats.
///
+/// Note that the `read` decoder types may read past the end of the compressed
+/// data while decoding. If the caller requires subsequent reads to start
+/// immediately following the compressed data wrap the `Read` type in a
+/// [`BufReader`] and use the `BufReader` with the equivalent decoder from the
+/// `bufread` module and also for the subsequent reads.
+///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
+/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html
pub mod read {
pub use crate::deflate::read::DeflateDecoder;
pub use crate::deflate::read::DeflateEncoder;
@@ -154,7 +182,7 @@ fn _assert_send_sync() {
}
/// When compressing data, the compression level can be specified by a value in
-/// this enum.
+/// this struct.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Compression(u32);
diff --git a/vendor/flate2/src/zlib/bufread.rs b/vendor/flate2/src/zlib/bufread.rs
index 61d12525c..aa8af64f8 100644
--- a/vendor/flate2/src/zlib/bufread.rs
+++ b/vendor/flate2/src/zlib/bufread.rs
@@ -47,6 +47,15 @@ impl<R: BufRead> ZlibEncoder<R> {
data: Compress::new(level, true),
}
}
+
+ /// Creates a new encoder with the given `compression` settings which will
+ /// read uncompressed data from the given stream `r` and emit the compressed stream.
+ pub fn new_with_compress(r: R, compression: Compress) -> ZlibEncoder<R> {
+ ZlibEncoder {
+ obj: r,
+ data: compression,
+ }
+ }
}
pub fn reset_encoder_data<R>(zlib: &mut ZlibEncoder<R>) {
@@ -165,6 +174,15 @@ impl<R: BufRead> ZlibDecoder<R> {
data: Decompress::new(true),
}
}
+
+ /// Creates a new decoder which will decompress data read from the given
+ /// stream, using the given `decompression` settings.
+ pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> {
+ ZlibDecoder {
+ obj: r,
+ data: decompression,
+ }
+ }
}
pub fn reset_decoder_data<R>(zlib: &mut ZlibDecoder<R>) {
diff --git a/vendor/flate2/src/zlib/read.rs b/vendor/flate2/src/zlib/read.rs
index 330213049..fbae74867 100644
--- a/vendor/flate2/src/zlib/read.rs
+++ b/vendor/flate2/src/zlib/read.rs
@@ -3,6 +3,7 @@ use std::io::prelude::*;
use super::bufread;
use crate::bufreader::BufReader;
+use crate::Decompress;
/// A ZLIB encoder, or compressor.
///
@@ -24,9 +25,9 @@ use crate::bufreader::BufReader;
/// # fn open_hello_world() -> std::io::Result<Vec<u8>> {
/// let f = File::open("examples/hello_world.txt")?;
/// let mut z = ZlibEncoder::new(f, Compression::fast());
-/// let mut buffer = [0;50];
-/// let byte_count = z.read(&mut buffer)?;
-/// # Ok(buffer[0..byte_count].to_vec())
+/// let mut buffer = Vec::new();
+/// z.read_to_end(&mut buffer)?;
+/// # Ok(buffer)
/// # }
/// ```
#[derive(Debug)]
@@ -42,6 +43,14 @@ impl<R: Read> ZlibEncoder<R> {
inner: bufread::ZlibEncoder::new(BufReader::new(r), level),
}
}
+
+ /// Creates a new encoder with the given `compression` settings which will
+ /// read uncompressed data from the given stream `r` and emit the compressed stream.
+ pub fn new_with_compress(r: R, compression: crate::Compress) -> ZlibEncoder<R> {
+ ZlibEncoder {
+ inner: bufread::ZlibEncoder::new_with_compress(BufReader::new(r), compression),
+ }
+ }
}
impl<R> ZlibEncoder<R> {
@@ -160,7 +169,8 @@ impl<R: Read> ZlibDecoder<R> {
ZlibDecoder::new_with_buf(r, vec![0; 32 * 1024])
}
- /// Same as `new`, but the intermediate buffer for data is specified.
+ /// Creates a new decoder which will decompress data read from the given
+ /// stream `r`, using `buf` as backing to speed up reading.
///
/// Note that the specified buffer will only be used up to its current
/// length. The buffer's capacity will also not grow over time.
@@ -169,6 +179,31 @@ impl<R: Read> ZlibDecoder<R> {
inner: bufread::ZlibDecoder::new(BufReader::with_buf(buf, r)),
}
}
+
+ /// Creates a new decoder which will decompress data read from the given
+ /// stream `r`, along with `decompression` settings.
+ pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> {
+ ZlibDecoder::new_with_decompress_and_buf(r, vec![0; 32 * 1024], decompression)
+ }
+
+ /// Creates a new decoder which will decompress data read from the given
+ /// stream `r`, using `buf` as backing to speed up reading,
+ /// along with `decompression` settings to configure decoder.
+ ///
+ /// Note that the specified buffer will only be used up to its current
+ /// length. The buffer's capacity will also not grow over time.
+ pub fn new_with_decompress_and_buf(
+ r: R,
+ buf: Vec<u8>,
+ decompression: Decompress,
+ ) -> ZlibDecoder<R> {
+ ZlibDecoder {
+ inner: bufread::ZlibDecoder::new_with_decompress(
+ BufReader::with_buf(buf, r),
+ decompression,
+ ),
+ }
+ }
}
impl<R> ZlibDecoder<R> {
diff --git a/vendor/flate2/src/zlib/write.rs b/vendor/flate2/src/zlib/write.rs
index c67181402..d8ad2f261 100644
--- a/vendor/flate2/src/zlib/write.rs
+++ b/vendor/flate2/src/zlib/write.rs
@@ -44,6 +44,14 @@ impl<W: Write> ZlibEncoder<W> {
}
}
+ /// Creates a new encoder which will write compressed data to the stream
+ /// `w` with the given `compression` settings.
+ pub fn new_with_compress(w: W, compression: Compress) -> ZlibEncoder<W> {
+ ZlibEncoder {
+ inner: zio::Writer::new(w, compression),
+ }
+ }
+
/// Acquires a reference to the underlying writer.
pub fn get_ref(&self) -> &W {
self.inner.get_ref()
@@ -218,6 +226,17 @@ impl<W: Write> ZlibDecoder<W> {
}
}
+ /// Creates a new decoder which will write uncompressed data to the stream `w`
+ /// using the given `decompression` settings.
+ ///
+ /// When this decoder is dropped or unwrapped the final pieces of data will
+ /// be flushed.
+ pub fn new_with_decompress(w: W, decompression: Decompress) -> ZlibDecoder<W> {
+ ZlibDecoder {
+ inner: zio::Writer::new(w, decompression),
+ }
+ }
+
/// Acquires a reference to the underlying writer.
pub fn get_ref(&self) -> &W {
self.inner.get_ref()