diff options
Diffstat (limited to 'vendor/flate2/src/gz/write.rs')
-rw-r--r-- | vendor/flate2/src/gz/write.rs | 169 |
1 files changed, 116 insertions, 53 deletions
diff --git a/vendor/flate2/src/gz/write.rs b/vendor/flate2/src/gz/write.rs index 83eebb757..74d6c5acf 100644 --- a/vendor/flate2/src/gz/write.rs +++ b/vendor/flate2/src/gz/write.rs @@ -2,8 +2,7 @@ use std::cmp; use std::io; use std::io::prelude::*; -use super::bufread::{corrupt, read_gz_header}; -use super::{GzBuilder, GzHeader}; +use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser}; use crate::crc::{Crc, CrcWriter}; use crate::zio; use crate::{Compress, Compression, Decompress, Status}; @@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> { } } -/// A gzip streaming decoder +/// A decoder for a single member of a [gzip file]. /// -/// This structure exposes a [`Write`] interface that will emit uncompressed data -/// to the underlying writer `W`. +/// This structure exposes a [`Write`] interface, receiving compressed data and +/// writing uncompressed data to the underlying writer. +/// +/// After decoding a single member of the gzip data this writer will return the number of bytes up to +/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to +/// handle any data following the gzip member. +/// +/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] +/// or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). /// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html /// /// # Examples @@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> { pub struct GzDecoder<W: Write> { inner: zio::Writer<CrcWriter<W>, Decompress>, crc_bytes: Vec<u8>, - header: Option<GzHeader>, - header_buf: Vec<u8>, + header_parser: GzHeaderParser, } const CRC_BYTES_LEN: usize = 8; @@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> { GzDecoder { inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), - header: None, - header_buf: Vec::new(), + header_parser: GzHeaderParser::new(), } } /// Returns the header associated with this stream. pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() + self.header_parser.header() } /// Acquires a reference to the underlying writer. @@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> { } } -struct Counter<T: Read> { - inner: T, - pos: usize, -} - -impl<T: Read> Read for Counter<T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let pos = self.inner.read(buf)?; - self.pos += pos; - Ok(pos) - } -} - impl<W: Write> Write for GzDecoder<W> { - fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - if self.header.is_none() { - // trying to avoid buffer usage - let (res, pos) = { - let mut counter = Counter { - inner: self.header_buf.chain(buf), - pos: 0, - }; - let res = read_gz_header(&mut counter); - (res, counter.pos) - }; - - match res { + fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> { + let buflen = buf.len(); + if self.header().is_none() { + match self.header_parser.parse(&mut buf) { Err(err) => { if err.kind() == io::ErrorKind::UnexpectedEof { - // not enough data for header, save to the buffer - self.header_buf.extend(buf); - Ok(buf.len()) + // all data read but header still not complete + Ok(buflen) } else { Err(err) } } - Ok(header) => { - self.header = Some(header); - let pos = pos - self.header_buf.len(); - self.header_buf.truncate(0); - Ok(pos) + Ok(_) => { + debug_assert!(self.header().is_some()); + // buf now contains the unread part of the original buf + let n = buflen - buf.len(); + Ok(n) } } } else { @@ -373,17 +356,19 @@ impl<W: Read + Write> Read for GzDecoder<W> { } } -/// A gzip streaming decoder that decodes all members of a multistream +/// A gzip streaming decoder that decodes a [gzip file] with multiple members. +/// +/// This structure exposes a [`Write`] interface that will consume compressed data and +/// write uncompressed data to the underlying writer. /// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. +/// A gzip file consists of a series of *members* concatenated one after another. +/// `MultiGzDecoder` decodes all members of a file and writes them to the +/// underlying writer one after another. /// -/// This structure exposes a [`Write`] interface that will consume all gzip members -/// from the written buffers and write uncompressed data to the writer. +/// To handle members separately, see [GzDecoder] or read more +/// [in the introduction](../index.html#about-multi-member-gzip-files). +/// +/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 #[derive(Debug)] pub struct MultiGzDecoder<W: Write> { inner: GzDecoder<W>, @@ -524,6 +509,56 @@ mod tests { } #[test] + fn decode_writer_partial_header_filename() { + let filename = "test.txt"; + let mut e = GzBuilder::new() + .filename(filename) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().filename().unwrap(), + filename.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_partial_header_comment() { + let comment = "test comment"; + let mut e = GzBuilder::new() + .comment(comment) + .read(STR.as_bytes(), Compression::default()); + let mut bytes = Vec::new(); + e.read_to_end(&mut bytes).unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); + let n = decoder.write(&bytes[12..]).unwrap(); + if n < bytes.len() - 12 { + decoder.write(&bytes[n + 12..]).unwrap(); + } + assert_eq!( + decoder.header().unwrap().comment().unwrap(), + comment.as_bytes() + ); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] fn decode_writer_exact_header() { let mut e = GzEncoder::new(Vec::new(), Compression::default()); e.write(STR.as_ref()).unwrap(); @@ -575,4 +610,32 @@ mod tests { let expected = STR.repeat(2); assert_eq!(return_string, expected); } + + // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any + // additional data to be consumed by the caller. + #[test] + fn decode_extra_data() { + let compressed = { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + let mut consumed_bytes = 0; + loop { + let n = decoder.write(&compressed[consumed_bytes..]).unwrap(); + if n == 0 { + break; + } + consumed_bytes += n; + } + writer = decoder.finish().unwrap(); + let actual = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(actual, STR); + assert_eq!(&compressed[consumed_bytes..], b"x"); + } } |