1 files changed, 116 insertions, 53 deletions
diff --git a/vendor/flate2/src/gz/write.rs b/vendor/flate2/src/gz/write.rs
index 83eebb757..74d6c5acf 100644
--- a/vendor/flate2/src/gz/write.rs
+++ b/vendor/flate2/src/gz/write.rs
@@ -2,8 +2,7 @@ use std::cmp;
 use std::io;
 use std::io::prelude::*;
 
-use super::bufread::{corrupt, read_gz_header};
-use super::{GzBuilder, GzHeader};
+use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
 use crate::crc::{Crc, CrcWriter};
 use crate::zio;
 use crate::{Compress, Compression, Decompress, Status};
@@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> {
     }
 }
 
-/// A gzip streaming decoder
+/// A decoder for a single member of a [gzip file].
 ///
-/// This structure exposes a [`Write`] interface that will emit uncompressed data
-/// to the underlying writer `W`.
+/// This structure exposes a [`Write`] interface, receiving compressed data and
+/// writing uncompressed data to the underlying writer.
+///
+/// After decoding a single member of the gzip data, this writer will return the number of bytes
+/// up to the end of the gzip member, and subsequent writes will return `Ok(0)`, allowing the
+/// caller to handle any data following the gzip member.
+///
+/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
+/// or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
 ///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
 /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
 ///
 /// # Examples
@@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> {
 pub struct GzDecoder<W: Write> {
     inner: zio::Writer<CrcWriter<W>, Decompress>,
     crc_bytes: Vec<u8>,
-    header: Option<GzHeader>,
-    header_buf: Vec<u8>,
+    header_parser: GzHeaderParser,
 }
 
 const CRC_BYTES_LEN: usize = 8;
@@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> {
         GzDecoder {
             inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
             crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
-            header: None,
-            header_buf: Vec::new(),
+            header_parser: GzHeaderParser::new(),
         }
     }
 
     /// Returns the header associated with this stream.
     pub fn header(&self) -> Option<&GzHeader> {
-        self.header.as_ref()
+        self.header_parser.header()
     }
 
     /// Acquires a reference to the underlying writer.
@@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> {
     }
 }
 
-struct Counter<T: Read> {
-    inner: T,
-    pos: usize,
-}
-
-impl<T: Read> Read for Counter<T> {
-    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        let pos = self.inner.read(buf)?;
-        self.pos += pos;
-        Ok(pos)
-    }
-}
-
 impl<W: Write> Write for GzDecoder<W> {
-    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        if self.header.is_none() {
-            // trying to avoid buffer usage
-            let (res, pos) = {
-                let mut counter = Counter {
-                    inner: self.header_buf.chain(buf),
-                    pos: 0,
-                };
-                let res = read_gz_header(&mut counter);
-                (res, counter.pos)
-            };
-
-            match res {
+    fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
+        let buflen = buf.len();
+        if self.header().is_none() {
+            match self.header_parser.parse(&mut buf) {
                 Err(err) => {
                     if err.kind() == io::ErrorKind::UnexpectedEof {
-                        // not enough data for header, save to the buffer
-                        self.header_buf.extend(buf);
-                        Ok(buf.len())
+                        // all data read but header still not complete
+                        Ok(buflen)
                     } else {
                         Err(err)
                     }
                 }
-                Ok(header) => {
-                    self.header = Some(header);
-                    let pos = pos - self.header_buf.len();
-                    self.header_buf.truncate(0);
-                    Ok(pos)
+                Ok(_) => {
+                    debug_assert!(self.header().is_some());
+                    // buf now contains the unread part of the original buf
+                    let n = buflen - buf.len();
+                    Ok(n)
                 }
             }
         } else {
@@ -373,17 +356,19 @@ impl<W: Read + Write> Read for GzDecoder<W> {
     }
 }
 
-/// A gzip streaming decoder that decodes all members of a multistream
+/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
+///
+/// This structure exposes a [`Write`] interface that will consume compressed data and
+/// write uncompressed data to the underlying writer.
 ///
-/// A gzip member consists of a header, compressed data and a trailer. The [gzip
-/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
-/// gzip members to be joined in a single stream. `MultiGzDecoder` will
-/// decode all consecutive members while `GzDecoder` will only decompress
-/// the first gzip member. The multistream format is commonly used in
-/// bioinformatics, for example when using the BGZF compressed data.
+/// A gzip file consists of a series of *members* concatenated one after another.
+/// `MultiGzDecoder` decodes all members of a file and writes them to the
+/// underlying writer one after another.
 ///
-/// This structure exposes a [`Write`] interface that will consume all gzip members
-/// from the written buffers and write uncompressed data to the writer.
+/// To handle members separately, see [`GzDecoder`] or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
 #[derive(Debug)]
 pub struct MultiGzDecoder<W: Write> {
     inner: GzDecoder<W>,
@@ -524,6 +509,56 @@ mod tests {
     }
 
     #[test]
+    fn decode_writer_partial_header_filename() {
+        let filename = "test.txt";
+        let mut e = GzBuilder::new()
+            .filename(filename)
+            .read(STR.as_bytes(), Compression::default());
+        let mut bytes = Vec::new();
+        e.read_to_end(&mut bytes).unwrap();
+
+        let mut writer = Vec::new();
+        let mut decoder = GzDecoder::new(writer);
+        assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
+        let n = decoder.write(&bytes[12..]).unwrap();
+        if n < bytes.len() - 12 {
+            decoder.write(&bytes[n + 12..]).unwrap();
+        }
+        assert_eq!(
+            decoder.header().unwrap().filename().unwrap(),
+            filename.as_bytes()
+        );
+        writer = decoder.finish().unwrap();
+        let return_string = String::from_utf8(writer).expect("String parsing error");
+        assert_eq!(return_string, STR);
+    }
+
+    #[test]
+    fn decode_writer_partial_header_comment() {
+        let comment = "test comment";
+        let mut e = GzBuilder::new()
+            .comment(comment)
+            .read(STR.as_bytes(), Compression::default());
+        let mut bytes = Vec::new();
+        e.read_to_end(&mut bytes).unwrap();
+
+        let mut writer = Vec::new();
+        let mut decoder = GzDecoder::new(writer);
+        assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
+        let n = decoder.write(&bytes[12..]).unwrap();
+        if n < bytes.len() - 12 {
+            decoder.write(&bytes[n + 12..]).unwrap();
+        }
+        assert_eq!(
+            decoder.header().unwrap().comment().unwrap(),
+            comment.as_bytes()
+        );
+        writer = decoder.finish().unwrap();
+        let return_string = String::from_utf8(writer).expect("String parsing error");
+        assert_eq!(return_string, STR);
+    }
+
+    #[test]
     fn decode_writer_exact_header() {
         let mut e = GzEncoder::new(Vec::new(), Compression::default());
         e.write(STR.as_ref()).unwrap();
@@ -575,4 +610,32 @@ mod tests {
         let expected = STR.repeat(2);
         assert_eq!(return_string, expected);
     }
+
+    // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
+    // additional data to be consumed by the caller.
+    #[test]
+    fn decode_extra_data() {
+        let compressed = {
+            let mut e = GzEncoder::new(Vec::new(), Compression::default());
+            e.write(STR.as_ref()).unwrap();
+            let mut b = e.finish().unwrap();
+            b.push(b'x');
+            b
+        };
+
+        let mut writer = Vec::new();
+        let mut decoder = GzDecoder::new(writer);
+        let mut consumed_bytes = 0;
+        loop {
+            let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
+            if n == 0 {
+                break;
+            }
+            consumed_bytes += n;
+        }
+        writer = decoder.finish().unwrap();
+        let actual = String::from_utf8(writer).expect("String parsing error");
+        assert_eq!(actual, STR);
+        assert_eq!(&compressed[consumed_bytes..], b"x");
+    }
 }