summary refs log tree commit diff stats
path: root/vendor/flate2/src/gz/write.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/flate2/src/gz/write.rs')
-rw-r--r-- vendor/flate2/src/gz/write.rs 169
1 files changed, 116 insertions, 53 deletions
diff --git a/vendor/flate2/src/gz/write.rs b/vendor/flate2/src/gz/write.rs
index 83eebb757..74d6c5acf 100644
--- a/vendor/flate2/src/gz/write.rs
+++ b/vendor/flate2/src/gz/write.rs
@@ -2,8 +2,7 @@ use std::cmp;
use std::io;
use std::io::prelude::*;
-use super::bufread::{corrupt, read_gz_header};
-use super::{GzBuilder, GzHeader};
+use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
use crate::crc::{Crc, CrcWriter};
use crate::zio;
use crate::{Compress, Compression, Decompress, Status};
@@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> {
}
}
-/// A gzip streaming decoder
+/// A decoder for a single member of a [gzip file].
///
-/// This structure exposes a [`Write`] interface that will emit uncompressed data
-/// to the underlying writer `W`.
+/// This structure exposes a [`Write`] interface, receiving compressed data and
+/// writing uncompressed data to the underlying writer.
+///
+/// After decoding a single member of the gzip data, this writer will return the number of bytes up
+/// to the end of the gzip member and subsequent writes will return `Ok(0)`, allowing the caller to
+/// handle any data following the gzip member.
+///
+/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
+/// or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
///
/// # Examples
@@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> {
pub struct GzDecoder<W: Write> {
inner: zio::Writer<CrcWriter<W>, Decompress>,
crc_bytes: Vec<u8>,
- header: Option<GzHeader>,
- header_buf: Vec<u8>,
+ header_parser: GzHeaderParser,
}
const CRC_BYTES_LEN: usize = 8;
@@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> {
GzDecoder {
inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
- header: None,
- header_buf: Vec::new(),
+ header_parser: GzHeaderParser::new(),
}
}
/// Returns the header associated with this stream.
pub fn header(&self) -> Option<&GzHeader> {
- self.header.as_ref()
+ self.header_parser.header()
}
/// Acquires a reference to the underlying writer.
@@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> {
}
}
-struct Counter<T: Read> {
- inner: T,
- pos: usize,
-}
-
-impl<T: Read> Read for Counter<T> {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- let pos = self.inner.read(buf)?;
- self.pos += pos;
- Ok(pos)
- }
-}
-
impl<W: Write> Write for GzDecoder<W> {
- fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
- if self.header.is_none() {
- // trying to avoid buffer usage
- let (res, pos) = {
- let mut counter = Counter {
- inner: self.header_buf.chain(buf),
- pos: 0,
- };
- let res = read_gz_header(&mut counter);
- (res, counter.pos)
- };
-
- match res {
+ fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
+ let buflen = buf.len();
+ if self.header().is_none() {
+ match self.header_parser.parse(&mut buf) {
Err(err) => {
if err.kind() == io::ErrorKind::UnexpectedEof {
- // not enough data for header, save to the buffer
- self.header_buf.extend(buf);
- Ok(buf.len())
+ // all data read but header still not complete
+ Ok(buflen)
} else {
Err(err)
}
}
- Ok(header) => {
- self.header = Some(header);
- let pos = pos - self.header_buf.len();
- self.header_buf.truncate(0);
- Ok(pos)
+ Ok(_) => {
+ debug_assert!(self.header().is_some());
+ // buf now contains the unread part of the original buf
+ let n = buflen - buf.len();
+ Ok(n)
}
}
} else {
@@ -373,17 +356,19 @@ impl<W: Read + Write> Read for GzDecoder<W> {
}
}
-/// A gzip streaming decoder that decodes all members of a multistream
+/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
+///
+/// This structure exposes a [`Write`] interface that will consume compressed data and
+/// write uncompressed data to the underlying writer.
///
-/// A gzip member consists of a header, compressed data and a trailer. The [gzip
-/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
-/// gzip members to be joined in a single stream. `MultiGzDecoder` will
-/// decode all consecutive members while `GzDecoder` will only decompress
-/// the first gzip member. The multistream format is commonly used in
-/// bioinformatics, for example when using the BGZF compressed data.
+/// A gzip file consists of a series of *members* concatenated one after another.
+/// `MultiGzDecoder` decodes all members of a file and writes them to the
+/// underlying writer one after another.
///
-/// This structure exposes a [`Write`] interface that will consume all gzip members
-/// from the written buffers and write uncompressed data to the writer.
+/// To handle members separately, see [`GzDecoder`] or read more
+/// [in the introduction](../index.html#about-multi-member-gzip-files).
+///
+/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
#[derive(Debug)]
pub struct MultiGzDecoder<W: Write> {
inner: GzDecoder<W>,
@@ -524,6 +509,56 @@ mod tests {
}
#[test]
+ fn decode_writer_partial_header_filename() {
+ let filename = "test.txt";
+ let mut e = GzBuilder::new()
+ .filename(filename)
+ .read(STR.as_bytes(), Compression::default());
+ let mut bytes = Vec::new();
+ e.read_to_end(&mut bytes).unwrap();
+
+ let mut writer = Vec::new();
+ let mut decoder = GzDecoder::new(writer);
+ assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
+ let n = decoder.write(&bytes[12..]).unwrap();
+ if n < bytes.len() - 12 {
+ decoder.write(&bytes[n + 12..]).unwrap();
+ }
+ assert_eq!(
+ decoder.header().unwrap().filename().unwrap(),
+ filename.as_bytes()
+ );
+ writer = decoder.finish().unwrap();
+ let return_string = String::from_utf8(writer).expect("String parsing error");
+ assert_eq!(return_string, STR);
+ }
+
+ #[test]
+ fn decode_writer_partial_header_comment() {
+ let comment = "test comment";
+ let mut e = GzBuilder::new()
+ .comment(comment)
+ .read(STR.as_bytes(), Compression::default());
+ let mut bytes = Vec::new();
+ e.read_to_end(&mut bytes).unwrap();
+
+ let mut writer = Vec::new();
+ let mut decoder = GzDecoder::new(writer);
+ assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
+ let n = decoder.write(&bytes[12..]).unwrap();
+ if n < bytes.len() - 12 {
+ decoder.write(&bytes[n + 12..]).unwrap();
+ }
+ assert_eq!(
+ decoder.header().unwrap().comment().unwrap(),
+ comment.as_bytes()
+ );
+ writer = decoder.finish().unwrap();
+ let return_string = String::from_utf8(writer).expect("String parsing error");
+ assert_eq!(return_string, STR);
+ }
+
+ #[test]
fn decode_writer_exact_header() {
let mut e = GzEncoder::new(Vec::new(), Compression::default());
e.write(STR.as_ref()).unwrap();
@@ -575,4 +610,32 @@ mod tests {
let expected = STR.repeat(2);
assert_eq!(return_string, expected);
}
+
+ // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
+ // additional data to be consumed by the caller.
+ #[test]
+ fn decode_extra_data() {
+ let compressed = {
+ let mut e = GzEncoder::new(Vec::new(), Compression::default());
+ e.write(STR.as_ref()).unwrap();
+ let mut b = e.finish().unwrap();
+ b.push(b'x');
+ b
+ };
+
+ let mut writer = Vec::new();
+ let mut decoder = GzDecoder::new(writer);
+ let mut consumed_bytes = 0;
+ loop {
+ let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
+ if n == 0 {
+ break;
+ }
+ consumed_bytes += n;
+ }
+ writer = decoder.finish().unwrap();
+ let actual = String::from_utf8(writer).expect("String parsing error");
+ assert_eq!(actual, STR);
+ assert_eq!(&compressed[consumed_bytes..], b"x");
+ }
}