diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/flate2/src/gz | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/flate2/src/gz')
-rw-r--r-- | third_party/rust/flate2/src/gz/bufread.rs | 852 | ||||
-rw-r--r-- | third_party/rust/flate2/src/gz/mod.rs | 385 | ||||
-rw-r--r-- | third_party/rust/flate2/src/gz/read.rs | 278 | ||||
-rw-r--r-- | third_party/rust/flate2/src/gz/write.rs | 450 |
4 files changed, 1965 insertions, 0 deletions
diff --git a/third_party/rust/flate2/src/gz/bufread.rs b/third_party/rust/flate2/src/gz/bufread.rs new file mode 100644 index 0000000000..6be144d0c8 --- /dev/null +++ b/third_party/rust/flate2/src/gz/bufread.rs @@ -0,0 +1,852 @@ +use std::cmp; +use std::io; +use std::io::prelude::*; +use std::mem; + +use super::{GzBuilder, GzHeader}; +use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}; +use crate::crc::{Crc, CrcReader}; +use crate::deflate; +use crate::Compression; + +fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize { + let min = cmp::min(into.len(), from.len() - *pos); + for (slot, val) in into.iter_mut().zip(from[*pos..*pos + min].iter()) { + *slot = *val; + } + *pos += min; + min +} + +pub(crate) fn corrupt() -> io::Error { + io::Error::new( + io::ErrorKind::InvalidInput, + "corrupt gzip stream does not have a matching checksum", + ) +} + +fn bad_header() -> io::Error { + io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header") +} + +fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> { + let mut b = [0; 2]; + r.read_and_forget(&mut b)?; + Ok((b[0] as u16) | ((b[1] as u16) << 8)) +} + +fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { + loop { + match r.part.state { + GzHeaderParsingState::Start => { + let mut header = [0; 10]; + r.read_and_forget(&mut header)?; + + if header[0] != 0x1f || header[1] != 0x8b { + return Err(bad_header()); + } + if header[2] != 8 { + return Err(bad_header()); + } + + r.part.flg = header[3]; + r.part.header.mtime = ((header[4] as u32) << 0) + | ((header[5] as u32) << 8) + | ((header[6] as u32) << 16) + | ((header[7] as u32) << 24); + let _xfl = header[8]; + r.part.header.operating_system = header[9]; + r.part.state = GzHeaderParsingState::Xlen; + } + GzHeaderParsingState::Xlen => { + if r.part.flg & FEXTRA != 0 { + r.part.xlen = read_le_u16(r)?; + } + r.part.state = GzHeaderParsingState::Extra; + } + GzHeaderParsingState::Extra => { + if r.part.flg & FEXTRA != 0 { + let mut extra = vec![0; r.part.xlen as usize]; + r.read_and_forget(&mut extra)?; + r.part.header.extra = Some(extra); + } + r.part.state = GzHeaderParsingState::Filename; + } + GzHeaderParsingState::Filename => { + if r.part.flg & FNAME != 0 { + if None == r.part.header.filename { + r.part.header.filename = Some(Vec::new()); + }; + for byte in r.bytes() { + let byte = byte?; + if byte == 0 { + break; + } + } + } + r.part.state = GzHeaderParsingState::Comment; + } + GzHeaderParsingState::Comment => { + if r.part.flg & FCOMMENT != 0 { + if None == r.part.header.comment { + r.part.header.comment = Some(Vec::new()); + }; + for byte in r.bytes() { + let byte = byte?; + if byte == 0 { + break; + } + } + } + r.part.state = GzHeaderParsingState::Crc; + } + GzHeaderParsingState::Crc => { + if r.part.flg & FHCRC != 0 { + let stored_crc = read_le_u16(r)?; + let calced_crc = r.part.crc.sum() as u16; + if stored_crc != calced_crc { + return Err(corrupt()); + } + } + return Ok(()); + } + } + } +} + +pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> { + let mut part = GzHeaderPartial::new(); + + let result = { + let mut reader = Buffer::new(&mut part, r); + read_gz_header_part(&mut reader) + }; + result.map(|()| part.take_header()) +} + +/// A gzip streaming encoder +/// +/// This structure exposes a [`BufRead`] interface that will read uncompressed data +/// from the underlying reader and expose the compressed version as a [`BufRead`] +/// interface. +/// +/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use std::io; +/// use flate2::Compression; +/// use flate2::bufread::GzEncoder; +/// use std::fs::File; +/// use std::io::BufReader; +/// +/// // Opens sample file, compresses the contents and returns a Vector or error +/// // File wrapped in a BufReader implements BufRead +/// +/// fn open_hello_world() -> io::Result<Vec<u8>> { +/// let f = File::open("examples/hello_world.txt")?; +/// let b = BufReader::new(f); +/// let mut gz = GzEncoder::new(b, Compression::fast()); +/// let mut buffer = Vec::new(); +/// gz.read_to_end(&mut buffer)?; +/// Ok(buffer) +/// } +/// ``` +#[derive(Debug)] +pub struct GzEncoder<R> { + inner: deflate::bufread::DeflateEncoder<CrcReader<R>>, + header: Vec<u8>, + pos: usize, + eof: bool, +} + +pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { + let crc = CrcReader::new(r); + GzEncoder { + inner: deflate::bufread::DeflateEncoder::new(crc, lvl), + header, + pos: 0, + eof: false, + } +} + +impl<R: BufRead> GzEncoder<R> { + /// Creates a new encoder which will use the given compression level. + /// + /// The encoder is not configured specially for the emitted header. For + /// header configuration, see the `GzBuilder` type. + /// + /// The data read from the stream `r` will be compressed and available + /// through the returned reader. + pub fn new(r: R, level: Compression) -> GzEncoder<R> { + GzBuilder::new().buf_read(r, level) + } + + fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { + if self.pos == 8 { + return Ok(0); + } + let crc = self.inner.get_ref().crc(); + let ref arr = [ + (crc.sum() >> 0) as u8, + (crc.sum() >> 8) as u8, + (crc.sum() >> 16) as u8, + (crc.sum() >> 24) as u8, + (crc.amount() >> 0) as u8, + (crc.amount() >> 8) as u8, + (crc.amount() >> 16) as u8, + (crc.amount() >> 24) as u8, + ]; + Ok(copy(into, arr, &mut self.pos)) + } +} + +impl<R> GzEncoder<R> { + /// Acquires a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + self.inner.get_ref().get_ref() + } + + /// Acquires a mutable reference to the underlying reader. + /// + /// Note that mutation of the reader may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut R { + self.inner.get_mut().get_mut() + } + + /// Returns the underlying stream, consuming this encoder + pub fn into_inner(self) -> R { + self.inner.into_inner().into_inner() + } +} + +#[inline] +fn finish(buf: &[u8; 8]) -> (u32, u32) { + let crc = ((buf[0] as u32) << 0) + | ((buf[1] as u32) << 8) + | ((buf[2] as u32) << 16) + | ((buf[3] as u32) << 24); + let amt = ((buf[4] as u32) << 0) + | ((buf[5] as u32) << 8) + | ((buf[6] as u32) << 16) + | ((buf[7] as u32) << 24); + (crc, amt) +} + +impl<R: BufRead> Read for GzEncoder<R> { + fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { + let mut amt = 0; + if self.eof { + return self.read_footer(into); + } else if self.pos < self.header.len() { + amt += copy(into, &self.header, &mut self.pos); + if amt == into.len() { + return Ok(amt); + } + let tmp = into; + into = &mut tmp[amt..]; + } + match self.inner.read(into)? { + 0 => { + self.eof = true; + self.pos = 0; + self.read_footer(into) + } + n => Ok(amt + n), + } + } +} + +impl<R: BufRead + Write> Write for GzEncoder<R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.get_mut().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.get_mut().flush() + } +} + +/// A gzip streaming decoder +/// +/// This structure consumes a [`BufRead`] interface, reading compressed data +/// from the underlying reader, and emitting uncompressed data. +/// +/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use std::io; +/// # use flate2::Compression; +/// # use flate2::write::GzEncoder; +/// use flate2::bufread::GzDecoder; +/// +/// # fn main() { +/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); +/// # e.write_all(b"Hello World").unwrap(); +/// # let bytes = e.finish().unwrap(); +/// # println!("{}", decode_reader(bytes).unwrap()); +/// # } +/// # +/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error +/// // Here &[u8] implements BufRead +/// +/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { +/// let mut gz = GzDecoder::new(&bytes[..]); +/// let mut s = String::new(); +/// gz.read_to_string(&mut s)?; +/// Ok(s) +/// } +/// ``` +#[derive(Debug)] +pub struct GzDecoder<R> { + inner: GzState, + header: Option<GzHeader>, + reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, + multi: bool, +} + +#[derive(Debug)] +pub enum GzHeaderParsingState { + Start, + Xlen, + Extra, + Filename, + Comment, + Crc, +} + +#[derive(Debug)] +pub struct GzHeaderPartial { + buf: Vec<u8>, + state: GzHeaderParsingState, + flg: u8, + xlen: u16, + crc: Crc, + header: GzHeader, +} + +impl GzHeaderPartial { + fn new() -> GzHeaderPartial { + GzHeaderPartial { + buf: Vec::with_capacity(10), // minimum header length + state: GzHeaderParsingState::Start, + flg: 0, + xlen: 0, + crc: Crc::new(), + header: GzHeader { + extra: None, + filename: None, + comment: None, + operating_system: 0, + mtime: 0, + }, + } + } + + pub fn take_header(self) -> GzHeader { + self.header + } +} + +#[derive(Debug)] +enum GzState { + Header(GzHeaderPartial), + Body, + Finished(usize, [u8; 8]), + Err(io::Error), + End, +} + +/// A small adapter which reads data originally from `buf` and then reads all +/// further data from `reader`. This will also buffer all data read from +/// `reader` into `buf` for reuse on a further call. +struct Buffer<'a, T: 'a> { + part: &'a mut GzHeaderPartial, + buf_cur: usize, + buf_max: usize, + reader: &'a mut T, +} + +impl<'a, T> Buffer<'a, T> { + fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { + Buffer { + reader, + buf_cur: 0, + buf_max: part.buf.len(), + part, + } + } +} + +impl<'a, T: Read> Read for Buffer<'a, T> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let mut bufref = match self.part.state { + GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), + GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), + _ => None, + }; + if let Some(ref mut b) = bufref { + // we have a direct reference to a buffer where to write + let len = self.reader.read(buf)?; + if len > 0 && buf[len - 1] == 0 { + // we do not append the final 0 + b.extend_from_slice(&buf[..len - 1]); + } else { + b.extend_from_slice(&buf[..len]); + } + self.part.crc.update(&buf[..len]); + Ok(len) + } else if self.buf_cur == self.buf_max { + // we read new bytes and also save them in self.part.buf + let len = self.reader.read(buf)?; + self.part.buf.extend_from_slice(&buf[..len]); + self.part.crc.update(&buf[..len]); + Ok(len) + } else { + // we first read the previously saved bytes + let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; + self.buf_cur += len; + Ok(len) + } + } +} + +impl<'a, T> Buffer<'a, T> +where + T: std::io::Read, +{ + // If we manage to read all the bytes, we reset the buffer + fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.read_exact(buf)?; + // we managed to read the whole buf + // we will no longer need the previously saved bytes in self.part.buf + let rlen = buf.len(); + self.part.buf.truncate(0); + self.buf_cur = 0; + self.buf_max = 0; + Ok(rlen) + } +} + +impl<R: BufRead> GzDecoder<R> { + /// Creates a new decoder from the given reader, immediately parsing the + /// gzip header. + pub fn new(mut r: R) -> GzDecoder<R> { + let mut part = GzHeaderPartial::new(); + let mut header = None; + + let result = { + let mut reader = Buffer::new(&mut part, &mut r); + read_gz_header_part(&mut reader) + }; + + let state = match result { + Ok(()) => { + header = Some(part.take_header()); + GzState::Body + } + Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), + Err(err) => GzState::Err(err), + }; + + GzDecoder { + inner: state, + reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), + multi: false, + header, + } + } + + fn multi(mut self, flag: bool) -> GzDecoder<R> { + self.multi = flag; + self + } +} + +impl<R> GzDecoder<R> { + /// Returns the header associated with this stream, if it was valid + pub fn header(&self) -> Option<&GzHeader> { + self.header.as_ref() + } + + /// Acquires a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + self.reader.get_ref().get_ref() + } + + /// Acquires a mutable reference to the underlying stream. + /// + /// Note that mutation of the stream may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut R { + self.reader.get_mut().get_mut() + } + + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> R { + self.reader.into_inner().into_inner() + } +} + +impl<R: BufRead> Read for GzDecoder<R> { + fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { + let GzDecoder { + inner, + header, + reader, + multi, + } = self; + + loop { + *inner = match mem::replace(inner, GzState::End) { + GzState::Header(mut part) => { + let result = { + let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); + read_gz_header_part(&mut reader) + }; + match result { + Ok(()) => { + *header = Some(part.take_header()); + GzState::Body + } + Err(err) if io::ErrorKind::WouldBlock == err.kind() => { + *inner = GzState::Header(part); + return Err(err); + } + Err(err) => return Err(err), + } + } + GzState::Body => { + if into.is_empty() { + *inner = GzState::Body; + return Ok(0); + } + + let n = reader.read(into).map_err(|err| { + if io::ErrorKind::WouldBlock == err.kind() { + *inner = GzState::Body; + } + + err + })?; + + match n { + 0 => GzState::Finished(0, [0; 8]), + n => { + *inner = GzState::Body; + return Ok(n); + } + } + } + GzState::Finished(pos, mut buf) => { + if pos < buf.len() { + let n = reader + .get_mut() + .get_mut() + .read(&mut buf[pos..]) + .and_then(|n| { + if n == 0 { + Err(io::ErrorKind::UnexpectedEof.into()) + } else { + Ok(n) + } + }) + .map_err(|err| { + if io::ErrorKind::WouldBlock == err.kind() { + *inner = GzState::Finished(pos, buf); + } + + err + })?; + + GzState::Finished(pos + n, buf) + } else { + let (crc, amt) = finish(&buf); + + if crc != reader.crc().sum() || amt != reader.crc().amount() { + return Err(corrupt()); + } else if *multi { + let is_eof = reader + .get_mut() + .get_mut() + .fill_buf() + .map(|buf| buf.is_empty()) + .map_err(|err| { + if io::ErrorKind::WouldBlock == err.kind() { + *inner = GzState::Finished(pos, buf); + } + + err + })?; + + if is_eof { + GzState::End + } else { + reader.reset(); + reader.get_mut().reset_data(); + header.take(); + GzState::Header(GzHeaderPartial::new()) + } + } else { + GzState::End + } + } + } + GzState::Err(err) => return Err(err), + GzState::End => return Ok(0), + }; + } + } +} + +impl<R: BufRead + Write> Write for GzDecoder<R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.get_mut().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.get_mut().flush() + } +} + +/// A gzip streaming decoder that decodes all members of a multistream +/// +/// A gzip member consists of a header, compressed data and a trailer. The [gzip +/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple +/// gzip members to be joined in a single stream. `MultiGzDecoder` will +/// decode all consecutive members while `GzDecoder` will only decompress +/// the first gzip member. The multistream format is commonly used in +/// bioinformatics, for example when using the BGZF compressed data. +/// +/// This structure exposes a [`BufRead`] interface that will consume all gzip members +/// from the underlying reader and emit uncompressed data. +/// +/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use std::io; +/// # use flate2::Compression; +/// # use flate2::write::GzEncoder; +/// use flate2::bufread::MultiGzDecoder; +/// +/// # fn main() { +/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); +/// # e.write_all(b"Hello World").unwrap(); +/// # let bytes = e.finish().unwrap(); +/// # println!("{}", decode_reader(bytes).unwrap()); +/// # } +/// # +/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error +/// // Here &[u8] implements BufRead +/// +/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { +/// let mut gz = MultiGzDecoder::new(&bytes[..]); +/// let mut s = String::new(); +/// gz.read_to_string(&mut s)?; +/// Ok(s) +/// } +/// ``` +#[derive(Debug)] +pub struct MultiGzDecoder<R>(GzDecoder<R>); + +impl<R: BufRead> MultiGzDecoder<R> { + /// Creates a new decoder from the given reader, immediately parsing the + /// (first) gzip header. If the gzip stream contains multiple members all will + /// be decoded. + pub fn new(r: R) -> MultiGzDecoder<R> { + MultiGzDecoder(GzDecoder::new(r).multi(true)) + } +} + +impl<R> MultiGzDecoder<R> { + /// Returns the current header associated with this stream, if it's valid + pub fn header(&self) -> Option<&GzHeader> { + self.0.header() + } + + /// Acquires a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + self.0.get_ref() + } + + /// Acquires a mutable reference to the underlying stream. + /// + /// Note that mutation of the stream may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut R { + self.0.get_mut() + } + + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> R { + self.0.into_inner() + } +} + +impl<R: BufRead> Read for MultiGzDecoder<R> { + fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { + self.0.read(into) + } +} + +#[cfg(test)] +pub mod tests { + use crate::gz::bufread::*; + use std::io; + use std::io::{Cursor, Read, Write}; + + //a cursor turning EOF into blocking errors + #[derive(Debug)] + pub struct BlockingCursor { + pub cursor: Cursor<Vec<u8>>, + } + + impl BlockingCursor { + pub fn new() -> BlockingCursor { + BlockingCursor { + cursor: Cursor::new(Vec::new()), + } + } + + pub fn set_position(&mut self, pos: u64) { + return self.cursor.set_position(pos); + } + + pub fn position(&mut self) -> u64 { + return self.cursor.position(); + } + } + + impl Write for BlockingCursor { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + return self.cursor.write(buf); + } + fn flush(&mut self) -> io::Result<()> { + return self.cursor.flush(); + } + } + + impl Read for BlockingCursor { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + //use the cursor, except it turns eof into blocking error + let r = self.cursor.read(buf); + match r { + Err(ref err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + return Err(io::ErrorKind::WouldBlock.into()); + } + } + Ok(0) => { + //regular EOF turned into blocking error + return Err(io::ErrorKind::WouldBlock.into()); + } + Ok(_n) => {} + } + return r; + } + } + #[test] + // test function read_and_forget of Buffer + fn buffer_read_and_forget() { + // this is unused except for the buffering + let mut part = GzHeaderPartial::new(); + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + let mut out = Vec::with_capacity(7); + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // First read : successful for one byte + let mut reader = Buffer::new(&mut part, &mut r); + out.resize(1, 0); + match reader.read_and_forget(&mut out) { + Ok(1) => {} + _ => { + panic!("Unexpected result for read_and_forget with data"); + } + } + + // Second read : incomplete for 7 bytes (we have only 2) + out.resize(7, 0); + match reader.read_and_forget(&mut out) { + Err(ref err) => { + assert_eq!(io::ErrorKind::WouldBlock, err.kind()); + } + _ => { + panic!("Unexpected result for read_and_forget with incomplete"); + } + } + + // 3 more data bytes have arrived + let pos = r.position(); + let data2 = vec![4, 5, 6]; + match r.write_all(&data2) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(pos); + + // Third read : still incomplete for 7 bytes (we have 5) + let mut reader2 = Buffer::new(&mut part, &mut r); + match reader2.read_and_forget(&mut out) { + Err(ref err) => { + assert_eq!(io::ErrorKind::WouldBlock, err.kind()); + } + _ => { + panic!("Unexpected result for read_and_forget with more incomplete"); + } + } + + // 3 more data bytes have arrived again + let pos2 = r.position(); + let data3 = vec![7, 8, 9]; + match r.write_all(&data3) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(pos2); + + // Fourth read : now successful for 7 bytes + let mut reader3 = Buffer::new(&mut part, &mut r); + match reader3.read_and_forget(&mut out) { + Ok(7) => { + assert_eq!(out[0], 2); + assert_eq!(out[6], 8); + } + _ => { + panic!("Unexpected result for read_and_forget with data"); + } + } + + // Fifth read : successful for one more byte + out.resize(1, 0); + match reader3.read_and_forget(&mut out) { + Ok(1) => { + assert_eq!(out[0], 9); + } + _ => { + panic!("Unexpected result for read_and_forget with data"); + } + } + } +} diff --git a/third_party/rust/flate2/src/gz/mod.rs b/third_party/rust/flate2/src/gz/mod.rs new file mode 100644 index 0000000000..505450e3e9 --- /dev/null +++ b/third_party/rust/flate2/src/gz/mod.rs @@ -0,0 +1,385 @@ +use std::ffi::CString; +use std::io::prelude::*; +use std::time; + +use crate::bufreader::BufReader; +use crate::Compression; + +pub static FHCRC: u8 = 1 << 1; +pub static FEXTRA: u8 = 1 << 2; +pub static FNAME: u8 = 1 << 3; +pub static FCOMMENT: u8 = 1 << 4; + +pub mod bufread; +pub mod read; +pub mod write; + +/// A structure representing the header of a gzip stream. +/// +/// The header can contain metadata about the file that was compressed, if +/// present. +#[derive(PartialEq, Clone, Debug, Default)] +pub struct GzHeader { + extra: Option<Vec<u8>>, + filename: Option<Vec<u8>>, + comment: Option<Vec<u8>>, + operating_system: u8, + mtime: u32, +} + +impl GzHeader { + /// Returns the `filename` field of this gzip stream's header, if present. + pub fn filename(&self) -> Option<&[u8]> { + self.filename.as_ref().map(|s| &s[..]) + } + + /// Returns the `extra` field of this gzip stream's header, if present. + pub fn extra(&self) -> Option<&[u8]> { + self.extra.as_ref().map(|s| &s[..]) + } + + /// Returns the `comment` field of this gzip stream's header, if present. + pub fn comment(&self) -> Option<&[u8]> { + self.comment.as_ref().map(|s| &s[..]) + } + + /// Returns the `operating_system` field of this gzip stream's header. + /// + /// There are predefined values for various operating systems. + /// 255 means that the value is unknown. + pub fn operating_system(&self) -> u8 { + self.operating_system + } + + /// This gives the most recent modification time of the original file being compressed. + /// + /// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970. + /// (Note that this may cause problems for MS-DOS and other systems that use local + /// rather than Universal time.) If the compressed data did not come from a file, + /// `mtime` is set to the time at which compression started. + /// `mtime` = 0 means no time stamp is available. + /// + /// The usage of `mtime` is discouraged because of Year 2038 problem. + pub fn mtime(&self) -> u32 { + self.mtime + } + + /// Returns the most recent modification time represented by a date-time type. + /// Returns `None` if the value of the underlying counter is 0, + /// indicating no time stamp is available. + /// + /// + /// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970. + /// See [`mtime`](#method.mtime) for more detail. + pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> { + if self.mtime == 0 { + None + } else { + let duration = time::Duration::new(u64::from(self.mtime), 0); + let datetime = time::UNIX_EPOCH + duration; + Some(datetime) + } + } +} + +/// A builder structure to create a new gzip Encoder. +/// +/// This structure controls header configuration options such as the filename. +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// # use std::io; +/// use std::fs::File; +/// use flate2::GzBuilder; +/// use flate2::Compression; +/// +/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern +/// +/// # fn sample_builder() -> Result<(), io::Error> { +/// let f = File::create("examples/hello_world.gz")?; +/// let mut gz = GzBuilder::new() +/// .filename("hello_world.txt") +/// .comment("test file, please delete") +/// .write(f, Compression::default()); +/// gz.write_all(b"hello world")?; +/// gz.finish()?; +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug)] +pub struct GzBuilder { + extra: Option<Vec<u8>>, + filename: Option<CString>, + comment: Option<CString>, + operating_system: Option<u8>, + mtime: u32, +} + +impl Default for GzBuilder { + fn default() -> Self { + Self::new() + } +} + +impl GzBuilder { + /// Create a new blank builder with no header by default. + pub fn new() -> GzBuilder { + GzBuilder { + extra: None, + filename: None, + comment: None, + operating_system: None, + mtime: 0, + } + } + + /// Configure the `mtime` field in the gzip header. + pub fn mtime(mut self, mtime: u32) -> GzBuilder { + self.mtime = mtime; + self + } + + /// Configure the `operating_system` field in the gzip header. + pub fn operating_system(mut self, os: u8) -> GzBuilder { + self.operating_system = Some(os); + self + } + + /// Configure the `extra` field in the gzip header. + pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder { + self.extra = Some(extra.into()); + self + } + + /// Configure the `filename` field in the gzip header. + /// + /// # Panics + /// + /// Panics if the `filename` slice contains a zero. + pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder { + self.filename = Some(CString::new(filename.into()).unwrap()); + self + } + + /// Configure the `comment` field in the gzip header. + /// + /// # Panics + /// + /// Panics if the `comment` slice contains a zero. + pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder { + self.comment = Some(CString::new(comment.into()).unwrap()); + self + } + + /// Consume this builder, creating a writer encoder in the process. + /// + /// The data written to the returned encoder will be compressed and then + /// written out to the supplied parameter `w`. + pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> { + write::gz_encoder(self.into_header(lvl), w, lvl) + } + + /// Consume this builder, creating a reader encoder in the process. + /// + /// Data read from the returned encoder will be the compressed version of + /// the data read from the given reader. + pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> { + read::gz_encoder(self.buf_read(BufReader::new(r), lvl)) + } + + /// Consume this builder, creating a reader encoder in the process. + /// + /// Data read from the returned encoder will be the compressed version of + /// the data read from the given reader. + pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R> + where + R: BufRead, + { + bufread::gz_encoder(self.into_header(lvl), r, lvl) + } + + fn into_header(self, lvl: Compression) -> Vec<u8> { + let GzBuilder { + extra, + filename, + comment, + operating_system, + mtime, + } = self; + let mut flg = 0; + let mut header = vec![0u8; 10]; + if let Some(v) = extra { + flg |= FEXTRA; + header.push((v.len() >> 0) as u8); + header.push((v.len() >> 8) as u8); + header.extend(v); + } + if let Some(filename) = filename { + flg |= FNAME; + header.extend(filename.as_bytes_with_nul().iter().map(|x| *x)); + } + if let Some(comment) = comment { + flg |= FCOMMENT; + header.extend(comment.as_bytes_with_nul().iter().map(|x| *x)); + } + header[0] = 0x1f; + header[1] = 0x8b; + header[2] = 8; + header[3] = flg; + header[4] = (mtime >> 0) as u8; + header[5] = (mtime >> 8) as u8; + header[6] = (mtime >> 16) as u8; + header[7] = (mtime >> 24) as u8; + header[8] = if lvl.0 >= Compression::best().0 { + 2 + } else if lvl.0 <= Compression::fast().0 { + 4 + } else { + 0 + }; + + // Typically this byte indicates what OS the gz stream was created on, + // but in an effort to have cross-platform reproducible streams just + // default this value to 255. I'm not sure that if we "correctly" set + // this it'd do anything anyway... + header[9] = operating_system.unwrap_or(255); + header + } +} + +#[cfg(test)] +mod tests { + use std::io::prelude::*; + + use super::{read, write, GzBuilder}; + use crate::Compression; + use rand::{thread_rng, Rng}; + + #[test] + fn roundtrip() { + let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); + e.write_all(b"foo bar baz").unwrap(); + let inner = e.finish().unwrap(); + let mut d = read::GzDecoder::new(&inner[..]); + let mut s = String::new(); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, "foo bar baz"); + } + + #[test] + fn roundtrip_zero() { + let e = write::GzEncoder::new(Vec::new(), Compression::default()); + let inner = e.finish().unwrap(); + let mut d = read::GzDecoder::new(&inner[..]); + let mut s = String::new(); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, ""); + } + + #[test] + fn roundtrip_big() { + let mut real = Vec::new(); + let mut w = write::GzEncoder::new(Vec::new(), Compression::default()); + let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); + for _ in 0..200 { + let to_write = &v[..thread_rng().gen_range(0..v.len())]; + real.extend(to_write.iter().map(|x| *x)); + w.write_all(to_write).unwrap(); + } + let result = w.finish().unwrap(); + let mut r = read::GzDecoder::new(&result[..]); + let mut v = Vec::new(); + r.read_to_end(&mut v).unwrap(); + assert!(v == real); + } + + #[test] + fn roundtrip_big2() { + let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>(); + let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default())); + let mut res = Vec::new(); + r.read_to_end(&mut res).unwrap(); + assert!(res == v); + } + + #[test] + fn fields() { + let r = vec![0, 2, 4, 6]; + let e = GzBuilder::new() + .filename("foo.rs") + .comment("bar") + .extra(vec![0, 1, 2, 3]) + .read(&r[..], Compression::default()); + let mut d = read::GzDecoder::new(e); + assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..])); + assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..])); + assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..])); + let mut res = Vec::new(); + d.read_to_end(&mut res).unwrap(); + assert_eq!(res, vec![0, 2, 4, 6]); + } + + #[test] + fn keep_reading_after_end() { + let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); + e.write_all(b"foo bar baz").unwrap(); + let inner = e.finish().unwrap(); + let mut d = read::GzDecoder::new(&inner[..]); + let mut s = String::new(); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, "foo bar baz"); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, "foo bar baz"); + } + + #[test] + fn qc_reader() { + ::quickcheck::quickcheck(test as fn(_) -> _); + + fn test(v: Vec<u8>) -> bool { + let r = read::GzEncoder::new(&v[..], Compression::default()); + let mut r = read::GzDecoder::new(r); + let mut v2 = Vec::new(); + r.read_to_end(&mut v2).unwrap(); + v == v2 + } + } + + #[test] + fn flush_after_write() { + let mut f = write::GzEncoder::new(Vec::new(), Compression::default()); + write!(f, "Hello world").unwrap(); + f.flush().unwrap(); + } + + use crate::gz::bufread::tests::BlockingCursor; + #[test] + // test function read_and_forget of Buffer + fn blocked_partial_header_read() { + // this is a reader which receives data afterwards + let mut r = BlockingCursor::new(); + let data = vec![1, 2, 3]; + + match r.write_all(&data) { + Ok(()) => {} + _ => { + panic!("Unexpected result for write_all"); + } + } + r.set_position(0); + + // this is unused except for the buffering + let mut decoder = read::GzDecoder::new(r); + let mut out = Vec::with_capacity(7); + match decoder.read(&mut out) { + Err(e) => { + assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock); + } + _ => { + panic!("Unexpected result for decoder.read"); + } + } + } +} diff --git a/third_party/rust/flate2/src/gz/read.rs b/third_party/rust/flate2/src/gz/read.rs new file mode 100644 index 0000000000..dbbe632829 --- /dev/null +++ b/third_party/rust/flate2/src/gz/read.rs @@ -0,0 +1,278 @@ +use std::io; +use std::io::prelude::*; + +use super::bufread; +use super::{GzBuilder, GzHeader}; +use crate::bufreader::BufReader; +use crate::Compression; + +/// A gzip streaming encoder +/// +/// This structure exposes a [`Read`] interface that will read uncompressed data +/// from the underlying reader and expose the compressed version as a [`Read`] +/// interface. +/// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use std::io; +/// use flate2::Compression; +/// use flate2::read::GzEncoder; +/// +/// // Return a vector containing the GZ compressed version of hello world +/// +/// fn gzencode_hello_world() -> io::Result<Vec<u8>> { +/// let mut ret_vec = [0;100]; +/// let bytestring = b"hello world"; +/// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast()); +/// let count = gz.read(&mut ret_vec)?; +/// Ok(ret_vec[0..count].to_vec()) +/// } +/// ``` +#[derive(Debug)] +pub struct GzEncoder<R> { + inner: bufread::GzEncoder<BufReader<R>>, +} + +pub fn gz_encoder<R: Read>(inner: bufread::GzEncoder<BufReader<R>>) -> GzEncoder<R> { + GzEncoder { inner } +} + +impl<R: Read> GzEncoder<R> { + /// Creates a new encoder which will use the given compression level. + /// + /// The encoder is not configured specially for the emitted header. For + /// header configuration, see the `GzBuilder` type. + /// + /// The data read from the stream `r` will be compressed and available + /// through the returned reader. + pub fn new(r: R, level: Compression) -> GzEncoder<R> { + GzBuilder::new().read(r, level) + } +} + +impl<R> GzEncoder<R> { + /// Acquires a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + self.inner.get_ref().get_ref() + } + + /// Acquires a mutable reference to the underlying reader. + /// + /// Note that mutation of the reader may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut R { + self.inner.get_mut().get_mut() + } + + /// Returns the underlying stream, consuming this encoder + pub fn into_inner(self) -> R { + self.inner.into_inner().into_inner() + } +} + +impl<R: Read> Read for GzEncoder<R> { + fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { + self.inner.read(into) + } +} + +impl<R: Read + Write> Write for GzEncoder<R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.get_mut().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.get_mut().flush() + } +} + +/// A gzip streaming decoder +/// +/// This structure exposes a [`Read`] interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. +/// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// +/// # Examples +/// +/// ``` +/// +/// use std::io::prelude::*; +/// use std::io; +/// # use flate2::Compression; +/// # use flate2::write::GzEncoder; +/// use flate2::read::GzDecoder; +/// +/// # fn main() { +/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); +/// # e.write_all(b"Hello World").unwrap(); +/// # let bytes = e.finish().unwrap(); +/// # println!("{}", decode_reader(bytes).unwrap()); +/// # } +/// # +/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error +/// // Here &[u8] implements Read +/// +/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { +/// let mut gz = GzDecoder::new(&bytes[..]); +/// let mut s = String::new(); +/// gz.read_to_string(&mut s)?; +/// Ok(s) +/// } +/// ``` +#[derive(Debug)] +pub struct GzDecoder<R> { + inner: bufread::GzDecoder<BufReader<R>>, +} + +impl<R: Read> GzDecoder<R> { + /// Creates a new decoder from the given reader, immediately parsing the + /// gzip header. + pub fn new(r: R) -> GzDecoder<R> { + GzDecoder { + inner: bufread::GzDecoder::new(BufReader::new(r)), + } + } +} + +impl<R> GzDecoder<R> { + /// Returns the header associated with this stream, if it was valid. + pub fn header(&self) -> Option<&GzHeader> { + self.inner.header() + } + + /// Acquires a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + self.inner.get_ref().get_ref() + } + + /// Acquires a mutable reference to the underlying stream. + /// + /// Note that mutation of the stream may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut R { + self.inner.get_mut().get_mut() + } + + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> R { + self.inner.into_inner().into_inner() + } +} + +impl<R: Read> Read for GzDecoder<R> { + fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { + self.inner.read(into) + } +} + +impl<R: Read + Write> Write for GzDecoder<R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.get_mut().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.get_mut().flush() + } +} + +/// A gzip streaming decoder that decodes all members of a multistream +/// +/// A gzip member consists of a header, compressed data and a trailer. The [gzip +/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple +/// gzip members to be joined in a single stream. `MultiGzDecoder` will +/// decode all consecutive members while `GzDecoder` will only decompress the +/// first gzip member. The multistream format is commonly used in bioinformatics, +/// for example when using the BGZF compressed data. +/// +/// This structure exposes a [`Read`] interface that will consume all gzip members +/// from the underlying reader and emit uncompressed data. +/// +/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use std::io; +/// # use flate2::Compression; +/// # use flate2::write::GzEncoder; +/// use flate2::read::MultiGzDecoder; +/// +/// # fn main() { +/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); +/// # e.write_all(b"Hello World").unwrap(); +/// # let bytes = e.finish().unwrap(); +/// # println!("{}", decode_reader(bytes).unwrap()); +/// # } +/// # +/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error +/// // Here &[u8] implements Read +/// +/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { +/// let mut gz = MultiGzDecoder::new(&bytes[..]); +/// let mut s = String::new(); +/// gz.read_to_string(&mut s)?; +/// Ok(s) +/// } +/// ``` +#[derive(Debug)] +pub struct MultiGzDecoder<R> { + inner: bufread::MultiGzDecoder<BufReader<R>>, +} + +impl<R: Read> MultiGzDecoder<R> { + /// Creates a new decoder from the given reader, immediately parsing the + /// (first) gzip header. If the gzip stream contains multiple members all will + /// be decoded. + pub fn new(r: R) -> MultiGzDecoder<R> { + MultiGzDecoder { + inner: bufread::MultiGzDecoder::new(BufReader::new(r)), + } + } +} + +impl<R> MultiGzDecoder<R> { + /// Returns the current header associated with this stream, if it's valid. + pub fn header(&self) -> Option<&GzHeader> { + self.inner.header() + } + + /// Acquires a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + self.inner.get_ref().get_ref() + } + + /// Acquires a mutable reference to the underlying stream. + /// + /// Note that mutation of the stream may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut R { + self.inner.get_mut().get_mut() + } + + /// Consumes this decoder, returning the underlying reader. + pub fn into_inner(self) -> R { + self.inner.into_inner().into_inner() + } +} + +impl<R: Read> Read for MultiGzDecoder<R> { + fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { + self.inner.read(into) + } +} + +impl<R: Read + Write> Write for MultiGzDecoder<R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.get_mut().write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.get_mut().flush() + } +} diff --git a/third_party/rust/flate2/src/gz/write.rs b/third_party/rust/flate2/src/gz/write.rs new file mode 100644 index 0000000000..7cf1a7cd41 --- /dev/null +++ b/third_party/rust/flate2/src/gz/write.rs @@ -0,0 +1,450 @@ +use std::cmp; +use std::io; +use std::io::prelude::*; + +use super::bufread::{corrupt, read_gz_header}; +use super::{GzBuilder, GzHeader}; +use crate::crc::{Crc, CrcWriter}; +use crate::zio; +use crate::{Compress, Compression, Decompress, Status}; + +/// A gzip streaming encoder +/// +/// This structure exposes a [`Write`] interface that will emit compressed data +/// to the underlying writer `W`. +/// +/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use flate2::Compression; +/// use flate2::write::GzEncoder; +/// +/// // Vec<u8> implements Write to print the compressed bytes of sample string +/// # fn main() { +/// +/// let mut e = GzEncoder::new(Vec::new(), Compression::default()); +/// e.write_all(b"Hello World").unwrap(); +/// println!("{:?}", e.finish().unwrap()); +/// # } +/// ``` +#[derive(Debug)] +pub struct GzEncoder<W: Write> { + inner: zio::Writer<W, Compress>, + crc: Crc, + crc_bytes_written: usize, + header: Vec<u8>, +} + +pub fn gz_encoder<W: Write>(header: Vec<u8>, w: W, lvl: Compression) -> GzEncoder<W> { + GzEncoder { + inner: zio::Writer::new(w, Compress::new(lvl, false)), + crc: Crc::new(), + header, + crc_bytes_written: 0, + } +} + +impl<W: Write> GzEncoder<W> { + /// Creates a new encoder which will use the given compression level. + /// + /// The encoder is not configured specially for the emitted header. For + /// header configuration, see the `GzBuilder` type. + /// + /// The data written to the returned encoder will be compressed and then + /// written to the stream `w`. + pub fn new(w: W, level: Compression) -> GzEncoder<W> { + GzBuilder::new().write(w, level) + } + + /// Acquires a reference to the underlying writer. + pub fn get_ref(&self) -> &W { + self.inner.get_ref() + } + + /// Acquires a mutable reference to the underlying writer. + /// + /// Note that mutation of the writer may result in surprising results if + /// this encoder is continued to be used. + pub fn get_mut(&mut self) -> &mut W { + self.inner.get_mut() + } + + /// Attempt to finish this output stream, writing out final chunks of data. + /// + /// Note that this function can only be used once data has finished being + /// written to the output stream. After this function is called then further + /// calls to `write` may result in a panic. + /// + /// # Panics + /// + /// Attempts to write data to this stream may result in a panic after this + /// function is called. + /// + /// # Errors + /// + /// This function will perform I/O to complete this stream, and any I/O + /// errors which occur will be returned from this function. + pub fn try_finish(&mut self) -> io::Result<()> { + self.write_header()?; + self.inner.finish()?; + + while self.crc_bytes_written < 8 { + let (sum, amt) = (self.crc.sum() as u32, self.crc.amount()); + let buf = [ + (sum >> 0) as u8, + (sum >> 8) as u8, + (sum >> 16) as u8, + (sum >> 24) as u8, + (amt >> 0) as u8, + (amt >> 8) as u8, + (amt >> 16) as u8, + (amt >> 24) as u8, + ]; + let inner = self.inner.get_mut(); + let n = inner.write(&buf[self.crc_bytes_written..])?; + self.crc_bytes_written += n; + } + Ok(()) + } + + /// Finish encoding this stream, returning the underlying writer once the + /// encoding is done. + /// + /// Note that this function may not be suitable to call in a situation where + /// the underlying stream is an asynchronous I/O stream. To finish a stream + /// the `try_finish` (or `shutdown`) method should be used instead. To + /// re-acquire ownership of a stream it is safe to call this method after + /// `try_finish` or `shutdown` has returned `Ok`. + /// + /// # Errors + /// + /// This function will perform I/O to complete this stream, and any I/O + /// errors which occur will be returned from this function. + pub fn finish(mut self) -> io::Result<W> { + self.try_finish()?; + Ok(self.inner.take_inner()) + } + + fn write_header(&mut self) -> io::Result<()> { + while !self.header.is_empty() { + let n = self.inner.get_mut().write(&self.header)?; + self.header.drain(..n); + } + Ok(()) + } +} + +impl<W: Write> Write for GzEncoder<W> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + assert_eq!(self.crc_bytes_written, 0); + self.write_header()?; + let n = self.inner.write(buf)?; + self.crc.update(&buf[..n]); + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + assert_eq!(self.crc_bytes_written, 0); + self.write_header()?; + self.inner.flush() + } +} + +impl<R: Read + Write> Read for GzEncoder<R> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.get_mut().read(buf) + } +} + +impl<W: Write> Drop for GzEncoder<W> { + fn drop(&mut self) { + if self.inner.is_present() { + let _ = self.try_finish(); + } + } +} + +/// A gzip streaming decoder +/// +/// This structure exposes a [`Write`] interface that will emit compressed data +/// to the underlying writer `W`. +/// +/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// use std::io; +/// use flate2::Compression; +/// use flate2::write::{GzEncoder, GzDecoder}; +/// +/// # fn main() { +/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); +/// # e.write(b"Hello World").unwrap(); +/// # let bytes = e.finish().unwrap(); +/// # assert_eq!("Hello World", decode_writer(bytes).unwrap()); +/// # } +/// // Uncompresses a gzip encoded vector of bytes and returns a string or error +/// // Here Vec<u8> implements Write +/// fn decode_writer(bytes: Vec<u8>) -> io::Result<String> { +/// let mut writer = Vec::new(); +/// let mut decoder = GzDecoder::new(writer); +/// decoder.write_all(&bytes[..])?; +/// writer = decoder.finish()?; +/// let return_string = String::from_utf8(writer).expect("String parsing error"); +/// Ok(return_string) +/// } +/// ``` +#[derive(Debug)] +pub struct GzDecoder<W: Write> { + inner: zio::Writer<CrcWriter<W>, Decompress>, + crc_bytes: Vec<u8>, + header: Option<GzHeader>, + header_buf: Vec<u8>, +} + +const CRC_BYTES_LEN: usize = 8; + +impl<W: Write> GzDecoder<W> { + /// Creates a new decoder which will write uncompressed data to the stream. + /// + /// When this encoder is dropped or unwrapped the final pieces of data will + /// be flushed. + pub fn new(w: W) -> GzDecoder<W> { + GzDecoder { + inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), + crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), + header: None, + header_buf: Vec::new(), + } + } + + /// Returns the header associated with this stream. + pub fn header(&self) -> Option<&GzHeader> { + self.header.as_ref() + } + + /// Acquires a reference to the underlying writer. + pub fn get_ref(&self) -> &W { + self.inner.get_ref().get_ref() + } + + /// Acquires a mutable reference to the underlying writer. + /// + /// Note that mutating the output/input state of the stream may corrupt this + /// object, so care must be taken when using this method. + pub fn get_mut(&mut self) -> &mut W { + self.inner.get_mut().get_mut() + } + + /// Attempt to finish this output stream, writing out final chunks of data. + /// + /// Note that this function can only be used once data has finished being + /// written to the output stream. After this function is called then further + /// calls to `write` may result in a panic. + /// + /// # Panics + /// + /// Attempts to write data to this stream may result in a panic after this + /// function is called. + /// + /// # Errors + /// + /// This function will perform I/O to finish the stream, returning any + /// errors which happen. + pub fn try_finish(&mut self) -> io::Result<()> { + self.finish_and_check_crc()?; + Ok(()) + } + + /// Consumes this decoder, flushing the output stream. + /// + /// This will flush the underlying data stream and then return the contained + /// writer if the flush succeeded. + /// + /// Note that this function may not be suitable to call in a situation where + /// the underlying stream is an asynchronous I/O stream. To finish a stream + /// the `try_finish` (or `shutdown`) method should be used instead. To + /// re-acquire ownership of a stream it is safe to call this method after + /// `try_finish` or `shutdown` has returned `Ok`. + /// + /// # Errors + /// + /// This function will perform I/O to complete this stream, and any I/O + /// errors which occur will be returned from this function. + pub fn finish(mut self) -> io::Result<W> { + self.finish_and_check_crc()?; + Ok(self.inner.take_inner().into_inner()) + } + + fn finish_and_check_crc(&mut self) -> io::Result<()> { + self.inner.finish()?; + + if self.crc_bytes.len() != 8 { + return Err(corrupt()); + } + + let crc = ((self.crc_bytes[0] as u32) << 0) + | ((self.crc_bytes[1] as u32) << 8) + | ((self.crc_bytes[2] as u32) << 16) + | ((self.crc_bytes[3] as u32) << 24); + let amt = ((self.crc_bytes[4] as u32) << 0) + | ((self.crc_bytes[5] as u32) << 8) + | ((self.crc_bytes[6] as u32) << 16) + | ((self.crc_bytes[7] as u32) << 24); + if crc != self.inner.get_ref().crc().sum() as u32 { + return Err(corrupt()); + } + if amt != self.inner.get_ref().crc().amount() { + return Err(corrupt()); + } + Ok(()) + } +} + +struct Counter<T: Read> { + inner: T, + pos: usize, +} + +impl<T: Read> Read for Counter<T> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let pos = self.inner.read(buf)?; + self.pos += pos; + Ok(pos) + } +} + +impl<W: Write> Write for GzDecoder<W> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + if self.header.is_none() { + // trying to avoid buffer usage + let (res, pos) = { + let mut counter = Counter { + inner: self.header_buf.chain(buf), + pos: 0, + }; + let res = read_gz_header(&mut counter); + (res, counter.pos) + }; + + match res { + Err(err) => { + if err.kind() == io::ErrorKind::UnexpectedEof { + // not enough data for header, save to the buffer + self.header_buf.extend(buf); + Ok(buf.len()) + } else { + Err(err) + } + } + Ok(header) => { + self.header = Some(header); + let pos = pos - self.header_buf.len(); + self.header_buf.truncate(0); + Ok(pos) + } + } + } else { + let (n, status) = self.inner.write_with_status(buf)?; + + if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < 8 { + let remaining = buf.len() - n; + let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len()); + self.crc_bytes.extend(&buf[n..n + crc_bytes]); + return Ok(n + crc_bytes); + } + Ok(n) + } + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +impl<W: Read + Write> Read for GzDecoder<W> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + self.inner.get_mut().get_mut().read(buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const STR: &'static str = "Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World"; + + #[test] + fn decode_writer_one_chunk() { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let bytes = e.finish().unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + let n = decoder.write(&bytes[..]).unwrap(); + decoder.write(&bytes[n..]).unwrap(); + decoder.try_finish().unwrap(); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_partial_header() { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let bytes = e.finish().unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..5]).unwrap(), 5); + let n = decoder.write(&bytes[5..]).unwrap(); + if n < bytes.len() - 5 { + decoder.write(&bytes[n + 5..]).unwrap(); + } + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_exact_header() { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let bytes = e.finish().unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + assert_eq!(decoder.write(&bytes[..10]).unwrap(), 10); + decoder.write(&bytes[10..]).unwrap(); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } + + #[test] + fn decode_writer_partial_crc() { + let mut e = GzEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let bytes = e.finish().unwrap(); + + let mut writer = Vec::new(); + let mut decoder = GzDecoder::new(writer); + let l = bytes.len() - 5; + let n = decoder.write(&bytes[..l]).unwrap(); + decoder.write(&bytes[n..]).unwrap(); + writer = decoder.finish().unwrap(); + let return_string = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(return_string, STR); + } +} |