use crate::{engine::Engine, DecodeError, PAD_BYTE}; use std::{cmp, fmt, io}; // This should be large, but it has to fit on the stack. pub(crate) const BUF_SIZE: usize = 1024; // 4 bytes of base64 data encode 3 bytes of raw data (modulo padding). const BASE64_CHUNK_SIZE: usize = 4; const DECODED_CHUNK_SIZE: usize = 3; /// A `Read` implementation that decodes base64 data read from an underlying reader. /// /// # Examples /// /// ``` /// use std::io::Read; /// use std::io::Cursor; /// use base64::engine::general_purpose; /// /// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc. /// let mut wrapped_reader = Cursor::new(b"YXNkZg=="); /// let mut decoder = base64::read::DecoderReader::new( /// &mut wrapped_reader, /// &general_purpose::STANDARD); /// /// // handle errors as you normally would /// let mut result = Vec::new(); /// decoder.read_to_end(&mut result).unwrap(); /// /// assert_eq!(b"asdf", &result[..]); /// /// ``` pub struct DecoderReader<'e, E: Engine, R: io::Read> { engine: &'e E, /// Where b64 data is read from inner: R, // Holds b64 data read from the delegate reader. b64_buffer: [u8; BUF_SIZE], // The start of the pending buffered data in b64_buffer. b64_offset: usize, // The amount of buffered b64 data. b64_len: usize, // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a // decoded chunk in to, we have to be able to hang on to a few decoded bytes. // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest // into here, which seems like a lot of complexity for 1 extra byte of storage. decoded_buffer: [u8; DECODED_CHUNK_SIZE], // index of start of decoded data decoded_offset: usize, // length of decoded data decoded_len: usize, // used to provide accurate offsets in errors total_b64_decoded: usize, // offset of previously seen padding, if any padding_offset: Option, } impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("DecoderReader") .field("b64_offset", &self.b64_offset) .field("b64_len", &self.b64_len) .field("decoded_buffer", &self.decoded_buffer) .field("decoded_offset", &self.decoded_offset) .field("decoded_len", &self.decoded_len) .field("total_b64_decoded", &self.total_b64_decoded) .field("padding_offset", &self.padding_offset) .finish() } } impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { /// Create a new decoder that will read from the provided reader `r`. pub fn new(reader: R, engine: &'e E) -> Self { DecoderReader { engine, inner: reader, b64_buffer: [0; BUF_SIZE], b64_offset: 0, b64_len: 0, decoded_buffer: [0; DECODED_CHUNK_SIZE], decoded_offset: 0, decoded_len: 0, total_b64_decoded: 0, padding_offset: None, } } /// Write as much as possible of the decoded buffer into the target buffer. /// Must only be called when there is something to write and space to write into. /// Returns a Result with the number of (decoded) bytes copied. fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result { debug_assert!(self.decoded_len > 0); debug_assert!(!buf.is_empty()); let copy_len = cmp::min(self.decoded_len, buf.len()); debug_assert!(copy_len > 0); debug_assert!(copy_len <= self.decoded_len); buf[..copy_len].copy_from_slice( &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len], ); self.decoded_offset += copy_len; self.decoded_len -= copy_len; debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); Ok(copy_len) } /// Read into the remaining space in the buffer after the current contents. /// Must only be called when there is space to read into in the buffer. /// Returns the number of bytes read. fn read_from_delegate(&mut self) -> io::Result { debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); let read = self .inner .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; self.b64_len += read; debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); Ok(read) } /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the /// caller's responsibility to choose the number of b64 bytes to decode correctly. /// /// Returns a Result with the number of decoded bytes written to `buf`. fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result { debug_assert!(self.b64_len >= b64_len_to_decode); debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); debug_assert!(!buf.is_empty()); let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode]; let decode_metadata = self .engine .internal_decode( b64_to_decode, buf, self.engine.internal_decoded_len_estimate(b64_len_to_decode), ) .map_err(|e| match e { DecodeError::InvalidByte(offset, byte) => { // This can be incorrect, but not in a way that probably matters to anyone: // if there was padding handled in a previous decode, and we are now getting // InvalidByte due to more padding, we should arguably report InvalidByte with // PAD_BYTE at the original padding position (`self.padding_offset`), but we // don't have a good way to tie those two cases together, so instead we // just report the invalid byte as if the previous padding, and its possibly // related downgrade to a now invalid byte, didn't happen. DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) } DecodeError::InvalidLength => DecodeError::InvalidLength, DecodeError::InvalidLastSymbol(offset, byte) => { DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) } DecodeError::InvalidPadding => DecodeError::InvalidPadding, }) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; if let Some(offset) = self.padding_offset { // we've already seen padding if decode_metadata.decoded_len > 0 { // we read more after already finding padding; report error at first padding byte return Err(io::Error::new( io::ErrorKind::InvalidData, DecodeError::InvalidByte(offset, PAD_BYTE), )); } } self.padding_offset = self.padding_offset.or(decode_metadata .padding_offset .map(|offset| self.total_b64_decoded + offset)); self.total_b64_decoded += b64_len_to_decode; self.b64_offset += b64_len_to_decode; self.b64_len -= b64_len_to_decode; debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); Ok(decode_metadata.decoded_len) } /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded /// input from. /// /// Because `DecoderReader` performs internal buffering, the state of the inner reader is /// unspecified. This function is mainly provided because the inner reader type may provide /// additional functionality beyond the `Read` implementation which may still be useful. pub fn into_inner(self) -> R { self.inner } } impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { /// Decode input from the wrapped reader. /// /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes /// written in `buf`. /// /// Where possible, this function buffers base64 to minimize the number of read() calls to the /// delegate reader. /// /// # Errors /// /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. fn read(&mut self, buf: &mut [u8]) -> io::Result { if buf.is_empty() { return Ok(0); } // offset == BUF_SIZE when we copied it all last time debug_assert!(self.b64_offset <= BUF_SIZE); debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); debug_assert!(if self.b64_offset == BUF_SIZE { self.b64_len == 0 } else { self.b64_len <= BUF_SIZE }); debug_assert!(if self.decoded_len == 0 { // can be = when we were able to copy the complete chunk self.decoded_offset <= DECODED_CHUNK_SIZE } else { self.decoded_offset < DECODED_CHUNK_SIZE }); // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one // byte into the provided buf, so the effective length should only be 3 momentarily between // when we decode and when we copy into the target buffer. debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); if self.decoded_len > 0 { // we have a few leftover decoded bytes; flush that rather than pull in more b64 self.flush_decoded_buf(buf) } else { let mut at_eof = false; while self.b64_len < BASE64_CHUNK_SIZE { // Copy any bytes we have to the start of the buffer. self.b64_buffer .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0); self.b64_offset = 0; // then fill in more data let read = self.read_from_delegate()?; if read == 0 { // we never read into an empty buf, so 0 => we've hit EOF at_eof = true; break; } } if self.b64_len == 0 { debug_assert!(at_eof); // we must be at EOF, and we have no data left to decode return Ok(0); }; debug_assert!(if at_eof { // if we are at eof, we may not have a complete chunk self.b64_len > 0 } else { // otherwise, we must have at least one chunk self.b64_len >= BASE64_CHUNK_SIZE }); debug_assert_eq!(0, self.decoded_len); if buf.len() < DECODED_CHUNK_SIZE { // caller requested an annoyingly short read // have to write to a tmp buf first to avoid double mutable borrow let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE]; // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64 // tokens, not 1, since 1 token can't decode to 1 byte). let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE); let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?; self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]); self.decoded_offset = 0; self.decoded_len = decoded; // can be less than 3 on last block due to padding debug_assert!(decoded <= 3); self.flush_decoded_buf(buf) } else { let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE) .checked_mul(BASE64_CHUNK_SIZE) .expect("too many chunks"); debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE); let b64_bytes_available_to_decode = if at_eof { self.b64_len } else { // only use complete chunks self.b64_len - self.b64_len % 4 }; let actual_decode_len = cmp::min( b64_bytes_that_can_decode_into_buf, b64_bytes_available_to_decode, ); self.decode_to_buf(actual_decode_len, buf) } } } }