summaryrefslogtreecommitdiffstats
path: root/third_party/rust/base64/src/read/decoder.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/base64/src/read/decoder.rs')
-rw-r--r--third_party/rust/base64/src/read/decoder.rs295
1 files changed, 295 insertions, 0 deletions
diff --git a/third_party/rust/base64/src/read/decoder.rs b/third_party/rust/base64/src/read/decoder.rs
new file mode 100644
index 0000000000..4888c9c4e7
--- /dev/null
+++ b/third_party/rust/base64/src/read/decoder.rs
@@ -0,0 +1,295 @@
+use crate::{engine::Engine, DecodeError};
+use std::{cmp, fmt, io};
+
+// This should be large, but it has to fit on the stack.
+pub(crate) const BUF_SIZE: usize = 1024;
+
+// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
+const BASE64_CHUNK_SIZE: usize = 4;
+const DECODED_CHUNK_SIZE: usize = 3;
+
+/// A `Read` implementation that decodes base64 data read from an underlying reader.
+///
+/// # Examples
+///
+/// ```
+/// use std::io::Read;
+/// use std::io::Cursor;
+/// use base64::engine::general_purpose;
+///
+/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
+/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
+/// let mut decoder = base64::read::DecoderReader::new(
+/// &mut wrapped_reader,
+/// &general_purpose::STANDARD);
+///
+/// // handle errors as you normally would
+/// let mut result = Vec::new();
+/// decoder.read_to_end(&mut result).unwrap();
+///
+/// assert_eq!(b"asdf", &result[..]);
+///
+/// ```
+pub struct DecoderReader<'e, E: Engine, R: io::Read> {
+ engine: &'e E,
+ /// Where b64 data is read from
+ inner: R,
+
+ // Holds b64 data read from the delegate reader.
+ b64_buffer: [u8; BUF_SIZE],
+ // The start of the pending buffered data in b64_buffer.
+ b64_offset: usize,
+ // The amount of buffered b64 data.
+ b64_len: usize,
+ // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
+ // decoded chunk in to, we have to be able to hang on to a few decoded bytes.
+ // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to
+ // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
+ // into here, which seems like a lot of complexity for 1 extra byte of storage.
+ decoded_buffer: [u8; 3],
+ // index of start of decoded data
+ decoded_offset: usize,
+ // length of decoded data
+ decoded_len: usize,
+ // used to provide accurate offsets in errors
+ total_b64_decoded: usize,
+}
+
+impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("DecoderReader")
+ .field("b64_offset", &self.b64_offset)
+ .field("b64_len", &self.b64_len)
+ .field("decoded_buffer", &self.decoded_buffer)
+ .field("decoded_offset", &self.decoded_offset)
+ .field("decoded_len", &self.decoded_len)
+ .field("total_b64_decoded", &self.total_b64_decoded)
+ .finish()
+ }
+}
+
+impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
+ /// Create a new decoder that will read from the provided reader `r`.
+ pub fn new(reader: R, engine: &'e E) -> Self {
+ DecoderReader {
+ engine,
+ inner: reader,
+ b64_buffer: [0; BUF_SIZE],
+ b64_offset: 0,
+ b64_len: 0,
+ decoded_buffer: [0; DECODED_CHUNK_SIZE],
+ decoded_offset: 0,
+ decoded_len: 0,
+ total_b64_decoded: 0,
+ }
+ }
+
+ /// Write as much as possible of the decoded buffer into the target buffer.
+ /// Must only be called when there is something to write and space to write into.
+ /// Returns a Result with the number of (decoded) bytes copied.
+ fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ debug_assert!(self.decoded_len > 0);
+ debug_assert!(!buf.is_empty());
+
+ let copy_len = cmp::min(self.decoded_len, buf.len());
+ debug_assert!(copy_len > 0);
+ debug_assert!(copy_len <= self.decoded_len);
+
+ buf[..copy_len].copy_from_slice(
+ &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len],
+ );
+
+ self.decoded_offset += copy_len;
+ self.decoded_len -= copy_len;
+
+ debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
+
+ Ok(copy_len)
+ }
+
+ /// Read into the remaining space in the buffer after the current contents.
+ /// Must only be called when there is space to read into in the buffer.
+ /// Returns the number of bytes read.
+ fn read_from_delegate(&mut self) -> io::Result<usize> {
+ debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);
+
+ let read = self
+ .inner
+ .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
+ self.b64_len += read;
+
+ debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
+
+ Ok(read)
+ }
+
+ /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
+ /// caller's responsibility to choose the number of b64 bytes to decode correctly.
+ ///
+ /// Returns a Result with the number of decoded bytes written to `buf`.
+ fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
+ debug_assert!(self.b64_len >= num_bytes);
+ debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
+ debug_assert!(!buf.is_empty());
+
+ let decoded = self
+ .engine
+ .internal_decode(
+ &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
+ buf,
+ self.engine.internal_decoded_len_estimate(num_bytes),
+ )
+ .map_err(|e| match e {
+ DecodeError::InvalidByte(offset, byte) => {
+ DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
+ }
+ DecodeError::InvalidLength => DecodeError::InvalidLength,
+ DecodeError::InvalidLastSymbol(offset, byte) => {
+ DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte)
+ }
+ DecodeError::InvalidPadding => DecodeError::InvalidPadding,
+ })
+ .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+ self.total_b64_decoded += num_bytes;
+ self.b64_offset += num_bytes;
+ self.b64_len -= num_bytes;
+
+ debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
+
+ Ok(decoded)
+ }
+
+ /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
+ /// input from.
+ ///
+ /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
+ /// unspecified. This function is mainly provided because the inner reader type may provide
+ /// additional functionality beyond the `Read` implementation which may still be useful.
+ pub fn into_inner(self) -> R {
+ self.inner
+ }
+}
+
+impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
+ /// Decode input from the wrapped reader.
+ ///
+ /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
+ /// written in `buf`.
+ ///
+ /// Where possible, this function buffers base64 to minimize the number of read() calls to the
+ /// delegate reader.
+ ///
+ /// # Errors
+ ///
+ /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
+ /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ if buf.is_empty() {
+ return Ok(0);
+ }
+
+ // offset == BUF_SIZE when we copied it all last time
+ debug_assert!(self.b64_offset <= BUF_SIZE);
+ debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
+ debug_assert!(if self.b64_offset == BUF_SIZE {
+ self.b64_len == 0
+ } else {
+ self.b64_len <= BUF_SIZE
+ });
+
+ debug_assert!(if self.decoded_len == 0 {
+ // can be = when we were able to copy the complete chunk
+ self.decoded_offset <= DECODED_CHUNK_SIZE
+ } else {
+ self.decoded_offset < DECODED_CHUNK_SIZE
+ });
+
+ // We shouldn't ever decode into here when we can't immediately write at least one byte into
+ // the provided buf, so the effective length should only be 3 momentarily between when we
+ // decode and when we copy into the target buffer.
+ debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
+ debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
+
+ if self.decoded_len > 0 {
+ // we have a few leftover decoded bytes; flush that rather than pull in more b64
+ self.flush_decoded_buf(buf)
+ } else {
+ let mut at_eof = false;
+ while self.b64_len < BASE64_CHUNK_SIZE {
+ // Work around lack of copy_within, which is only present in 1.37
+ // Copy any bytes we have to the start of the buffer.
+ // We know we have < 1 chunk, so we can use a tiny tmp buffer.
+ let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
+ memmove_buf[..self.b64_len].copy_from_slice(
+ &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
+ );
+ self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
+ self.b64_offset = 0;
+
+ // then fill in more data
+ let read = self.read_from_delegate()?;
+ if read == 0 {
+ // we never pass in an empty buf, so 0 => we've hit EOF
+ at_eof = true;
+ break;
+ }
+ }
+
+ if self.b64_len == 0 {
+ debug_assert!(at_eof);
+ // we must be at EOF, and we have no data left to decode
+ return Ok(0);
+ };
+
+ debug_assert!(if at_eof {
+ // if we are at eof, we may not have a complete chunk
+ self.b64_len > 0
+ } else {
+ // otherwise, we must have at least one chunk
+ self.b64_len >= BASE64_CHUNK_SIZE
+ });
+
+ debug_assert_eq!(0, self.decoded_len);
+
+ if buf.len() < DECODED_CHUNK_SIZE {
+ // caller requested an annoyingly short read
+ // have to write to a tmp buf first to avoid double mutable borrow
+ let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
+ // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
+ // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
+ // tokens, not 1, since 1 token can't decode to 1 byte).
+ let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);
+
+ let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
+ self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);
+
+ self.decoded_offset = 0;
+ self.decoded_len = decoded;
+
+ // can be less than 3 on last block due to padding
+ debug_assert!(decoded <= 3);
+
+ self.flush_decoded_buf(buf)
+ } else {
+ let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
+ .checked_mul(BASE64_CHUNK_SIZE)
+ .expect("too many chunks");
+ debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
+
+ let b64_bytes_available_to_decode = if at_eof {
+ self.b64_len
+ } else {
+ // only use complete chunks
+ self.b64_len - self.b64_len % 4
+ };
+
+ let actual_decode_len = cmp::min(
+ b64_bytes_that_can_decode_into_buf,
+ b64_bytes_available_to_decode,
+ );
+ self.decode_to_buf(actual_decode_len, buf)
+ }
+ }
+ }
+}