diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/base64/src/read/decoder.rs | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/base64/src/read/decoder.rs')
-rw-r--r-- | third_party/rust/base64/src/read/decoder.rs | 282 |
1 files changed, 282 insertions, 0 deletions
diff --git a/third_party/rust/base64/src/read/decoder.rs b/third_party/rust/base64/src/read/decoder.rs new file mode 100644 index 0000000000..7a9c4cd292 --- /dev/null +++ b/third_party/rust/base64/src/read/decoder.rs @@ -0,0 +1,282 @@ +use crate::{decode_config_slice, Config, DecodeError}; +use std::io::Read; +use std::{cmp, fmt, io}; + +// This should be large, but it has to fit on the stack. +pub(crate) const BUF_SIZE: usize = 1024; + +// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding). +const BASE64_CHUNK_SIZE: usize = 4; +const DECODED_CHUNK_SIZE: usize = 3; + +/// A `Read` implementation that decodes base64 data read from an underlying reader. +/// +/// # Examples +/// +/// ``` +/// use std::io::Read; +/// use std::io::Cursor; +/// +/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc. +/// let mut wrapped_reader = Cursor::new(b"YXNkZg=="); +/// let mut decoder = base64::read::DecoderReader::new( +/// &mut wrapped_reader, base64::STANDARD); +/// +/// // handle errors as you normally would +/// let mut result = Vec::new(); +/// decoder.read_to_end(&mut result).unwrap(); +/// +/// assert_eq!(b"asdf", &result[..]); +/// +/// ``` +pub struct DecoderReader<'a, R: 'a + io::Read> { + config: Config, + /// Where b64 data is read from + r: &'a mut R, + + // Holds b64 data read from the delegate reader. + b64_buffer: [u8; BUF_SIZE], + // The start of the pending buffered data in b64_buffer. + b64_offset: usize, + // The amount of buffered b64 data. + b64_len: usize, + // Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a + // decoded chunk in to, we have to be able to hang on to a few decoded bytes. + // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to + // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest + // into here, which seems like a lot of complexity for 1 extra byte of storage. + decoded_buffer: [u8; 3], + // index of start of decoded data + decoded_offset: usize, + // length of decoded data + decoded_len: usize, + // used to provide accurate offsets in errors + total_b64_decoded: usize, +} + +impl<'a, R: io::Read> fmt::Debug for DecoderReader<'a, R> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("DecoderReader") + .field("config", &self.config) + .field("b64_offset", &self.b64_offset) + .field("b64_len", &self.b64_len) + .field("decoded_buffer", &self.decoded_buffer) + .field("decoded_offset", &self.decoded_offset) + .field("decoded_len", &self.decoded_len) + .field("total_b64_decoded", &self.total_b64_decoded) + .finish() + } +} + +impl<'a, R: io::Read> DecoderReader<'a, R> { + /// Create a new decoder that will read from the provided reader `r`. + pub fn new(r: &'a mut R, config: Config) -> Self { + DecoderReader { + config, + r, + b64_buffer: [0; BUF_SIZE], + b64_offset: 0, + b64_len: 0, + decoded_buffer: [0; DECODED_CHUNK_SIZE], + decoded_offset: 0, + decoded_len: 0, + total_b64_decoded: 0, + } + } + + /// Write as much as possible of the decoded buffer into the target buffer. + /// Must only be called when there is something to write and space to write into. + /// Returns a Result with the number of (decoded) bytes copied. + fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> { + debug_assert!(self.decoded_len > 0); + debug_assert!(buf.len() > 0); + + let copy_len = cmp::min(self.decoded_len, buf.len()); + debug_assert!(copy_len > 0); + debug_assert!(copy_len <= self.decoded_len); + + buf[..copy_len].copy_from_slice( + &self.decoded_buffer[self.decoded_offset..self.decoded_offset + copy_len], + ); + + self.decoded_offset += copy_len; + self.decoded_len -= copy_len; + + debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); + + Ok(copy_len) + } + + /// Read into the remaining space in the buffer after the current contents. + /// Must only be called when there is space to read into in the buffer. + /// Returns the number of bytes read. + fn read_from_delegate(&mut self) -> io::Result<usize> { + debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); + + let read = self + .r + .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; + self.b64_len += read; + + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + + return Ok(read); + } + + /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the + /// caller's responsibility to choose the number of b64 bytes to decode correctly. + /// + /// Returns a Result with the number of decoded bytes written to `buf`. + fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> { + debug_assert!(self.b64_len >= num_bytes); + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + debug_assert!(buf.len() > 0); + + let decoded = decode_config_slice( + &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes], + self.config, + &mut buf[..], + ) + .map_err(|e| match e { + DecodeError::InvalidByte(offset, byte) => { + DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) + } + DecodeError::InvalidLength => DecodeError::InvalidLength, + DecodeError::InvalidLastSymbol(offset, byte) => { + DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) + } + }) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + self.total_b64_decoded += num_bytes; + self.b64_offset += num_bytes; + self.b64_len -= num_bytes; + + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + + Ok(decoded) + } +} + +impl<'a, R: Read> Read for DecoderReader<'a, R> { + /// Decode input from the wrapped reader. + /// + /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes + /// written in `buf`. + /// + /// Where possible, this function buffers base64 to minimize the number of read() calls to the + /// delegate reader. + /// + /// # Errors + /// + /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid + /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + if buf.len() == 0 { + return Ok(0); + } + + // offset == BUF_SIZE when we copied it all last time + debug_assert!(self.b64_offset <= BUF_SIZE); + debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); + debug_assert!(if self.b64_offset == BUF_SIZE { + self.b64_len == 0 + } else { + self.b64_len <= BUF_SIZE + }); + + debug_assert!(if self.decoded_len == 0 { + // can be = when we were able to copy the complete chunk + self.decoded_offset <= DECODED_CHUNK_SIZE + } else { + self.decoded_offset < DECODED_CHUNK_SIZE + }); + + // We shouldn't ever decode into here when we can't immediately write at least one byte into + // the provided buf, so the effective length should only be 3 momentarily between when we + // decode and when we copy into the target buffer. + debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); + debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); + + if self.decoded_len > 0 { + // we have a few leftover decoded bytes; flush that rather than pull in more b64 + self.flush_decoded_buf(buf) + } else { + let mut at_eof = false; + while self.b64_len < BASE64_CHUNK_SIZE { + // Work around lack of copy_within, which is only present in 1.37 + // Copy any bytes we have to the start of the buffer. + // We know we have < 1 chunk, so we can use a tiny tmp buffer. + let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE]; + memmove_buf[..self.b64_len].copy_from_slice( + &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len], + ); + self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]); + self.b64_offset = 0; + + // then fill in more data + let read = self.read_from_delegate()?; + if read == 0 { + // we never pass in an empty buf, so 0 => we've hit EOF + at_eof = true; + break; + } + } + + if self.b64_len == 0 { + debug_assert!(at_eof); + // we must be at EOF, and we have no data left to decode + return Ok(0); + }; + + debug_assert!(if at_eof { + // if we are at eof, we may not have a complete chunk + self.b64_len > 0 + } else { + // otherwise, we must have at least one chunk + self.b64_len >= BASE64_CHUNK_SIZE + }); + + debug_assert_eq!(0, self.decoded_len); + + if buf.len() < DECODED_CHUNK_SIZE { + // caller requested an annoyingly short read + // have to write to a tmp buf first to avoid double mutable borrow + let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE]; + // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have + // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64 + // tokens, not 1, since 1 token can't decode to 1 byte). + let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE); + + let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?; + self.decoded_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]); + + self.decoded_offset = 0; + self.decoded_len = decoded; + + // can be less than 3 on last block due to padding + debug_assert!(decoded <= 3); + + self.flush_decoded_buf(buf) + } else { + let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE) + .checked_mul(BASE64_CHUNK_SIZE) + .expect("too many chunks"); + debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE); + + let b64_bytes_available_to_decode = if at_eof { + self.b64_len + } else { + // only use complete chunks + self.b64_len - self.b64_len % 4 + }; + + let actual_decode_len = cmp::min( + b64_bytes_that_can_decode_into_buf, + b64_bytes_available_to_decode, + ); + self.decode_to_buf(actual_decode_len, buf) + } + } + } +} |