//! Buffered Base64 decoder. use crate::{ encoding, line_ending::{CHAR_CR, CHAR_LF}, Encoding, Error::{self, InvalidLength}, MIN_LINE_WIDTH, }; use core::{cmp, marker::PhantomData}; #[cfg(feature = "alloc")] use {alloc::vec::Vec, core::iter}; #[cfg(feature = "std")] use std::io; #[cfg(doc)] use crate::{Base64, Base64Unpadded}; /// Stateful Base64 decoder with support for buffered, incremental decoding. /// /// The `E` type parameter can be any type which impls [`Encoding`] such as /// [`Base64`] or [`Base64Unpadded`]. #[derive(Clone)] pub struct Decoder<'i, E: Encoding> { /// Current line being processed. line: Line<'i>, /// Base64 input data reader. line_reader: LineReader<'i>, /// Length of the remaining data after Base64 decoding. remaining_len: usize, /// Block buffer used for non-block-aligned data. block_buffer: BlockBuffer, /// Phantom parameter for the Base64 encoding in use. encoding: PhantomData, } impl<'i, E: Encoding> Decoder<'i, E> { /// Create a new decoder for a byte slice containing contiguous /// (non-newline-delimited) Base64-encoded data. /// /// # Returns /// - `Ok(decoder)` on success. /// - `Err(Error::InvalidLength)` if the input buffer is empty. pub fn new(input: &'i [u8]) -> Result { let line_reader = LineReader::new_unwrapped(input)?; let remaining_len = line_reader.decoded_len::()?; Ok(Self { line: Line::default(), line_reader, remaining_len, block_buffer: BlockBuffer::default(), encoding: PhantomData, }) } /// Create a new decoder for a byte slice containing Base64 which /// line wraps at the given line length. /// /// Trailing newlines are not supported and must be removed in advance. /// /// Newlines are handled according to what are roughly [RFC7468] conventions: /// /// ```text /// [parsers] MUST handle different newline conventions /// ``` /// /// RFC7468 allows any of the following as newlines, and allows a mixture /// of different types of newlines: /// /// ```text /// eol = CRLF / CR / LF /// ``` /// /// # Returns /// - `Ok(decoder)` on success. /// - `Err(Error::InvalidLength)` if the input buffer is empty or the line /// width is zero. /// /// [RFC7468]: https://datatracker.ietf.org/doc/html/rfc7468 pub fn new_wrapped(input: &'i [u8], line_width: usize) -> Result { let line_reader = LineReader::new_wrapped(input, line_width)?; let remaining_len = line_reader.decoded_len::()?; Ok(Self { line: Line::default(), line_reader, remaining_len, block_buffer: BlockBuffer::default(), encoding: PhantomData, }) } /// Fill the provided buffer with data decoded from Base64. /// /// Enough Base64 input data must remain to fill the entire buffer. /// /// # Returns /// - `Ok(bytes)` if the expected amount of data was read /// - `Err(Error::InvalidLength)` if the exact amount of data couldn't be read pub fn decode<'o>(&mut self, out: &'o mut [u8]) -> Result<&'o [u8], Error> { if self.is_finished() { return Err(InvalidLength); } let mut out_pos = 0; while out_pos < out.len() { // If there's data in the block buffer, use it if !self.block_buffer.is_empty() { let out_rem = out.len().checked_sub(out_pos).ok_or(InvalidLength)?; let bytes = self.block_buffer.take(out_rem)?; out[out_pos..][..bytes.len()].copy_from_slice(bytes); out_pos = out_pos.checked_add(bytes.len()).ok_or(InvalidLength)?; } // Advance the line reader if necessary if self.line.is_empty() && !self.line_reader.is_empty() { self.advance_line()?; } // Attempt to decode a stride of block-aligned data let in_blocks = self.line.len() / 4; let out_rem = out.len().checked_sub(out_pos).ok_or(InvalidLength)?; let out_blocks = out_rem / 3; let blocks = cmp::min(in_blocks, out_blocks); let in_aligned = self.line.take(blocks.checked_mul(4).ok_or(InvalidLength)?); if !in_aligned.is_empty() { let out_buf = &mut out[out_pos..][..blocks.checked_mul(3).ok_or(InvalidLength)?]; let decoded_len = self.perform_decode(in_aligned, out_buf)?.len(); out_pos = out_pos.checked_add(decoded_len).ok_or(InvalidLength)?; } if out_pos < out.len() { if self.is_finished() { // If we're out of input then we've been requested to decode // more data than is actually available. return Err(InvalidLength); } else { // If we still have data available but haven't completely // filled the output slice, we're in a situation where // either the input or output isn't block-aligned, so fill // the internal block buffer. self.fill_block_buffer()?; } } } self.remaining_len = self .remaining_len .checked_sub(out.len()) .ok_or(InvalidLength)?; Ok(out) } /// Decode all remaining Base64 data, placing the result into `buf`. /// /// If successful, this function will return the total number of bytes /// decoded into `buf`. #[cfg(feature = "alloc")] pub fn decode_to_end<'o>(&mut self, buf: &'o mut Vec) -> Result<&'o [u8], Error> { let start_len = buf.len(); let remaining_len = self.remaining_len(); let total_len = start_len.checked_add(remaining_len).ok_or(InvalidLength)?; if total_len > buf.capacity() { buf.reserve(total_len.checked_sub(buf.capacity()).ok_or(InvalidLength)?); } // Append `decoded_len` zeroes to the vector buf.extend(iter::repeat(0).take(remaining_len)); self.decode(&mut buf[start_len..])?; Ok(&buf[start_len..]) } /// Get the length of the remaining data after Base64 decoding. /// /// Decreases every time data is decoded. pub fn remaining_len(&self) -> usize { self.remaining_len } /// Has all of the input data been decoded? pub fn is_finished(&self) -> bool { self.line.is_empty() && self.line_reader.is_empty() && self.block_buffer.is_empty() } /// Fill the block buffer with data. fn fill_block_buffer(&mut self) -> Result<(), Error> { let mut buf = [0u8; BlockBuffer::SIZE]; let decoded = if self.line.len() < 4 && !self.line_reader.is_empty() { // Handle input block which is split across lines let mut tmp = [0u8; 4]; // Copy remaining data in the line into tmp let line_end = self.line.take(4); tmp[..line_end.len()].copy_from_slice(line_end); // Advance the line and attempt to fill tmp self.advance_line()?; let len = 4usize.checked_sub(line_end.len()).ok_or(InvalidLength)?; let line_begin = self.line.take(len); tmp[line_end.len()..][..line_begin.len()].copy_from_slice(line_begin); let tmp_len = line_begin .len() .checked_add(line_end.len()) .ok_or(InvalidLength)?; self.perform_decode(&tmp[..tmp_len], &mut buf) } else { let block = self.line.take(4); self.perform_decode(block, &mut buf) }?; self.block_buffer.fill(decoded) } /// Advance the internal buffer to the next line. fn advance_line(&mut self) -> Result<(), Error> { debug_assert!(self.line.is_empty(), "expected line buffer to be empty"); if let Some(line) = self.line_reader.next().transpose()? { self.line = line; Ok(()) } else { Err(InvalidLength) } } /// Perform Base64 decoding operation. fn perform_decode<'o>(&self, src: &[u8], dst: &'o mut [u8]) -> Result<&'o [u8], Error> { if self.is_finished() { E::decode(src, dst) } else { E::Unpadded::decode(src, dst) } } } #[cfg(feature = "std")] impl<'i, E: Encoding> io::Read for Decoder<'i, E> { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.is_finished() { return Ok(0); } let slice = match buf.get_mut(..self.remaining_len()) { Some(bytes) => bytes, None => buf, }; self.decode(slice)?; Ok(slice.len()) } fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { if self.is_finished() { return Ok(0); } Ok(self.decode_to_end(buf)?.len()) } fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { self.decode(buf)?; Ok(()) } } /// Base64 decode buffer for a 1-block input. /// /// This handles a partially decoded block of data, i.e. data which has been /// decoded but not read. #[derive(Clone, Default, Debug)] struct BlockBuffer { /// 3 decoded bytes from a 4-byte Base64-encoded input. decoded: [u8; Self::SIZE], /// Length of the buffer. length: usize, /// Position within the buffer. position: usize, } impl BlockBuffer { /// Size of the buffer in bytes. const SIZE: usize = 3; /// Fill the buffer by decoding up to 3 bytes of decoded Base64 input. fn fill(&mut self, decoded_input: &[u8]) -> Result<(), Error> { debug_assert!(self.is_empty()); if decoded_input.len() > Self::SIZE { return Err(InvalidLength); } self.position = 0; self.length = decoded_input.len(); self.decoded[..decoded_input.len()].copy_from_slice(decoded_input); Ok(()) } /// Take a specified number of bytes from the buffer. /// /// Returns as many bytes as possible, or an empty slice if the buffer has /// already been read to completion. fn take(&mut self, mut nbytes: usize) -> Result<&[u8], Error> { debug_assert!(self.position <= self.length); let start_pos = self.position; let remaining_len = self.length.checked_sub(start_pos).ok_or(InvalidLength)?; if nbytes > remaining_len { nbytes = remaining_len; } self.position = self.position.checked_add(nbytes).ok_or(InvalidLength)?; Ok(&self.decoded[start_pos..][..nbytes]) } /// Have all of the bytes in this buffer been consumed? fn is_empty(&self) -> bool { self.position == self.length } } /// A single line of linewrapped data, providing a read buffer. #[derive(Clone, Debug)] pub struct Line<'i> { /// Remaining data in the line remaining: &'i [u8], } impl<'i> Default for Line<'i> { fn default() -> Self { Self::new(&[]) } } impl<'i> Line<'i> { /// Create a new line which wraps the given input data. fn new(bytes: &'i [u8]) -> Self { Self { remaining: bytes } } /// Take up to `nbytes` from this line buffer. fn take(&mut self, nbytes: usize) -> &'i [u8] { let (bytes, rest) = if nbytes < self.remaining.len() { self.remaining.split_at(nbytes) } else { (self.remaining, [].as_ref()) }; self.remaining = rest; bytes } /// Slice off a tail of a given length. fn slice_tail(&self, nbytes: usize) -> Result<&'i [u8], Error> { let offset = self.len().checked_sub(nbytes).ok_or(InvalidLength)?; self.remaining.get(offset..).ok_or(InvalidLength) } /// Get the number of bytes remaining in this line. fn len(&self) -> usize { self.remaining.len() } /// Is the buffer for this line empty? fn is_empty(&self) -> bool { self.len() == 0 } /// Trim the newline off the end of this line. fn trim_end(&self) -> Self { Line::new(match self.remaining { [line @ .., CHAR_CR, CHAR_LF] => line, [line @ .., CHAR_CR] => line, [line @ .., CHAR_LF] => line, line => line, }) } } /// Iterator over multi-line Base64 input. #[derive(Clone)] struct LineReader<'i> { /// Remaining linewrapped data to be processed. remaining: &'i [u8], /// Line width. line_width: Option, } impl<'i> LineReader<'i> { /// Create a new reader which operates over continugous unwrapped data. fn new_unwrapped(bytes: &'i [u8]) -> Result { if bytes.is_empty() { Err(InvalidLength) } else { Ok(Self { remaining: bytes, line_width: None, }) } } /// Create a new reader which operates over linewrapped data. fn new_wrapped(bytes: &'i [u8], line_width: usize) -> Result { if line_width < MIN_LINE_WIDTH { return Err(InvalidLength); } let mut reader = Self::new_unwrapped(bytes)?; reader.line_width = Some(line_width); Ok(reader) } /// Is this line reader empty? fn is_empty(&self) -> bool { self.remaining.is_empty() } /// Get the total length of the data decoded from this line reader. fn decoded_len(&self) -> Result { let mut buffer = [0u8; 4]; let mut lines = self.clone(); let mut line = match lines.next().transpose()? { Some(l) => l, None => return Ok(0), }; let mut base64_len = 0usize; loop { base64_len = base64_len.checked_add(line.len()).ok_or(InvalidLength)?; match lines.next().transpose()? { Some(l) => { // Store the end of the line in the buffer so we can // reassemble the last block to determine the real length buffer.copy_from_slice(line.slice_tail(4)?); line = l } // To compute an exact decoded length we need to decode the // last Base64 block and get the decoded length. // // This is what the somewhat complex code below is doing. None => { // Compute number of bytes in the last block (may be unpadded) let base64_last_block_len = match base64_len % 4 { 0 => 4, n => n, }; // Compute decoded length without the last block let decoded_len = encoding::decoded_len( base64_len .checked_sub(base64_last_block_len) .ok_or(InvalidLength)?, ); // Compute the decoded length of the last block let mut out = [0u8; 3]; let last_block_len = if line.len() < base64_last_block_len { let buffered_part_len = base64_last_block_len .checked_sub(line.len()) .ok_or(InvalidLength)?; let offset = 4usize.checked_sub(buffered_part_len).ok_or(InvalidLength)?; for i in 0..buffered_part_len { buffer[i] = buffer[offset.checked_add(i).ok_or(InvalidLength)?]; } buffer[buffered_part_len..][..line.len()].copy_from_slice(line.remaining); let buffer_len = buffered_part_len .checked_add(line.len()) .ok_or(InvalidLength)?; E::decode(&buffer[..buffer_len], &mut out)?.len() } else { let last_block = line.slice_tail(base64_last_block_len)?; E::decode(last_block, &mut out)?.len() }; return decoded_len.checked_add(last_block_len).ok_or(InvalidLength); } } } } } impl<'i> Iterator for LineReader<'i> { type Item = Result, Error>; fn next(&mut self) -> Option, Error>> { if let Some(line_width) = self.line_width { let rest = match self.remaining.get(line_width..) { None | Some([]) => { if self.remaining.is_empty() { return None; } else { let line = Line::new(self.remaining).trim_end(); self.remaining = &[]; return Some(Ok(line)); } } Some([CHAR_CR, CHAR_LF, rest @ ..]) => rest, Some([CHAR_CR, rest @ ..]) => rest, Some([CHAR_LF, rest @ ..]) => rest, _ => { // Expected a leading newline return Some(Err(Error::InvalidEncoding)); } }; let line = Line::new(&self.remaining[..line_width]); self.remaining = rest; Some(Ok(line)) } else if !self.remaining.is_empty() { let line = Line::new(self.remaining).trim_end(); self.remaining = b""; if line.is_empty() { None } else { Some(Ok(line)) } } else { None } } } #[cfg(test)] mod tests { use crate::{alphabet::Alphabet, test_vectors::*, Base64, Base64Unpadded, Decoder}; #[cfg(feature = "std")] use {alloc::vec::Vec, std::io::Read}; #[test] fn decode_padded() { decode_test(PADDED_BIN, || { Decoder::::new(PADDED_BASE64.as_bytes()).unwrap() }) } #[test] fn decode_unpadded() { decode_test(UNPADDED_BIN, || { Decoder::::new(UNPADDED_BASE64.as_bytes()).unwrap() }) } #[test] fn decode_multiline_padded() { decode_test(MULTILINE_PADDED_BIN, || { Decoder::::new_wrapped(MULTILINE_PADDED_BASE64.as_bytes(), 70).unwrap() }) } #[test] fn decode_multiline_unpadded() { decode_test(MULTILINE_UNPADDED_BIN, || { Decoder::::new_wrapped(MULTILINE_UNPADDED_BASE64.as_bytes(), 70) .unwrap() }) } #[cfg(feature = "std")] #[test] fn read_multiline_padded() { let mut decoder = Decoder::::new_wrapped(MULTILINE_PADDED_BASE64.as_bytes(), 70).unwrap(); let mut buf = Vec::new(); let len = decoder.read_to_end(&mut buf).unwrap(); assert_eq!(len, MULTILINE_PADDED_BIN.len()); assert_eq!(buf.as_slice(), MULTILINE_PADDED_BIN); } /// Core functionality of a decoding test fn decode_test<'a, F, V>(expected: &[u8], f: F) where F: Fn() -> Decoder<'a, V>, V: Alphabet, { for chunk_size in 1..expected.len() { let mut decoder = f(); let mut remaining_len = decoder.remaining_len(); let mut buffer = [0u8; 1024]; for chunk in expected.chunks(chunk_size) { assert!(!decoder.is_finished()); let decoded = decoder.decode(&mut buffer[..chunk.len()]).unwrap(); assert_eq!(chunk, decoded); remaining_len -= decoded.len(); assert_eq!(remaining_len, decoder.remaining_len()); } assert!(decoder.is_finished()); assert_eq!(decoder.remaining_len(), 0); } } }