summaryrefslogtreecommitdiffstats
path: root/vendor/base64/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/base64/src')
-rw-r--r--vendor/base64/src/chunked_encoder.rs3
-rw-r--r--vendor/base64/src/decode.rs5
-rw-r--r--vendor/base64/src/encode.rs28
-rw-r--r--vendor/base64/src/engine/general_purpose/decode.rs59
-rw-r--r--vendor/base64/src/engine/general_purpose/decode_suffix.rs17
-rw-r--r--vendor/base64/src/engine/general_purpose/mod.rs5
-rw-r--r--vendor/base64/src/engine/mod.rs70
-rw-r--r--vendor/base64/src/engine/naive.rs4
-rw-r--r--vendor/base64/src/engine/tests.rs387
-rw-r--r--vendor/base64/src/read/decoder.rs65
-rw-r--r--vendor/base64/src/read/decoder_tests.rs157
11 files changed, 643 insertions, 157 deletions
diff --git a/vendor/base64/src/chunked_encoder.rs b/vendor/base64/src/chunked_encoder.rs
index 045725974..bc3810ab7 100644
--- a/vendor/base64/src/chunked_encoder.rs
+++ b/vendor/base64/src/chunked_encoder.rs
@@ -49,7 +49,8 @@ impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> {
if self.engine.config().encode_padding() && !more_input_left {
// no more input, add padding if needed. Buffer will have room because
// max_input_length leaves room for it.
- b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
+ b64_bytes_written +=
+ add_padding(b64_bytes_written, &mut encode_buf[b64_bytes_written..]);
}
sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
diff --git a/vendor/base64/src/decode.rs b/vendor/base64/src/decode.rs
index 047151840..7d29fdc82 100644
--- a/vendor/base64/src/decode.rs
+++ b/vendor/base64/src/decode.rs
@@ -148,11 +148,6 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
/// // start of the next quad of encoded symbols
/// assert_eq!(6, decoded_len_estimate(5));
/// ```
-///
-/// # Panics
-///
-/// Panics if decoded length estimation overflows.
-/// This would happen for sizes within a few bytes of the maximum value of `usize`.
pub fn decoded_len_estimate(encoded_len: usize) -> usize {
STANDARD
.internal_decoded_len_estimate(encoded_len)
diff --git a/vendor/base64/src/encode.rs b/vendor/base64/src/encode.rs
index cb176504a..15b903d2c 100644
--- a/vendor/base64/src/encode.rs
+++ b/vendor/base64/src/encode.rs
@@ -77,7 +77,7 @@ pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
let b64_bytes_written = engine.internal_encode(input, output);
let padding_bytes = if engine.config().encode_padding() {
- add_padding(input.len(), &mut output[b64_bytes_written..])
+ add_padding(b64_bytes_written, &mut output[b64_bytes_written..])
} else {
0
};
@@ -117,20 +117,20 @@ pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
}
/// Write padding characters.
-/// `input_len` is the size of the original, not encoded, input.
+/// `unpadded_output_len` is the size of the unpadded but base64 encoded data.
/// `output` is the slice where padding should be written, of length at least 2.
///
/// Returns the number of padding bytes written.
-pub(crate) fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
- // TODO base on encoded len to use cheaper mod by 4 (aka & 7)
- let rem = input_len % 3;
- let mut bytes_written = 0;
- for _ in 0..((3 - rem) % 3) {
- output[bytes_written] = PAD_BYTE;
- bytes_written += 1;
+pub(crate) fn add_padding(unpadded_output_len: usize, output: &mut [u8]) -> usize {
+ let pad_bytes = (4 - (unpadded_output_len % 4)) % 4;
+ // for just a couple bytes, this has better performance than using
+ // .fill(), or iterating over mutable refs, which call memset()
+ #[allow(clippy::needless_range_loop)]
+ for i in 0..pad_bytes {
+ output[i] = PAD_BYTE;
}
- bytes_written
+ pad_bytes
}
/// Errors that can occur while encoding into a slice.
@@ -434,18 +434,18 @@ mod tests {
let mut rng = rand::rngs::SmallRng::from_entropy();
- // cover our bases for length % 3
- for input_len in 0..10 {
+ // cover our bases for length % 4
+ for unpadded_output_len in 0..20 {
output.clear();
// fill output with random
- for _ in 0..10 {
+ for _ in 0..100 {
output.push(rng.gen());
}
let orig_output_buf = output.clone();
- let bytes_written = add_padding(input_len, &mut output);
+ let bytes_written = add_padding(unpadded_output_len, &mut output);
// make sure the part beyond bytes_written is the same garbage it was before
assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
diff --git a/vendor/base64/src/engine/general_purpose/decode.rs b/vendor/base64/src/engine/general_purpose/decode.rs
index e9fd78877..21a386fd6 100644
--- a/vendor/base64/src/engine/general_purpose/decode.rs
+++ b/vendor/base64/src/engine/general_purpose/decode.rs
@@ -1,5 +1,5 @@
use crate::{
- engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
+ engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};
@@ -30,16 +30,11 @@ pub struct GeneralPurposeEstimate {
impl GeneralPurposeEstimate {
pub(crate) fn new(encoded_len: usize) -> Self {
+ // Formulas that won't overflow
Self {
- num_chunks: encoded_len
- .checked_add(INPUT_CHUNK_LEN - 1)
- .expect("Overflow when calculating number of chunks in input")
- / INPUT_CHUNK_LEN,
- decoded_len_estimate: encoded_len
- .checked_add(3)
- .expect("Overflow when calculating decoded len estimate")
- / 4
- * 3,
+ num_chunks: encoded_len / INPUT_CHUNK_LEN
+ + (encoded_len % INPUT_CHUNK_LEN > 0) as usize,
+ decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3,
}
}
}
@@ -51,7 +46,7 @@ impl DecodeEstimate for GeneralPurposeEstimate {
}
/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
-/// Returns the number of bytes written, or an error.
+/// Returns the decode metadata, or an error.
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
// but this is fragile and the best setting changes with only minor code modifications.
@@ -63,7 +58,7 @@ pub(crate) fn decode_helper(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
-) -> Result<usize, DecodeError> {
+) -> Result<DecodeMetadata, DecodeError> {
let remainder_len = input.len() % INPUT_CHUNK_LEN;
// Because the fast decode loop writes in groups of 8 bytes (unrolled to
@@ -345,4 +340,44 @@ mod tests {
decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
}
+
+ #[test]
+ fn estimate_short_lengths() {
+ for (range, (num_chunks, decoded_len_estimate)) in [
+ (0..=0, (0, 0)),
+ (1..=4, (1, 3)),
+ (5..=8, (1, 6)),
+ (9..=12, (2, 9)),
+ (13..=16, (2, 12)),
+ (17..=20, (3, 15)),
+ ] {
+ for encoded_len in range {
+ let estimate = GeneralPurposeEstimate::new(encoded_len);
+ assert_eq!(num_chunks, estimate.num_chunks);
+ assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate);
+ }
+ }
+ }
+
+ #[test]
+ fn estimate_via_u128_inflation() {
+ // cover both ends of usize
+ (0..1000)
+ .chain(usize::MAX - 1000..=usize::MAX)
+ .for_each(|encoded_len| {
+ // inflate to 128 bit type to be able to safely use the easy formulas
+ let len_128 = encoded_len as u128;
+
+ let estimate = GeneralPurposeEstimate::new(encoded_len);
+ assert_eq!(
+ ((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128))
+ as usize,
+ estimate.num_chunks
+ );
+ assert_eq!(
+ ((len_128 + 3) / 4 * 3) as usize,
+ estimate.decoded_len_estimate
+ );
+ })
+ }
}
diff --git a/vendor/base64/src/engine/general_purpose/decode_suffix.rs b/vendor/base64/src/engine/general_purpose/decode_suffix.rs
index 5652035d0..e1e005d25 100644
--- a/vendor/base64/src/engine/general_purpose/decode_suffix.rs
+++ b/vendor/base64/src/engine/general_purpose/decode_suffix.rs
@@ -1,13 +1,13 @@
use crate::{
- engine::{general_purpose::INVALID_VALUE, DecodePaddingMode},
+ engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};
/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided
/// parameters.
///
-/// Returns the total number of bytes decoded, including the ones indicated as already written by
-/// `output_index`.
+/// Returns the decode metadata representing the total number of bytes decoded, including the ones
+/// indicated as already written by `output_index`.
pub(crate) fn decode_suffix(
input: &[u8],
input_index: usize,
@@ -16,7 +16,7 @@ pub(crate) fn decode_suffix(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
-) -> Result<usize, DecodeError> {
+) -> Result<DecodeMetadata, DecodeError> {
// Decode any leftovers that aren't a complete input block of 8 bytes.
// Use a u64 as a stack-resident 8 byte buffer.
let mut leftover_bits: u64 = 0;
@@ -157,5 +157,12 @@ pub(crate) fn decode_suffix(
leftover_bits_appended_to_buf += 8;
}
- Ok(output_index)
+ Ok(DecodeMetadata::new(
+ output_index,
+ if padding_bytes > 0 {
+ Some(input_index + first_padding_index)
+ } else {
+ None
+ },
+ ))
}
diff --git a/vendor/base64/src/engine/general_purpose/mod.rs b/vendor/base64/src/engine/general_purpose/mod.rs
index af8897bc2..01d22049a 100644
--- a/vendor/base64/src/engine/general_purpose/mod.rs
+++ b/vendor/base64/src/engine/general_purpose/mod.rs
@@ -2,13 +2,14 @@
use crate::{
alphabet,
alphabet::Alphabet,
- engine::{Config, DecodePaddingMode},
+ engine::{Config, DecodeMetadata, DecodePaddingMode},
DecodeError,
};
use core::convert::TryInto;
mod decode;
pub(crate) mod decode_suffix;
+
pub use decode::GeneralPurposeEstimate;
pub(crate) const INVALID_VALUE: u8 = 255;
@@ -170,7 +171,7 @@ impl super::Engine for GeneralPurpose {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
- ) -> Result<usize, DecodeError> {
+ ) -> Result<DecodeMetadata, DecodeError> {
decode::decode_helper(
input,
estimate,
diff --git a/vendor/base64/src/engine/mod.rs b/vendor/base64/src/engine/mod.rs
index 12dfaa884..aa41dffec 100644
--- a/vendor/base64/src/engine/mod.rs
+++ b/vendor/base64/src/engine/mod.rs
@@ -62,10 +62,6 @@ pub trait Engine: Send + Sync {
/// As an optimization to prevent the decoded length from being calculated twice, it is
/// sometimes helpful to have a conservative estimate of the decoded size before doing the
/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
#[doc(hidden)]
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
@@ -77,8 +73,6 @@ pub trait Engine: Send + Sync {
/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
/// calculating it again (expensive on short inputs).`
///
- /// Returns the number of bytes written to `output`.
- ///
/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
/// function must also handle the final possibly partial chunk.
/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
@@ -99,7 +93,7 @@ pub trait Engine: Send + Sync {
input: &[u8],
output: &mut [u8],
decode_estimate: Self::DecodeEstimate,
- ) -> Result<usize, DecodeError>;
+ ) -> Result<DecodeMetadata, DecodeError>;
/// Returns the config for this engine.
fn config(&self) -> &Self::Config;
@@ -206,8 +200,7 @@ pub trait Engine: Send + Sync {
Ok(encoded_size)
}
- /// Decode from string reference as octets using the specified [Engine].
- /// Returns a `Result` containing a `Vec<u8>`.
+ /// Decode the input into a new `Vec`.
///
/// # Example
///
@@ -225,11 +218,6 @@ pub trait Engine: Send + Sync {
/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
/// println!("{:?}", bytes_url);
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
#[cfg(any(feature = "alloc", feature = "std", test))]
fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
let input_bytes = input.as_ref();
@@ -237,13 +225,16 @@ pub trait Engine: Send + Sync {
let estimate = self.internal_decoded_len_estimate(input_bytes.len());
let mut buffer = vec![0; estimate.decoded_len_estimate()];
- let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?;
+ let bytes_written = self
+ .internal_decode(input_bytes, &mut buffer, estimate)?
+ .decoded_len;
buffer.truncate(bytes_written);
Ok(buffer)
}
- /// Decode from string reference as octets.
+ /// Decode the `input` into the supplied `buffer`.
+ ///
/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
/// Returns a `Result` containing an empty tuple, aka `()`.
///
@@ -272,11 +263,6 @@ pub trait Engine: Send + Sync {
/// println!("{:?}", buffer);
/// }
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
#[cfg(any(feature = "alloc", feature = "std", test))]
fn decode_vec<T: AsRef<[u8]>>(
&self,
@@ -295,7 +281,9 @@ pub trait Engine: Send + Sync {
buffer.resize(total_len_estimate, 0);
let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
- let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?;
+ let bytes_written = self
+ .internal_decode(input_bytes, buffer_slice, estimate)?
+ .decoded_len;
buffer.truncate(starting_output_len + bytes_written);
@@ -304,7 +292,8 @@ pub trait Engine: Send + Sync {
/// Decode the input into the provided output slice.
///
- /// Returns an error if `output` is smaller than the estimated decoded length.
+ /// Returns the number of bytes written to the slice, or an error if `output` is smaller than
+ /// the estimated decoded length.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
@@ -312,11 +301,6 @@ pub trait Engine: Send + Sync {
///
/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
/// if the output buffer is too small.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decode_slice<T: AsRef<[u8]>>(
&self,
input: T,
@@ -331,10 +315,13 @@ pub trait Engine: Send + Sync {
self.internal_decode(input_bytes, output, estimate)
.map_err(|e| e.into())
+ .map(|dm| dm.decoded_len)
}
/// Decode the input into the provided output slice.
///
+ /// Returns the number of bytes written to the slice.
+ ///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// See [crate::decoded_len_estimate] for calculating buffer sizes.
@@ -344,9 +331,6 @@ pub trait Engine: Send + Sync {
///
/// # Panics
///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
- ///
/// Panics if the provided output buffer is too small for the decoded data.
fn decode_slice_unchecked<T: AsRef<[u8]>>(
&self,
@@ -360,6 +344,7 @@ pub trait Engine: Send + Sync {
output,
self.internal_decoded_len_estimate(input_bytes.len()),
)
+ .map(|dm| dm.decoded_len)
}
}
@@ -387,11 +372,6 @@ pub trait DecodeEstimate {
///
/// The estimate must be no larger than the next largest complete triple of decoded bytes.
/// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decoded_len_estimate(&self) -> usize;
}
@@ -408,3 +388,21 @@ pub enum DecodePaddingMode {
/// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
RequireNone,
}
+
+/// Metadata about the result of a decode operation
+#[derive(PartialEq, Eq, Debug)]
+pub struct DecodeMetadata {
+ /// Number of decoded bytes output
+ pub(crate) decoded_len: usize,
+ /// Offset of the first padding byte in the input, if any
+ pub(crate) padding_offset: Option<usize>,
+}
+
+impl DecodeMetadata {
+ pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
+ Self {
+ decoded_len: decoded_bytes,
+ padding_offset: padding_index,
+ }
+ }
+}
diff --git a/vendor/base64/src/engine/naive.rs b/vendor/base64/src/engine/naive.rs
index 6665c5eb4..42b6085bd 100644
--- a/vendor/base64/src/engine/naive.rs
+++ b/vendor/base64/src/engine/naive.rs
@@ -2,7 +2,7 @@ use crate::{
alphabet::Alphabet,
engine::{
general_purpose::{self, decode_table, encode_table},
- Config, DecodeEstimate, DecodePaddingMode, Engine,
+ Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
},
DecodeError, PAD_BYTE,
};
@@ -112,7 +112,7 @@ impl Engine for Naive {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
- ) -> Result<usize, DecodeError> {
+ ) -> Result<DecodeMetadata, DecodeError> {
if estimate.rem == 1 {
// trailing whitespace is so common that it's worth it to check the last byte to
// possibly return a better error message
diff --git a/vendor/base64/src/engine/tests.rs b/vendor/base64/src/engine/tests.rs
index 906bba04d..6430b35a9 100644
--- a/vendor/base64/src/engine/tests.rs
+++ b/vendor/base64/src/engine/tests.rs
@@ -8,13 +8,16 @@ use rand::{
};
use rstest::rstest;
use rstest_reuse::{apply, template};
-use std::{collections, fmt};
+use std::{collections, fmt, io::Read as _};
use crate::{
alphabet::{Alphabet, STANDARD},
encode::add_padding,
encoded_len,
- engine::{general_purpose, naive, Config, DecodeEstimate, DecodePaddingMode, Engine},
+ engine::{
+ general_purpose, naive, Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
+ },
+ read::DecoderReader,
tests::{assert_encode_sanity, random_alphabet, random_config},
DecodeError, PAD_BYTE,
};
@@ -24,9 +27,20 @@ use crate::{
#[rstest(engine_wrapper,
case::general_purpose(GeneralPurposeWrapper {}),
case::naive(NaiveWrapper {}),
+case::decoder_reader(DecoderReaderEngineWrapper {}),
)]
fn all_engines<E: EngineWrapper>(engine_wrapper: E) {}
+/// Some decode tests don't make sense for use with `DecoderReader` as they are difficult to
+/// reason about or otherwise inapplicable given how DecoderReader slice up its input along
+/// chunk boundaries.
+#[template]
+#[rstest(engine_wrapper,
+case::general_purpose(GeneralPurposeWrapper {}),
+case::naive(NaiveWrapper {}),
+)]
+fn all_engines_except_decoder_reader<E: EngineWrapper>(engine_wrapper: E) {}
+
#[apply(all_engines)]
fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
let data = vec![
@@ -86,7 +100,7 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
&encoded_without_padding,
&std::str::from_utf8(&encode_buf[0..encode_len]).unwrap()
);
- let pad_len = add_padding(orig.len(), &mut encode_buf[encode_len..]);
+ let pad_len = add_padding(encode_len, &mut encode_buf[encode_len..]);
assert_eq!(encoded.as_bytes(), &encode_buf[..encode_len + pad_len]);
let decode_len = engine
@@ -195,7 +209,10 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) {
// pad so we can decode it in case our random engine requires padding
let pad_len = if padded {
- add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..])
+ add_padding(
+ encoded_len_no_pad,
+ &mut encode_buf[prefix_len + encoded_len_no_pad..],
+ )
} else {
0
};
@@ -382,7 +399,7 @@ fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper
for b in 0_u8..=255 {
let mut b64 = vec![0_u8; 4];
assert_eq!(2, engine.internal_encode(&[b], &mut b64[..]));
- let _ = add_padding(1, &mut b64[2..]);
+ let _ = add_padding(2, &mut b64[2..]);
assert!(base64_to_bytes.insert(b64, vec![b]).is_none());
}
@@ -442,7 +459,7 @@ fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapp
bytes[1] = b2;
let mut b64 = vec![0_u8; 4];
assert_eq!(3, engine.internal_encode(&bytes, &mut b64[..]));
- let _ = add_padding(2, &mut b64[3..]);
+ let _ = add_padding(3, &mut b64[3..]);
let mut v = Vec::with_capacity(2);
v.extend_from_slice(&bytes[..]);
@@ -549,7 +566,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
let len_range = distributions::Uniform::new(1, 1_000);
- for _ in 0..10_000 {
+ for _ in 0..100_000 {
let alphabet = random_alphabet(&mut rng);
let engine = E::random_alphabet(&mut rng, alphabet);
@@ -573,7 +590,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
let invalid_byte: u8 = loop {
let byte: u8 = rng.gen();
- if alphabet.symbols.contains(&byte) {
+ if alphabet.symbols.contains(&byte) || byte == PAD_BYTE {
continue;
} else {
break byte;
@@ -597,7 +614,9 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
/// Any amount of padding anywhere before the final non padding character = invalid byte at first
/// pad byte.
/// From this, we know padding must extend to the end of the input.
-#[apply(all_engines)]
+// DecoderReader pseudo-engine detects InvalidLastSymbol instead of InvalidLength because it
+// can end a decode on the quad that happens to contain the start of the padding
+#[apply(all_engines_except_decoder_reader)]
fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrapper>(
engine_wrapper: E,
) {
@@ -641,10 +660,13 @@ fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrap
}
}
-/// Any amount of padding before final chunk that crosses over into final chunk with 1-4 bytes =
-/// invalid byte at first pad byte (except for 1 byte suffix = invalid length).
-/// From this we know the padding must start in the final chunk.
-#[apply(all_engines)]
+/// Any amount of padding before final chunk that crosses over into final chunk with 2-4 bytes =
+/// invalid byte at first pad byte.
+/// From this and [decode_padding_starts_before_final_chunk_error_invalid_length] we know the
+/// padding must start in the final chunk.
+// DecoderReader pseudo-engine detects InvalidLastSymbol instead of InvalidLength because it
+// can end a decode on the quad that happens to contain the start of the padding
+#[apply(all_engines_except_decoder_reader)]
fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>(
engine_wrapper: E,
) {
@@ -652,8 +674,8 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>
// must have at least one prefix quad
let prefix_quads_range = distributions::Uniform::from(1..256);
- // including 1 just to make sure that it really does produce invalid length
- let suffix_pad_len_range = distributions::Uniform::from(1..=4);
+ // excluding 1 since we don't care about invalid length in this test
+ let suffix_pad_len_range = distributions::Uniform::from(2..=4);
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
let engine = E::standard_with_pad_mode(true, mode);
@@ -671,14 +693,48 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>
let padding_start = encoded.len() - padding_len;
encoded[padding_start..].fill(PAD_BYTE);
- if suffix_len == 1 {
- assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),);
- } else {
- assert_eq!(
- Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)),
- engine.decode(&encoded),
- );
- }
+ assert_eq!(
+ Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)),
+ engine.decode(&encoded),
+ "suffix_len: {}, padding_len: {}, b64: {}",
+ suffix_len,
+ padding_len,
+ std::str::from_utf8(&encoded).unwrap()
+ );
+ }
+ }
+}
+
+/// Any amount of padding before final chunk that crosses over into final chunk with 1 byte =
+/// invalid length.
+/// From this we know the padding must start in the final chunk.
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by
+// decoding only the available complete quads
+#[apply(all_engines_except_decoder_reader)]
+fn decode_padding_starts_before_final_chunk_error_invalid_length<E: EngineWrapper>(
+ engine_wrapper: E,
+) {
+ let mut rng = seeded_rng();
+
+ // must have at least one prefix quad
+ let prefix_quads_range = distributions::Uniform::from(1..256);
+ for mode in all_pad_modes() {
+ // we don't encode so we don't care about encode padding
+ let engine = E::standard_with_pad_mode(true, mode);
+ for _ in 0..100_000 {
+ let mut encoded = "ABCD"
+ .repeat(prefix_quads_range.sample(&mut rng))
+ .into_bytes();
+ encoded.resize(encoded.len() + 1, PAD_BYTE);
+
+ // amount of padding must be long enough to extend back from suffix into previous
+ // quads
+ let padding_len = rng.gen_range(1 + 1..encoded.len());
+ // no non-padding after padding in this test, so padding goes to the end
+ let padding_start = encoded.len() - padding_len;
+ encoded[padding_start..].fill(PAD_BYTE);
+
+ assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),);
}
}
}
@@ -787,7 +843,9 @@ fn decode_malleability_test_case_2_byte_suffix_no_padding<E: EngineWrapper>(engi
}
// https://eprint.iacr.org/2022/361.pdf table 2, test 7
-#[apply(all_engines)]
+// DecoderReader pseudo-engine gets InvalidByte at 8 (extra padding) since it decodes the first
+// two complete quads correctly.
+#[apply(all_engines_except_decoder_reader)]
fn decode_malleability_test_case_2_byte_suffix_too_much_padding<E: EngineWrapper>(
engine_wrapper: E,
) {
@@ -861,7 +919,11 @@ fn decode_pad_mode_indifferent_padding_accepts_anything<E: EngineWrapper>(engine
}
//this is a MAY in the rfc: https://tools.ietf.org/html/rfc4648#section-3.3
-#[apply(all_engines)]
+// DecoderReader pseudo-engine finds the first padding, but doesn't report it as an error,
+// because in the next decode it finds more padding, which is reported as InvalidByte, just
+// with an offset at its position in the second decode, rather than being linked to the start
+// of the padding that was first seen in the previous decode.
+#[apply(all_engines_except_decoder_reader)]
fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
@@ -895,7 +957,7 @@ fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E
num_prefix_quads * 4 + num_valid_bytes_penultimate_quad,
b'=',
),
- engine.decode(&s).unwrap_err()
+ engine.decode(&s).unwrap_err(),
);
}
}
@@ -955,7 +1017,9 @@ fn decode_absurd_pad_error<E: EngineWrapper>(engine_wrapper: E) {
}
}
-#[apply(all_engines)]
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by
+// decoding only the available complete quads
+#[apply(all_engines_except_decoder_reader)]
fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
@@ -981,7 +1045,9 @@ fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
}
}
-#[apply(all_engines)]
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by
+// decoding only the available complete quads
+#[apply(all_engines_except_decoder_reader)]
fn decode_padding_followed_by_non_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
@@ -1079,27 +1145,43 @@ fn decode_too_few_symbols_in_final_quad_error<E: EngineWrapper>(engine_wrapper:
}
}
-#[apply(all_engines)]
+// DecoderReader pseudo-engine can't handle DecodePaddingMode::RequireNone since it will decode
+// a complete quad with padding in it before encountering the stray byte that makes it an invalid
+// length
+#[apply(all_engines_except_decoder_reader)]
fn decode_invalid_trailing_bytes<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
- // we don't encode so we don't care about encode padding
- let engine = E::standard_with_pad_mode(true, mode);
+ do_invalid_trailing_byte(E::standard_with_pad_mode(true, mode), mode);
+ }
+}
- for num_prefix_quads in 0..256 {
- let mut s: String = "ABCD".repeat(num_prefix_quads);
- s.push_str("Cg==\n");
+#[apply(all_engines)]
+fn decode_invalid_trailing_bytes_all_modes<E: EngineWrapper>(engine_wrapper: E) {
+ // excluding no padding mode because the DecoderWrapper pseudo-engine will fail with
+ // InvalidPadding because it will decode the last complete quad with padding first
+ for mode in pad_modes_allowing_padding() {
+ do_invalid_trailing_byte(E::standard_with_pad_mode(true, mode), mode);
+ }
+}
- // The case of trailing newlines is common enough to warrant a test for a good error
- // message.
- assert_eq!(
- Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')),
- engine.decode(&s)
- );
+#[apply(all_engines)]
+fn decode_invalid_trailing_padding_as_invalid_length<E: EngineWrapper>(engine_wrapper: E) {
+ // excluding no padding mode because the DecoderWrapper pseudo-engine will fail with
+ // InvalidPadding because it will decode the last complete quad with padding first
+ for mode in pad_modes_allowing_padding() {
+ do_invalid_trailing_padding_as_invalid_length(E::standard_with_pad_mode(true, mode), mode);
+ }
+}
- // extra padding, however, is still InvalidLength
- let s = s.replace('\n', "=");
- assert_eq!(Err(DecodeError::InvalidLength), engine.decode(s));
- }
+// DecoderReader pseudo-engine can't handle DecodePaddingMode::RequireNone since it will decode
+// a complete quad with padding in it before encountering the stray byte that makes it an invalid
+// length
+#[apply(all_engines_except_decoder_reader)]
+fn decode_invalid_trailing_padding_as_invalid_length_all_modes<E: EngineWrapper>(
+ engine_wrapper: E,
+) {
+ for mode in all_pad_modes() {
+ do_invalid_trailing_padding_as_invalid_length(E::standard_with_pad_mode(true, mode), mode);
}
}
@@ -1178,6 +1260,53 @@ fn decode_into_slice_fits_in_precisely_sized_slice<E: EngineWrapper>(engine_wrap
}
#[apply(all_engines)]
+fn inner_decode_reports_padding_position<E: EngineWrapper>(engine_wrapper: E) {
+ let mut b64 = String::new();
+ let mut decoded = Vec::new();
+ let engine = E::standard();
+
+ for pad_position in 1..10_000 {
+ b64.clear();
+ decoded.clear();
+ // plenty of room for original data
+ decoded.resize(pad_position, 0);
+
+ for _ in 0..pad_position {
+ b64.push('A');
+ }
+ // finish the quad with padding
+ for _ in 0..(4 - (pad_position % 4)) {
+ b64.push('=');
+ }
+
+ let decode_res = engine.internal_decode(
+ b64.as_bytes(),
+ &mut decoded[..],
+ engine.internal_decoded_len_estimate(b64.len()),
+ );
+ if pad_position % 4 < 2 {
+ // impossible padding
+ assert_eq!(
+ Err(DecodeError::InvalidByte(pad_position, PAD_BYTE)),
+ decode_res
+ );
+ } else {
+ let decoded_bytes = pad_position / 4 * 3
+ + match pad_position % 4 {
+ 0 => 0,
+ 2 => 1,
+ 3 => 2,
+ _ => unreachable!(),
+ };
+ assert_eq!(
+ Ok(DecodeMetadata::new(decoded_bytes, Some(pad_position))),
+ decode_res
+ );
+ }
+ }
+}
+
+#[apply(all_engines)]
fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
for engine in [E::standard(), E::standard_unpadded()] {
for &padding in &[true, false] {
@@ -1200,6 +1329,64 @@ fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
}
}
+#[apply(all_engines)]
+fn estimate_via_u128_inflation<E: EngineWrapper>(engine_wrapper: E) {
+ // cover both ends of usize
+ (0..1000)
+ .chain(usize::MAX - 1000..=usize::MAX)
+ .for_each(|encoded_len| {
+ // inflate to 128 bit type to be able to safely use the easy formulas
+ let len_128 = encoded_len as u128;
+
+ let estimate = E::standard()
+ .internal_decoded_len_estimate(encoded_len)
+ .decoded_len_estimate();
+
+ // This check is a little too strict: it requires using the (len + 3) / 4 * 3 formula
+ // or equivalent, but until other engines come along that use a different formula
+ // requiring that we think more carefully about what the allowable criteria are, this
+ // will do.
+ assert_eq!(
+ ((len_128 + 3) / 4 * 3) as usize,
+ estimate,
+ "enc len {}",
+ encoded_len
+ );
+ })
+}
+
+fn do_invalid_trailing_byte(engine: impl Engine, mode: DecodePaddingMode) {
+ for num_prefix_quads in 0..256 {
+ let mut s: String = "ABCD".repeat(num_prefix_quads);
+ s.push_str("Cg==\n");
+
+ // The case of trailing newlines is common enough to warrant a test for a good error
+ // message.
+ assert_eq!(
+ Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')),
+ engine.decode(&s),
+ "mode: {:?}, input: {}",
+ mode,
+ s
+ );
+ }
+}
+
+fn do_invalid_trailing_padding_as_invalid_length(engine: impl Engine, mode: DecodePaddingMode) {
+ for num_prefix_quads in 0..256 {
+ let mut s: String = "ABCD".repeat(num_prefix_quads);
+ s.push_str("Cg===");
+
+ assert_eq!(
+ Err(DecodeError::InvalidLength),
+ engine.decode(&s),
+ "mode: {:?}, input: {}",
+ mode,
+ s
+ );
+ }
+}
+
/// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding.
///
/// Vecs provided should be empty.
@@ -1219,7 +1406,7 @@ fn generate_random_encoded_data<E: Engine, R: rand::Rng, D: distributions::Distr
let base_encoded_len = engine.internal_encode(&orig_data[..], &mut encode_buf[..]);
let enc_len_with_padding = if padding {
- base_encoded_len + add_padding(orig_len, &mut encode_buf[base_encoded_len..])
+ base_encoded_len + add_padding(base_encoded_len, &mut encode_buf[base_encoded_len..])
} else {
base_encoded_len
};
@@ -1249,11 +1436,7 @@ fn fill_rand_len<R: rand::Rng>(vec: &mut Vec<u8>, rng: &mut R, len: usize) {
}
}
-fn prefixed_data<'i, 'd>(
- input_with_prefix: &'i mut String,
- prefix_len: usize,
- data: &'d str,
-) -> &'i str {
+fn prefixed_data<'i>(input_with_prefix: &'i mut String, prefix_len: usize, data: &str) -> &'i str {
input_with_prefix.truncate(prefix_len);
input_with_prefix.push_str(data);
input_with_prefix.as_str()
@@ -1405,6 +1588,103 @@ impl EngineWrapper for NaiveWrapper {
}
}
+/// A pseudo-Engine that routes all decoding through [DecoderReader]
+struct DecoderReaderEngine<E: Engine> {
+ engine: E,
+}
+
+impl<E: Engine> From<E> for DecoderReaderEngine<E> {
+ fn from(value: E) -> Self {
+ Self { engine: value }
+ }
+}
+
+impl<E: Engine> Engine for DecoderReaderEngine<E> {
+ type Config = E::Config;
+ type DecodeEstimate = E::DecodeEstimate;
+
+ fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
+ self.engine.internal_encode(input, output)
+ }
+
+ fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
+ self.engine.internal_decoded_len_estimate(input_len)
+ }
+
+ fn internal_decode(
+ &self,
+ input: &[u8],
+ output: &mut [u8],
+ decode_estimate: Self::DecodeEstimate,
+ ) -> Result<DecodeMetadata, DecodeError> {
+ let mut reader = DecoderReader::new(input, &self.engine);
+ let mut buf = vec![0; input.len()];
+ // to avoid effects like not detecting invalid length due to progressively growing
+ // the output buffer in read_to_end etc, read into a big enough buffer in one go
+ // to make behavior more consistent with normal engines
+ let _ = reader
+ .read(&mut buf)
+ .and_then(|len| {
+ buf.truncate(len);
+ // make sure we got everything
+ reader.read_to_end(&mut buf)
+ })
+ .map_err(|io_error| {
+ *io_error
+ .into_inner()
+ .and_then(|inner| inner.downcast::<DecodeError>().ok())
+ .unwrap()
+ })?;
+ output[..buf.len()].copy_from_slice(&buf);
+ Ok(DecodeMetadata::new(
+ buf.len(),
+ input
+ .iter()
+ .enumerate()
+ .filter(|(_offset, byte)| **byte == PAD_BYTE)
+ .map(|(offset, _byte)| offset)
+ .next(),
+ ))
+ }
+
+ fn config(&self) -> &Self::Config {
+ self.engine.config()
+ }
+}
+
+struct DecoderReaderEngineWrapper {}
+
+impl EngineWrapper for DecoderReaderEngineWrapper {
+ type Engine = DecoderReaderEngine<general_purpose::GeneralPurpose>;
+
+ fn standard() -> Self::Engine {
+ GeneralPurposeWrapper::standard().into()
+ }
+
+ fn standard_unpadded() -> Self::Engine {
+ GeneralPurposeWrapper::standard_unpadded().into()
+ }
+
+ fn standard_with_pad_mode(
+ encode_pad: bool,
+ decode_pad_mode: DecodePaddingMode,
+ ) -> Self::Engine {
+ GeneralPurposeWrapper::standard_with_pad_mode(encode_pad, decode_pad_mode).into()
+ }
+
+ fn standard_allow_trailing_bits() -> Self::Engine {
+ GeneralPurposeWrapper::standard_allow_trailing_bits().into()
+ }
+
+ fn random<R: rand::Rng>(rng: &mut R) -> Self::Engine {
+ GeneralPurposeWrapper::random(rng).into()
+ }
+
+ fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine {
+ GeneralPurposeWrapper::random_alphabet(rng, alphabet).into()
+ }
+}
+
fn seeded_rng() -> impl rand::Rng {
rngs::SmallRng::from_entropy()
}
@@ -1417,6 +1697,13 @@ fn all_pad_modes() -> Vec<DecodePaddingMode> {
]
}
+fn pad_modes_allowing_padding() -> Vec<DecodePaddingMode> {
+ vec![
+ DecodePaddingMode::Indifferent,
+ DecodePaddingMode::RequireCanonical,
+ ]
+}
+
fn assert_all_suffixes_ok<E: Engine>(engine: E, suffixes: Vec<&str>) {
for num_prefix_quads in 0..256 {
for &suffix in suffixes.iter() {
diff --git a/vendor/base64/src/read/decoder.rs b/vendor/base64/src/read/decoder.rs
index 4888c9c4e..b656ae3d2 100644
--- a/vendor/base64/src/read/decoder.rs
+++ b/vendor/base64/src/read/decoder.rs
@@ -1,4 +1,4 @@
-use crate::{engine::Engine, DecodeError};
+use crate::{engine::Engine, DecodeError, PAD_BYTE};
use std::{cmp, fmt, io};
// This should be large, but it has to fit on the stack.
@@ -46,13 +46,15 @@ pub struct DecoderReader<'e, E: Engine, R: io::Read> {
// Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to
// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
// into here, which seems like a lot of complexity for 1 extra byte of storage.
- decoded_buffer: [u8; 3],
+ decoded_buffer: [u8; DECODED_CHUNK_SIZE],
// index of start of decoded data
decoded_offset: usize,
// length of decoded data
decoded_len: usize,
// used to provide accurate offsets in errors
total_b64_decoded: usize,
+ // offset of previously seen padding, if any
+ padding_offset: Option<usize>,
}
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
@@ -64,6 +66,7 @@ impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
.field("decoded_offset", &self.decoded_offset)
.field("decoded_len", &self.decoded_len)
.field("total_b64_decoded", &self.total_b64_decoded)
+ .field("padding_offset", &self.padding_offset)
.finish()
}
}
@@ -81,6 +84,7 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
decoded_offset: 0,
decoded_len: 0,
total_b64_decoded: 0,
+ padding_offset: None,
}
}
@@ -127,20 +131,28 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
/// caller's responsibility to choose the number of b64 bytes to decode correctly.
///
/// Returns a Result with the number of decoded bytes written to `buf`.
- fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
- debug_assert!(self.b64_len >= num_bytes);
+ fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
+ debug_assert!(self.b64_len >= b64_len_to_decode);
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
debug_assert!(!buf.is_empty());
- let decoded = self
+ let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
+ let decode_metadata = self
.engine
.internal_decode(
- &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
+ b64_to_decode,
buf,
- self.engine.internal_decoded_len_estimate(num_bytes),
+ self.engine.internal_decoded_len_estimate(b64_len_to_decode),
)
.map_err(|e| match e {
DecodeError::InvalidByte(offset, byte) => {
+ // This can be incorrect, but not in a way that probably matters to anyone:
+ // if there was padding handled in a previous decode, and we are now getting
+ // InvalidByte due to more padding, we should arguably report InvalidByte with
+ // PAD_BYTE at the original padding position (`self.padding_offset`), but we
+ // don't have a good way to tie those two cases together, so instead we
+ // just report the invalid byte as if the previous padding, and its possibly
+ // related downgrade to a now invalid byte, didn't happen.
DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
}
DecodeError::InvalidLength => DecodeError::InvalidLength,
@@ -151,13 +163,27 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
})
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
- self.total_b64_decoded += num_bytes;
- self.b64_offset += num_bytes;
- self.b64_len -= num_bytes;
+ if let Some(offset) = self.padding_offset {
+ // we've already seen padding
+ if decode_metadata.decoded_len > 0 {
+ // we read more after already finding padding; report error at first padding byte
+ return Err(io::Error::new(
+ io::ErrorKind::InvalidData,
+ DecodeError::InvalidByte(offset, PAD_BYTE),
+ ));
+ }
+ }
+
+ self.padding_offset = self.padding_offset.or(decode_metadata
+ .padding_offset
+ .map(|offset| self.total_b64_decoded + offset));
+ self.total_b64_decoded += b64_len_to_decode;
+ self.b64_offset += b64_len_to_decode;
+ self.b64_len -= b64_len_to_decode;
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
- Ok(decoded)
+ Ok(decode_metadata.decoded_len)
}
/// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
@@ -205,9 +231,9 @@ impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
self.decoded_offset < DECODED_CHUNK_SIZE
});
- // We shouldn't ever decode into here when we can't immediately write at least one byte into
- // the provided buf, so the effective length should only be 3 momentarily between when we
- // decode and when we copy into the target buffer.
+ // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
+ // byte into the provided buf, so the effective length should only be 3 momentarily between
+ // when we decode and when we copy into the target buffer.
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
@@ -217,20 +243,15 @@ impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
} else {
let mut at_eof = false;
while self.b64_len < BASE64_CHUNK_SIZE {
- // Work around lack of copy_within, which is only present in 1.37
// Copy any bytes we have to the start of the buffer.
- // We know we have < 1 chunk, so we can use a tiny tmp buffer.
- let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
- memmove_buf[..self.b64_len].copy_from_slice(
- &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
- );
- self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
+ self.b64_buffer
+ .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
self.b64_offset = 0;
// then fill in more data
let read = self.read_from_delegate()?;
if read == 0 {
- // we never pass in an empty buf, so 0 => we've hit EOF
+ // we never read into an empty buf, so 0 => we've hit EOF
at_eof = true;
break;
}
diff --git a/vendor/base64/src/read/decoder_tests.rs b/vendor/base64/src/read/decoder_tests.rs
index 65d58d8e3..625a07dbd 100644
--- a/vendor/base64/src/read/decoder_tests.rs
+++ b/vendor/base64/src/read/decoder_tests.rs
@@ -8,9 +8,10 @@ use rand::{Rng as _, RngCore as _};
use super::decoder::{DecoderReader, BUF_SIZE};
use crate::{
+ alphabet,
engine::{general_purpose::STANDARD, Engine, GeneralPurpose},
tests::{random_alphabet, random_config, random_engine},
- DecodeError,
+ DecodeError, PAD_BYTE,
};
#[test]
@@ -247,19 +248,21 @@ fn reports_invalid_byte_correctly() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
- let mut decoded = Vec::new();
+ let mut stream_decoded = Vec::new();
+ let mut bulk_decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
- decoded.clear();
+ stream_decoded.clear();
+ bulk_decoded.clear();
let size = rng.gen_range(1..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
rng.fill_bytes(&mut bytes[..size]);
assert_eq!(size, bytes.len());
- let engine = random_engine(&mut rng);
+ let engine = GeneralPurpose::new(&alphabet::STANDARD, random_config(&mut rng));
engine.encode_string(&bytes[..], &mut b64);
// replace one byte, somewhere, with '*', which is invalid
@@ -270,9 +273,8 @@ fn reports_invalid_byte_correctly() {
let mut wrapped_reader = io::Cursor::new(b64_bytes.clone());
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
- // some gymnastics to avoid double-moving the io::Error, which is not Copy
let read_decode_err = decoder
- .read_to_end(&mut decoded)
+ .read_to_end(&mut stream_decoded)
.map_err(|e| {
let kind = e.kind();
let inner = e
@@ -283,8 +285,7 @@ fn reports_invalid_byte_correctly() {
.err()
.and_then(|o| o);
- let mut bulk_buf = Vec::new();
- let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_buf).err();
+ let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded).err();
// it's tricky to predict where the invalid data's offset will be since if it's in the last
// chunk it will be reported at the first padding location because it's treated as invalid
@@ -296,6 +297,134 @@ fn reports_invalid_byte_correctly() {
}
}
+#[test]
+fn internal_padding_error_with_short_read_concatenated_texts_invalid_byte_error() {
+ let mut rng = rand::thread_rng();
+ let mut bytes = Vec::new();
+ let mut b64 = String::new();
+ let mut reader_decoded = Vec::new();
+ let mut bulk_decoded = Vec::new();
+
+ // encodes with padding, requires that padding be present so we don't get InvalidPadding
+ // just because padding is there at all
+ let engine = STANDARD;
+
+ for _ in 0..10_000 {
+ bytes.clear();
+ b64.clear();
+ reader_decoded.clear();
+ bulk_decoded.clear();
+
+ // at least 2 bytes so there can be a split point between bytes
+ let size = rng.gen_range(2..(10 * BUF_SIZE));
+ bytes.resize(size, 0);
+ rng.fill_bytes(&mut bytes[..size]);
+
+ // Concatenate two valid b64s, yielding padding in the middle.
+ // This avoids scenarios that are challenging to assert on, like random padding location
+ // that might be InvalidLastSymbol when decoded at certain buffer sizes but InvalidByte
+ // when done all at once.
+ let split = loop {
+ // find a split point that will produce padding on the first part
+ let s = rng.gen_range(1..size);
+ if s % 3 != 0 {
+ // short enough to need padding
+ break s;
+ };
+ };
+
+ engine.encode_string(&bytes[..split], &mut b64);
+ assert!(b64.contains('='), "split: {}, b64: {}", split, b64);
+ let bad_byte_pos = b64.find('=').unwrap();
+ engine.encode_string(&bytes[split..], &mut b64);
+ let b64_bytes = b64.as_bytes();
+
+ // short read to make it plausible for padding to happen on a read boundary
+ let read_len = rng.gen_range(1..10);
+ let mut wrapped_reader = ShortRead {
+ max_read_len: read_len,
+ delegate: io::Cursor::new(&b64_bytes),
+ };
+
+ let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
+
+ let read_decode_err = decoder
+ .read_to_end(&mut reader_decoded)
+ .map_err(|e| {
+ *e.into_inner()
+ .and_then(|e| e.downcast::<DecodeError>().ok())
+ .unwrap()
+ })
+ .unwrap_err();
+
+ let bulk_decode_err = engine.decode_vec(b64_bytes, &mut bulk_decoded).unwrap_err();
+
+ assert_eq!(
+ bulk_decode_err,
+ read_decode_err,
+ "read len: {}, bad byte pos: {}, b64: {}",
+ read_len,
+ bad_byte_pos,
+ std::str::from_utf8(b64_bytes).unwrap()
+ );
+ assert_eq!(
+ DecodeError::InvalidByte(
+ split / 3 * 4
+ + match split % 3 {
+ 1 => 2,
+ 2 => 3,
+ _ => unreachable!(),
+ },
+ PAD_BYTE
+ ),
+ read_decode_err
+ );
+ }
+}
+
+#[test]
+fn internal_padding_anywhere_error() {
+ let mut rng = rand::thread_rng();
+ let mut bytes = Vec::new();
+ let mut b64 = String::new();
+ let mut reader_decoded = Vec::new();
+
+ // encodes with padding, requires that padding be present so we don't get InvalidPadding
+ // just because padding is there at all
+ let engine = STANDARD;
+
+ for _ in 0..10_000 {
+ bytes.clear();
+ b64.clear();
+ reader_decoded.clear();
+
+ bytes.resize(10 * BUF_SIZE, 0);
+ rng.fill_bytes(&mut bytes[..]);
+
+ // Just shove a padding byte in there somewhere.
+ // The specific error to expect is challenging to predict precisely because it
+ // will vary based on the position of the padding in the quad and the read buffer
+ // length, but SOMETHING should go wrong.
+
+ engine.encode_string(&bytes[..], &mut b64);
+ let mut b64_bytes = b64.as_bytes().to_vec();
+ // put padding somewhere other than the last quad
+ b64_bytes[rng.gen_range(0..bytes.len() - 4)] = PAD_BYTE;
+
+ // short read to make it plausible for padding to happen on a read boundary
+ let read_len = rng.gen_range(1..10);
+ let mut wrapped_reader = ShortRead {
+ max_read_len: read_len,
+ delegate: io::Cursor::new(&b64_bytes),
+ };
+
+ let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
+
+ let result = decoder.read_to_end(&mut reader_decoded);
+ assert!(result.is_err());
+ }
+}
+
fn consume_with_short_reads_and_validate<R: io::Read>(
rng: &mut rand::rngs::ThreadRng,
expected_bytes: &[u8],
@@ -344,3 +473,15 @@ impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R,
self.delegate.read(&mut buf[..effective_len])
}
}
+
+struct ShortRead<R: io::Read> {
+ delegate: R,
+ max_read_len: usize,
+}
+
+impl<R: io::Read> io::Read for ShortRead<R> {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let len = self.max_read_len.min(buf.len());
+ self.delegate.read(&mut buf[..len])
+ }
+}