diff options
Diffstat (limited to 'third_party/rust/base64/src/write')
-rw-r--r-- | third_party/rust/base64/src/write/encoder.rs | 407 | ||||
-rw-r--r-- | third_party/rust/base64/src/write/encoder_string_writer.rs | 207 | ||||
-rw-r--r-- | third_party/rust/base64/src/write/encoder_tests.rs | 554 | ||||
-rw-r--r-- | third_party/rust/base64/src/write/mod.rs | 11 |
4 files changed, 1179 insertions, 0 deletions
diff --git a/third_party/rust/base64/src/write/encoder.rs b/third_party/rust/base64/src/write/encoder.rs new file mode 100644 index 0000000000..1c19bb42ab --- /dev/null +++ b/third_party/rust/base64/src/write/encoder.rs @@ -0,0 +1,407 @@ +use crate::engine::Engine; +use std::{ + cmp, fmt, io, + io::{ErrorKind, Result}, +}; + +pub(crate) const BUF_SIZE: usize = 1024; +/// The most bytes whose encoding will fit in `BUF_SIZE` +const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3; +// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping) +const MIN_ENCODE_CHUNK_SIZE: usize = 3; + +/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer. +/// +/// Because base64 has special handling for the end of the input data (padding, etc), there's a +/// `finish()` method on this type that encodes any leftover input bytes and adds padding if +/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but +/// any error that occurs when invoking the underlying writer will be suppressed. If you want to +/// handle such errors, call `finish()` yourself. +/// +/// # Examples +/// +/// ``` +/// use std::io::Write; +/// use base64::engine::general_purpose; +/// +/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc. +/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD); +/// +/// // handle errors as you normally would +/// enc.write_all(b"asdf").unwrap(); +/// +/// // could leave this out to be called by Drop, if you don't care +/// // about handling errors or getting the delegate writer back +/// let delegate = enc.finish().unwrap(); +/// +/// // base64 was written to the writer +/// assert_eq!(b"YXNkZg==", &delegate[..]); +/// +/// ``` +/// +/// # Panics +/// +/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without +/// error is invalid and will panic. +/// +/// # Errors +/// +/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be +/// returned as per the contract of `Write`. +/// +/// # Performance +/// +/// It has some minor performance loss compared to encoding slices (a couple percent). +/// It does not do any heap allocation. +/// +/// # Limitations +/// +/// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their +/// implications for a buffering implementation, these methods may not behave as expected. In +/// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`. +/// See the documentation of the `Write` trait implementation for further details. +pub struct EncoderWriter<'e, E: Engine, W: io::Write> { + engine: &'e E, + /// Where encoded data is written to. It's an Option as it's None immediately before Drop is + /// called so that finish() can return the underlying writer. None implies that finish() has + /// been called successfully. + delegate: Option<W>, + /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk + /// with the next `write()`, encode it, then proceed with the rest of the input normally. + extra_input: [u8; MIN_ENCODE_CHUNK_SIZE], + /// How much of `extra` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`. + extra_input_occupied_len: usize, + /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the underlying writer + /// did not write last time. + output: [u8; BUF_SIZE], + /// How much of `output` is occupied with encoded data that couldn't be written last time + output_occupied_len: usize, + /// panic safety: don't write again in destructor if writer panicked while we were writing to it + panicked: bool, +} + +impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}", + self.extra_input, + self.extra_input_occupied_len, + &self.output[0..5], + self.output_occupied_len + ) + } +} + +impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> { + /// Create a new encoder that will write to the provided delegate writer. + pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> { + EncoderWriter { + engine, + delegate: Some(delegate), + extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE], + extra_input_occupied_len: 0, + output: [0u8; BUF_SIZE], + output_occupied_len: 0, + panicked: false, + } + } + + /// Encode all remaining buffered data and write it, including any trailing incomplete input + /// triples and associated padding. + /// + /// Once this succeeds, no further writes or calls to this method are allowed. + /// + /// This may write to the delegate writer multiple times if the delegate writer does not accept + /// all input provided to its `write` each invocation. + /// + /// If you don't care about error handling, it is not necessary to call this function, as the + /// equivalent finalization is done by the Drop impl. + /// + /// Returns the writer that this was constructed around. + /// + /// # Errors + /// + /// The first error that is not of `ErrorKind::Interrupted` will be returned. + pub fn finish(&mut self) -> Result<W> { + // If we could consume self in finish(), we wouldn't have to worry about this case, but + // finish() is retryable in the face of I/O errors, so we can't consume here. + if self.delegate.is_none() { + panic!("Encoder has already had finish() called"); + }; + + self.write_final_leftovers()?; + + let writer = self.delegate.take().expect("Writer must be present"); + + Ok(writer) + } + + /// Write any remaining buffered data to the delegate writer. + fn write_final_leftovers(&mut self) -> Result<()> { + if self.delegate.is_none() { + // finish() has already successfully called this, and we are now in drop() with a None + // writer, so just no-op + return Ok(()); + } + + self.write_all_encoded_output()?; + + if self.extra_input_occupied_len > 0 { + let encoded_len = self + .engine + .encode_slice( + &self.extra_input[..self.extra_input_occupied_len], + &mut self.output[..], + ) + .expect("buffer is large enough"); + + self.output_occupied_len = encoded_len; + + self.write_all_encoded_output()?; + + // write succeeded, do not write the encoding of extra again if finish() is retried + self.extra_input_occupied_len = 0; + } + + Ok(()) + } + + /// Write as much of the encoded output to the delegate writer as it will accept, and store the + /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`. + /// + /// # Errors + /// + /// Errors from the delegate writer are returned. In the case of an error, + /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean + /// that no write took place. + fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> { + self.panicked = true; + let res = self + .delegate + .as_mut() + .expect("Writer must be present") + .write(&self.output[..current_output_len]); + self.panicked = false; + + res.map(|consumed| { + debug_assert!(consumed <= current_output_len); + + if consumed < current_output_len { + self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap(); + // If we're blocking on I/O, the minor inefficiency of copying bytes to the + // start of the buffer is the least of our concerns... + // TODO Rotate moves more than we need to; copy_within now stable. + self.output.rotate_left(consumed); + } else { + self.output_occupied_len = 0; + } + }) + } + + /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`. + /// + /// This is basically write_all for the remaining buffered data but without the undesirable + /// abort-on-`Ok(0)` behavior. + /// + /// # Errors + /// + /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's + /// `Interrupted`, in which case the error is ignored and writes will continue. + fn write_all_encoded_output(&mut self) -> Result<()> { + while self.output_occupied_len > 0 { + let remaining_len = self.output_occupied_len; + match self.write_to_delegate(remaining_len) { + // try again on interrupts ala write_all + Err(ref e) if e.kind() == ErrorKind::Interrupted => {} + // other errors return + Err(e) => return Err(e), + // success no-ops because remaining length is already updated + Ok(_) => {} + }; + } + + debug_assert_eq!(0, self.output_occupied_len); + Ok(()) + } + + /// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output + /// to. + /// + /// Normally this method should not be needed, since `finish()` returns the inner writer if + /// it completes successfully. That will also ensure all data has been flushed, which the + /// `into_inner()` function does *not* do. + /// + /// Calling this method after `finish()` has completed successfully will panic, since the + /// writer has already been returned. + /// + /// This method may be useful if the writer implements additional APIs beyond the `Write` + /// trait. Note that the inner writer might be in an error state or have an incomplete + /// base64 string written to it. + pub fn into_inner(mut self) -> W { + self.delegate + .take() + .expect("Encoder has already had finish() called") + } +} + +impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> { + /// Encode input and then write to the delegate writer. + /// + /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes + /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which + /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See + /// <https://github.com/rust-lang/rust/issues/56889> for more on that. + /// + /// If the previous call to `write` provided more (encoded) data than the delegate writer could + /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered + /// data is present, subsequent calls to `write` will try to write the remaining buffered data + /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or + /// an error. + /// + /// # Errors + /// + /// Any errors emitted by the delegate writer are returned. + fn write(&mut self, input: &[u8]) -> Result<usize> { + if self.delegate.is_none() { + panic!("Cannot write more after calling finish()"); + } + + if input.is_empty() { + return Ok(0); + } + + // The contract of `Write::write` places some constraints on this implementation: + // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't + // iterate over the input and encode multiple chunks. + // - Errors mean that "no bytes were written to this writer", so we need to reset the + // internal state to what it was before the error occurred + + // before reading any input, write any leftover encoded output from last time + if self.output_occupied_len > 0 { + let current_len = self.output_occupied_len; + return self + .write_to_delegate(current_len) + // did not read any input + .map(|_| 0); + } + + debug_assert_eq!(0, self.output_occupied_len); + + // how many bytes, if any, were read into `extra` to create a triple to encode + let mut extra_input_read_len = 0; + let mut input = input; + + let orig_extra_len = self.extra_input_occupied_len; + + let mut encoded_size = 0; + // always a multiple of MIN_ENCODE_CHUNK_SIZE + let mut max_input_len = MAX_INPUT_LEN; + + // process leftover un-encoded input from last write + if self.extra_input_occupied_len > 0 { + debug_assert!(self.extra_input_occupied_len < 3); + if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE { + // Fill up `extra`, encode that into `output`, and consume as much of the rest of + // `input` as possible. + // We could write just the encoding of `extra` by itself but then we'd have to + // return after writing only 4 bytes, which is inefficient if the underlying writer + // would make a syscall. + extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len; + debug_assert!(extra_input_read_len > 0); + // overwrite only bytes that weren't already used. If we need to rollback extra_len + // (when the subsequent write errors), the old leading bytes will still be there. + self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE] + .copy_from_slice(&input[0..extra_input_read_len]); + + let len = self.engine.internal_encode( + &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE], + &mut self.output[..], + ); + debug_assert_eq!(4, len); + + input = &input[extra_input_read_len..]; + + // consider extra to be used up, since we encoded it + self.extra_input_occupied_len = 0; + // don't clobber where we just encoded to + encoded_size = 4; + // and don't read more than can be encoded + max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE; + + // fall through to normal encoding + } else { + // `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be + // 1 byte in each. + debug_assert_eq!(1, input.len()); + debug_assert_eq!(1, self.extra_input_occupied_len); + + self.extra_input[self.extra_input_occupied_len] = input[0]; + self.extra_input_occupied_len += 1; + return Ok(1); + }; + } else if input.len() < MIN_ENCODE_CHUNK_SIZE { + // `extra` is empty, and `input` fits inside it + self.extra_input[0..input.len()].copy_from_slice(input); + self.extra_input_occupied_len = input.len(); + return Ok(input.len()); + }; + + // either 0 or 1 complete chunks encoded from extra + debug_assert!(encoded_size == 0 || encoded_size == 4); + debug_assert!( + // didn't encode extra input + MAX_INPUT_LEN == max_input_len + // encoded one triple + || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE + ); + + // encode complete triples only + let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE); + let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len); + debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE); + debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE); + + encoded_size += self.engine.internal_encode( + &input[..(input_chunks_to_encode_len)], + &mut self.output[encoded_size..], + ); + + // not updating `self.output_occupied_len` here because if the below write fails, it should + // "never take place" -- the buffer contents we encoded are ignored and perhaps retried + // later, if the consumer chooses. + + self.write_to_delegate(encoded_size) + // no matter whether we wrote the full encoded buffer or not, we consumed the same + // input + .map(|_| extra_input_read_len + input_chunks_to_encode_len) + .map_err(|e| { + // in case we filled and encoded `extra`, reset extra_len + self.extra_input_occupied_len = orig_extra_len; + + e + }) + } + + /// Because this is usually treated as OK to call multiple times, it will *not* flush any + /// incomplete chunks of input or write padding. + /// # Errors + /// + /// The first error that is not of [`ErrorKind::Interrupted`] will be returned. + fn flush(&mut self) -> Result<()> { + self.write_all_encoded_output()?; + self.delegate + .as_mut() + .expect("Writer must be present") + .flush() + } +} + +impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> { + fn drop(&mut self) { + if !self.panicked { + // like `BufWriter`, ignore errors during drop + let _ = self.write_final_leftovers(); + } + } +} diff --git a/third_party/rust/base64/src/write/encoder_string_writer.rs b/third_party/rust/base64/src/write/encoder_string_writer.rs new file mode 100644 index 0000000000..9c02bcde84 --- /dev/null +++ b/third_party/rust/base64/src/write/encoder_string_writer.rs @@ -0,0 +1,207 @@ +use super::encoder::EncoderWriter; +use crate::engine::Engine; +use std::io; + +/// A `Write` implementation that base64-encodes data using the provided config and accumulates the +/// resulting base64 utf8 `&str` in a [StrConsumer] implementation (typically `String`), which is +/// then exposed via `into_inner()`. +/// +/// # Examples +/// +/// Buffer base64 in a new String: +/// +/// ``` +/// use std::io::Write; +/// use base64::engine::general_purpose; +/// +/// let mut enc = base64::write::EncoderStringWriter::new(&general_purpose::STANDARD); +/// +/// enc.write_all(b"asdf").unwrap(); +/// +/// // get the resulting String +/// let b64_string = enc.into_inner(); +/// +/// assert_eq!("YXNkZg==", &b64_string); +/// ``` +/// +/// Or, append to an existing `String`, which implements `StrConsumer`: +/// +/// ``` +/// use std::io::Write; +/// use base64::engine::general_purpose; +/// +/// let mut buf = String::from("base64: "); +/// +/// let mut enc = base64::write::EncoderStringWriter::from_consumer( +/// &mut buf, +/// &general_purpose::STANDARD); +/// +/// enc.write_all(b"asdf").unwrap(); +/// +/// // release the &mut reference on buf +/// let _ = enc.into_inner(); +/// +/// assert_eq!("base64: YXNkZg==", &buf); +/// ``` +/// +/// # Performance +/// +/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain +/// bytes to a `io::Write`. +pub struct EncoderStringWriter<'e, E: Engine, S: StrConsumer> { + encoder: EncoderWriter<'e, E, Utf8SingleCodeUnitWriter<S>>, +} + +impl<'e, E: Engine, S: StrConsumer> EncoderStringWriter<'e, E, S> { + /// Create a EncoderStringWriter that will append to the provided `StrConsumer`. + pub fn from_consumer(str_consumer: S, engine: &'e E) -> Self { + EncoderStringWriter { + encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, engine), + } + } + + /// Encode all remaining buffered data, including any trailing incomplete input triples and + /// associated padding. + /// + /// Returns the base64-encoded form of the accumulated written data. + pub fn into_inner(mut self) -> S { + self.encoder + .finish() + .expect("Writing to a consumer should never fail") + .str_consumer + } +} + +impl<'e, E: Engine> EncoderStringWriter<'e, E, String> { + /// Create a EncoderStringWriter that will encode into a new `String` with the provided config. + pub fn new(engine: &'e E) -> Self { + EncoderStringWriter::from_consumer(String::new(), engine) + } +} + +impl<'e, E: Engine, S: StrConsumer> io::Write for EncoderStringWriter<'e, E, S> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + self.encoder.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.encoder.flush() + } +} + +/// An abstraction around consuming `str`s produced by base64 encoding. +pub trait StrConsumer { + /// Consume the base64 encoded data in `buf` + fn consume(&mut self, buf: &str); +} + +/// As for io::Write, `StrConsumer` is implemented automatically for `&mut S`. +impl<S: StrConsumer + ?Sized> StrConsumer for &mut S { + fn consume(&mut self, buf: &str) { + (**self).consume(buf); + } +} + +/// Pushes the str onto the end of the String +impl StrConsumer for String { + fn consume(&mut self, buf: &str) { + self.push_str(buf); + } +} + +/// A `Write` that only can handle bytes that are valid single-byte UTF-8 code units. +/// +/// This is safe because we only use it when writing base64, which is always valid UTF-8. +struct Utf8SingleCodeUnitWriter<S: StrConsumer> { + str_consumer: S, +} + +impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + // Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer + // length + let s = std::str::from_utf8(buf).expect("Input must be valid UTF-8"); + + self.str_consumer.consume(s); + + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + // no op + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + engine::Engine, tests::random_engine, write::encoder_string_writer::EncoderStringWriter, + }; + use rand::Rng; + use std::cmp; + use std::io::Write; + + #[test] + fn every_possible_split_of_input() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + let size = 5_000; + + for i in 0..size { + orig_data.clear(); + normal_encoded.clear(); + + orig_data.resize(size, 0); + rng.fill(&mut orig_data[..]); + + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + let mut stream_encoder = EncoderStringWriter::new(&engine); + // Write the first i bytes, then the rest + stream_encoder.write_all(&orig_data[0..i]).unwrap(); + stream_encoder.write_all(&orig_data[i..]).unwrap(); + + let stream_encoded = stream_encoder.into_inner(); + + assert_eq!(normal_encoded, stream_encoded); + } + } + #[test] + fn incremental_writes() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + let size = 5_000; + + for _ in 0..size { + orig_data.clear(); + normal_encoded.clear(); + + orig_data.resize(size, 0); + rng.fill(&mut orig_data[..]); + + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + let mut stream_encoder = EncoderStringWriter::new(&engine); + // write small nibbles of data + let mut offset = 0; + while offset < size { + let nibble_size = cmp::min(rng.gen_range(0..=64), size - offset); + let len = stream_encoder + .write(&orig_data[offset..offset + nibble_size]) + .unwrap(); + offset += len; + } + + let stream_encoded = stream_encoder.into_inner(); + + assert_eq!(normal_encoded, stream_encoded); + } + } +} diff --git a/third_party/rust/base64/src/write/encoder_tests.rs b/third_party/rust/base64/src/write/encoder_tests.rs new file mode 100644 index 0000000000..1f1a1650a6 --- /dev/null +++ b/third_party/rust/base64/src/write/encoder_tests.rs @@ -0,0 +1,554 @@ +use std::io::{Cursor, Write}; +use std::{cmp, io, str}; + +use rand::Rng; + +use crate::{ + alphabet::{STANDARD, URL_SAFE}, + engine::{ + general_purpose::{GeneralPurpose, NO_PAD, PAD}, + Engine, + }, + tests::random_engine, +}; + +use super::EncoderWriter; + +const URL_SAFE_ENGINE: GeneralPurpose = GeneralPurpose::new(&URL_SAFE, PAD); +const NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&STANDARD, NO_PAD); + +#[test] +fn encode_three_bytes() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + let sz = enc.write(b"abc").unwrap(); + assert_eq!(sz, 3); + } + assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes()); +} + +#[test] +fn encode_nine_bytes_two_writes() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + let sz = enc.write(b"abcdef").unwrap(); + assert_eq!(sz, 6); + let sz = enc.write(b"ghi").unwrap(); + assert_eq!(sz, 3); + } + assert_eq!( + &c.get_ref()[..], + URL_SAFE_ENGINE.encode("abcdefghi").as_bytes() + ); +} + +#[test] +fn encode_one_then_two_bytes() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + let sz = enc.write(b"a").unwrap(); + assert_eq!(sz, 1); + let sz = enc.write(b"bc").unwrap(); + assert_eq!(sz, 2); + } + assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes()); +} + +#[test] +fn encode_one_then_five_bytes() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + let sz = enc.write(b"a").unwrap(); + assert_eq!(sz, 1); + let sz = enc.write(b"bcdef").unwrap(); + assert_eq!(sz, 5); + } + assert_eq!( + &c.get_ref()[..], + URL_SAFE_ENGINE.encode("abcdef").as_bytes() + ); +} + +#[test] +fn encode_1_2_3_bytes() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + let sz = enc.write(b"a").unwrap(); + assert_eq!(sz, 1); + let sz = enc.write(b"bc").unwrap(); + assert_eq!(sz, 2); + let sz = enc.write(b"def").unwrap(); + assert_eq!(sz, 3); + } + assert_eq!( + &c.get_ref()[..], + URL_SAFE_ENGINE.encode("abcdef").as_bytes() + ); +} + +#[test] +fn encode_with_padding() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + enc.write_all(b"abcd").unwrap(); + + enc.flush().unwrap(); + } + assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abcd").as_bytes()); +} + +#[test] +fn encode_with_padding_multiple_writes() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + assert_eq!(1, enc.write(b"a").unwrap()); + assert_eq!(2, enc.write(b"bc").unwrap()); + assert_eq!(3, enc.write(b"def").unwrap()); + assert_eq!(1, enc.write(b"g").unwrap()); + + enc.flush().unwrap(); + } + assert_eq!( + &c.get_ref()[..], + URL_SAFE_ENGINE.encode("abcdefg").as_bytes() + ); +} + +#[test] +fn finish_writes_extra_byte() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); + + assert_eq!(6, enc.write(b"abcdef").unwrap()); + + // will be in extra + assert_eq!(1, enc.write(b"g").unwrap()); + + // 1 trailing byte = 2 encoded chars + let _ = enc.finish().unwrap(); + } + assert_eq!( + &c.get_ref()[..], + URL_SAFE_ENGINE.encode("abcdefg").as_bytes() + ); +} + +#[test] +fn write_partial_chunk_encodes_partial_chunk() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + // nothing encoded yet + assert_eq!(2, enc.write(b"ab").unwrap()); + // encoded here + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("ab").as_bytes()); + assert_eq!(3, c.get_ref().len()); +} + +#[test] +fn write_1_chunk_encodes_complete_chunk() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + assert_eq!(3, enc.write(b"abc").unwrap()); + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); + assert_eq!(4, c.get_ref().len()); +} + +#[test] +fn write_1_chunk_and_partial_encodes_only_complete_chunk() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + // "d" not consumed since it's not a full chunk + assert_eq!(3, enc.write(b"abcd").unwrap()); + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); + assert_eq!(4, c.get_ref().len()); +} + +#[test] +fn write_2_partials_to_exactly_complete_chunk_encodes_complete_chunk() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + assert_eq!(1, enc.write(b"a").unwrap()); + assert_eq!(2, enc.write(b"bc").unwrap()); + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); + assert_eq!(4, c.get_ref().len()); +} + +#[test] +fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_encodes_complete_chunk_without_consuming_remaining( +) { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + assert_eq!(1, enc.write(b"a").unwrap()); + // doesn't consume "d" + assert_eq!(2, enc.write(b"bcd").unwrap()); + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); + assert_eq!(4, c.get_ref().len()); +} + +#[test] +fn write_partial_then_enough_to_complete_chunk_and_another_chunk_encodes_complete_chunks() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + assert_eq!(1, enc.write(b"a").unwrap()); + // completes partial chunk, and another chunk + assert_eq!(5, enc.write(b"bcdef").unwrap()); + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes()); + assert_eq!(8, c.get_ref().len()); +} + +#[test] +fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_partial_chunk_encodes_only_complete_chunks( +) { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + + assert_eq!(1, enc.write(b"a").unwrap()); + // completes partial chunk, and another chunk, with one more partial chunk that's not + // consumed + assert_eq!(5, enc.write(b"bcdefe").unwrap()); + let _ = enc.finish().unwrap(); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes()); + assert_eq!(8, c.get_ref().len()); +} + +#[test] +fn drop_calls_finish_for_you() { + let mut c = Cursor::new(Vec::new()); + { + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); + assert_eq!(1, enc.write(b"a").unwrap()); + } + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("a").as_bytes()); + assert_eq!(2, c.get_ref().len()); +} + +#[test] +fn every_possible_split_of_input() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut stream_encoded = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + let size = 5_000; + + for i in 0..size { + orig_data.clear(); + stream_encoded.clear(); + normal_encoded.clear(); + + for _ in 0..size { + orig_data.push(rng.gen()); + } + + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + { + let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine); + // Write the first i bytes, then the rest + stream_encoder.write_all(&orig_data[0..i]).unwrap(); + stream_encoder.write_all(&orig_data[i..]).unwrap(); + } + + assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap()); + } +} + +#[test] +fn encode_random_config_matches_normal_encode_reasonable_input_len() { + // choose up to 2 * buf size, so ~half the time it'll use a full buffer + do_encode_random_config_matches_normal_encode(super::encoder::BUF_SIZE * 2); +} + +#[test] +fn encode_random_config_matches_normal_encode_tiny_input_len() { + do_encode_random_config_matches_normal_encode(10); +} + +#[test] +fn retrying_writes_that_error_with_interrupted_works() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut stream_encoded = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + for _ in 0..1_000 { + orig_data.clear(); + stream_encoded.clear(); + normal_encoded.clear(); + + let orig_len: usize = rng.gen_range(100..20_000); + for _ in 0..orig_len { + orig_data.push(rng.gen()); + } + + // encode the normal way + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + // encode via the stream encoder + { + let mut interrupt_rng = rand::thread_rng(); + let mut interrupting_writer = InterruptingWriter { + w: &mut stream_encoded, + rng: &mut interrupt_rng, + fraction: 0.8, + }; + + let mut stream_encoder = EncoderWriter::new(&mut interrupting_writer, &engine); + let mut bytes_consumed = 0; + while bytes_consumed < orig_len { + // use short inputs since we want to use `extra` a lot as that's what needs rollback + // when errors occur + let input_len: usize = cmp::min(rng.gen_range(0..10), orig_len - bytes_consumed); + + retry_interrupted_write_all( + &mut stream_encoder, + &orig_data[bytes_consumed..bytes_consumed + input_len], + ) + .unwrap(); + + bytes_consumed += input_len; + } + + loop { + let res = stream_encoder.finish(); + match res { + Ok(_) => break, + Err(e) => match e.kind() { + io::ErrorKind::Interrupted => continue, + _ => panic!("{:?}", e), // bail + }, + } + } + + assert_eq!(orig_len, bytes_consumed); + } + + assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap()); + } +} + +#[test] +fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_encoded_data() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut stream_encoded = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + for _ in 0..1_000 { + orig_data.clear(); + stream_encoded.clear(); + normal_encoded.clear(); + + let orig_len: usize = rng.gen_range(100..20_000); + for _ in 0..orig_len { + orig_data.push(rng.gen()); + } + + // encode the normal way + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + // encode via the stream encoder + { + let mut partial_rng = rand::thread_rng(); + let mut partial_writer = PartialInterruptingWriter { + w: &mut stream_encoded, + rng: &mut partial_rng, + full_input_fraction: 0.1, + no_interrupt_fraction: 0.1, + }; + + let mut stream_encoder = EncoderWriter::new(&mut partial_writer, &engine); + let mut bytes_consumed = 0; + while bytes_consumed < orig_len { + // use at most medium-length inputs to exercise retry logic more aggressively + let input_len: usize = cmp::min(rng.gen_range(0..100), orig_len - bytes_consumed); + + let res = + stream_encoder.write(&orig_data[bytes_consumed..bytes_consumed + input_len]); + + // retry on interrupt + match res { + Ok(len) => bytes_consumed += len, + Err(e) => match e.kind() { + io::ErrorKind::Interrupted => continue, + _ => { + panic!("should not see other errors"); + } + }, + } + } + + let _ = stream_encoder.finish().unwrap(); + + assert_eq!(orig_len, bytes_consumed); + } + + assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap()); + } +} + +/// Retry writes until all the data is written or an error that isn't Interrupted is returned. +fn retry_interrupted_write_all<W: Write>(w: &mut W, buf: &[u8]) -> io::Result<()> { + let mut bytes_consumed = 0; + + while bytes_consumed < buf.len() { + let res = w.write(&buf[bytes_consumed..]); + + match res { + Ok(len) => bytes_consumed += len, + Err(e) => match e.kind() { + io::ErrorKind::Interrupted => continue, + _ => return Err(e), + }, + } + } + + Ok(()) +} + +fn do_encode_random_config_matches_normal_encode(max_input_len: usize) { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut stream_encoded = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + for _ in 0..1_000 { + orig_data.clear(); + stream_encoded.clear(); + normal_encoded.clear(); + + let orig_len: usize = rng.gen_range(100..20_000); + for _ in 0..orig_len { + orig_data.push(rng.gen()); + } + + // encode the normal way + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + // encode via the stream encoder + { + let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine); + let mut bytes_consumed = 0; + while bytes_consumed < orig_len { + let input_len: usize = + cmp::min(rng.gen_range(0..max_input_len), orig_len - bytes_consumed); + + // write a little bit of the data + stream_encoder + .write_all(&orig_data[bytes_consumed..bytes_consumed + input_len]) + .unwrap(); + + bytes_consumed += input_len; + } + + let _ = stream_encoder.finish().unwrap(); + + assert_eq!(orig_len, bytes_consumed); + } + + assert_eq!(normal_encoded, str::from_utf8(&stream_encoded).unwrap()); + } +} + +/// A `Write` implementation that returns Interrupted some fraction of the time, randomly. +struct InterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> { + w: &'a mut W, + rng: &'a mut R, + /// In [0, 1]. If a random number in [0, 1] is `<= threshold`, `Write` methods will return + /// an `Interrupted` error + fraction: f64, +} + +impl<'a, W: Write, R: Rng> Write for InterruptingWriter<'a, W, R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + if self.rng.gen_range(0.0..1.0) <= self.fraction { + return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")); + } + + self.w.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + if self.rng.gen_range(0.0..1.0) <= self.fraction { + return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")); + } + + self.w.flush() + } +} + +/// A `Write` implementation that sometimes will only write part of its input. +struct PartialInterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> { + w: &'a mut W, + rng: &'a mut R, + /// In [0, 1]. If a random number in [0, 1] is `<= threshold`, `write()` will write all its + /// input. Otherwise, it will write a random substring + full_input_fraction: f64, + no_interrupt_fraction: f64, +} + +impl<'a, W: Write, R: Rng> Write for PartialInterruptingWriter<'a, W, R> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + if self.rng.gen_range(0.0..1.0) > self.no_interrupt_fraction { + return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")); + } + + if self.rng.gen_range(0.0..1.0) <= self.full_input_fraction || buf.is_empty() { + // pass through the buf untouched + self.w.write(buf) + } else { + // only use a prefix of it + self.w + .write(&buf[0..(self.rng.gen_range(0..(buf.len() - 1)))]) + } + } + + fn flush(&mut self) -> io::Result<()> { + self.w.flush() + } +} diff --git a/third_party/rust/base64/src/write/mod.rs b/third_party/rust/base64/src/write/mod.rs new file mode 100644 index 0000000000..2a617db9de --- /dev/null +++ b/third_party/rust/base64/src/write/mod.rs @@ -0,0 +1,11 @@ +//! Implementations of `io::Write` to transparently handle base64. +mod encoder; +mod encoder_string_writer; + +pub use self::{ + encoder::EncoderWriter, + encoder_string_writer::{EncoderStringWriter, StrConsumer}, +}; + +#[cfg(test)] +mod encoder_tests; |