summary refs log tree commit diff stats
path: root/vendor/utf-8/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/utf-8/src/lib.rs')
-rw-r--r-- vendor/utf-8/src/lib.rs 186
1 files changed, 186 insertions, 0 deletions
diff --git a/vendor/utf-8/src/lib.rs b/vendor/utf-8/src/lib.rs
new file mode 100644
index 000000000..ec223f209
--- /dev/null
+++ b/vendor/utf-8/src/lib.rs
@@ -0,0 +1,186 @@
+mod lossy;
+mod read;
+
+pub use lossy::LossyDecoder;
+pub use read::{BufReadDecoder, BufReadDecoderError};
+
+use std::cmp;
+use std::error::Error;
+use std::fmt;
+use std::str;
+
/// The replacement character, U+FFFD, as a string slice.
///
/// In lossy decoding, insert it once for every decoding error.
pub const REPLACEMENT_CHARACTER: &str = "\u{FFFD}";
+
+#[derive(Debug, Copy, Clone)]
+pub enum DecodeError<'a> {
+ /// In lossy decoding insert `valid_prefix`, then `"\u{FFFD}"`,
+ /// then call `decode()` again with `remaining_input`.
+ Invalid {
+ valid_prefix: &'a str,
+ invalid_sequence: &'a [u8],
+ remaining_input: &'a [u8],
+ },
+
+ /// Call the `incomplete_suffix.try_complete` method with more input when available.
+ /// If no more input is available, this is an invalid byte sequence.
+ Incomplete {
+ valid_prefix: &'a str,
+ incomplete_suffix: Incomplete,
+ },
+}
+
+impl<'a> fmt::Display for DecodeError<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ DecodeError::Invalid {
+ valid_prefix,
+ invalid_sequence,
+ remaining_input,
+ } => write!(
+ f,
+ "found invalid byte sequence {invalid_sequence:02x?} after \
+ {valid_byte_count} valid bytes, followed by {unprocessed_byte_count} more \
+ unprocessed bytes",
+ invalid_sequence = invalid_sequence,
+ valid_byte_count = valid_prefix.len(),
+ unprocessed_byte_count = remaining_input.len()
+ ),
+ DecodeError::Incomplete {
+ valid_prefix,
+ incomplete_suffix,
+ } => write!(
+ f,
+ "found incomplete byte sequence {incomplete_suffix:02x?} after \
+ {valid_byte_count} bytes",
+ incomplete_suffix = incomplete_suffix,
+ valid_byte_count = valid_prefix.len()
+ ),
+ }
+ }
+}
+
+impl<'a> Error for DecodeError<'a> {}
+
/// The bytes of a UTF-8 sequence that was cut off before it could be decoded.
///
/// Only the first `buffer_len` bytes of `buffer` are in use; `Incomplete::new`
/// leaves the remaining bytes zeroed.
#[derive(Debug, Copy, Clone)]
pub struct Incomplete {
    /// Fixed four-byte storage for the pending bytes.
    pub buffer: [u8; 4],
    /// Number of leading bytes of `buffer` that are currently in use.
    pub buffer_len: u8,
}
+
+pub fn decode(input: &[u8]) -> Result<&str, DecodeError> {
+ let error = match str::from_utf8(input) {
+ Ok(valid) => return Ok(valid),
+ Err(error) => error,
+ };
+
+ // FIXME: separate function from here to guide inlining?
+ let (valid, after_valid) = input.split_at(error.valid_up_to());
+ let valid = unsafe {
+ str::from_utf8_unchecked(valid)
+ };
+
+ match error.error_len() {
+ Some(invalid_sequence_length) => {
+ let (invalid, rest) = after_valid.split_at(invalid_sequence_length);
+ Err(DecodeError::Invalid {
+ valid_prefix: valid,
+ invalid_sequence: invalid,
+ remaining_input: rest
+ })
+ }
+ None => {
+ Err(DecodeError::Incomplete {
+ valid_prefix: valid,
+ incomplete_suffix: Incomplete::new(after_valid),
+ })
+ }
+ }
+}
+
+impl Incomplete {
+ pub fn empty() -> Self {
+ Incomplete {
+ buffer: [0, 0, 0, 0],
+ buffer_len: 0,
+ }
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.buffer_len == 0
+ }
+
+ pub fn new(bytes: &[u8]) -> Self {
+ let mut buffer = [0, 0, 0, 0];
+ let len = bytes.len();
+ buffer[..len].copy_from_slice(bytes);
+ Incomplete {
+ buffer: buffer,
+ buffer_len: len as u8,
+ }
+ }
+
+ /// * `None`: still incomplete, call `try_complete` again with more input.
+ /// If no more input is available, this is invalid byte sequence.
+ /// * `Some((result, remaining_input))`: We’re done with this `Incomplete`.
+ /// To keep decoding, pass `remaining_input` to `decode()`.
+ pub fn try_complete<'input>(&mut self, input: &'input [u8])
+ -> Option<(Result<&str, &[u8]>, &'input [u8])> {
+ let (consumed, opt_result) = self.try_complete_offsets(input);
+ let result = opt_result?;
+ let remaining_input = &input[consumed..];
+ let result_bytes = self.take_buffer();
+ let result = match result {
+ Ok(()) => Ok(unsafe { str::from_utf8_unchecked(result_bytes) }),
+ Err(()) => Err(result_bytes),
+ };
+ Some((result, remaining_input))
+ }
+
+ fn take_buffer(&mut self) -> &[u8] {
+ let len = self.buffer_len as usize;
+ self.buffer_len = 0;
+ &self.buffer[..len as usize]
+ }
+
+ /// (consumed_from_input, None): not enough input
+ /// (consumed_from_input, Some(Err(()))): error bytes in buffer
+ /// (consumed_from_input, Some(Ok(()))): UTF-8 string in buffer
+ fn try_complete_offsets(&mut self, input: &[u8]) -> (usize, Option<Result<(), ()>>) {
+ let initial_buffer_len = self.buffer_len as usize;
+ let copied_from_input;
+ {
+ let unwritten = &mut self.buffer[initial_buffer_len..];
+ copied_from_input = cmp::min(unwritten.len(), input.len());
+ unwritten[..copied_from_input].copy_from_slice(&input[..copied_from_input]);
+ }
+ let spliced = &self.buffer[..initial_buffer_len + copied_from_input];
+ match str::from_utf8(spliced) {
+ Ok(_) => {
+ self.buffer_len = spliced.len() as u8;
+ (copied_from_input, Some(Ok(())))
+ }
+ Err(error) => {
+ let valid_up_to = error.valid_up_to();
+ if valid_up_to > 0 {
+ let consumed = valid_up_to.checked_sub(initial_buffer_len).unwrap();
+ self.buffer_len = valid_up_to as u8;
+ (consumed, Some(Ok(())))
+ } else {
+ match error.error_len() {
+ Some(invalid_sequence_length) => {
+ let consumed = invalid_sequence_length
+ .checked_sub(initial_buffer_len).unwrap();
+ self.buffer_len = invalid_sequence_length as u8;
+ (consumed, Some(Err(())))
+ }
+ None => {
+ self.buffer_len = spliced.len() as u8;
+ (copied_from_input, None)
+ }
+ }
+ }
+ }
+ }
+ }
+}