use super::*; /// A push-based, lossy decoder for UTF-8. /// Errors are replaced with the U+FFFD replacement character. /// /// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback. /// /// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`) /// can be rewritten as: /// /// ```rust /// fn string_from_utf8_lossy(input: &[u8]) -> String { /// let mut string = String::new(); /// utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input); /// string /// } /// ``` /// /// **Note:** Dropping the decoder signals the end of the input: /// If the last input chunk ended with an incomplete byte sequence for a code point, /// this is an error and a replacement character is emitted. /// Use `std::mem::forget` to inhibit this behavior. pub struct LossyDecoder { push_str: F, incomplete: Incomplete, } impl LossyDecoder { /// Create a new decoder from a callback. #[inline] pub fn new(push_str: F) -> Self { LossyDecoder { push_str: push_str, incomplete: Incomplete { buffer: [0, 0, 0, 0], buffer_len: 0, }, } } /// Feed one chunk of input into the decoder. /// /// The input is decoded lossily /// and the callback called once or more with `&str` string slices. /// /// If the UTF-8 byte sequence for one code point was split into this bytes chunk /// and previous bytes chunks, it will be correctly pieced back together. pub fn feed(&mut self, mut input: &[u8]) { if self.incomplete.buffer_len > 0 { match self.incomplete.try_complete(input) { Some((Ok(s), remaining)) => { (self.push_str)(s); input = remaining } Some((Err(_), remaining)) => { (self.push_str)(REPLACEMENT_CHARACTER); input = remaining } None => { return } } } loop { match decode(input) { Ok(s) => { (self.push_str)(s); return } Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => { (self.push_str)(valid_prefix); self.incomplete = incomplete_suffix; return } Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => { (self.push_str)(valid_prefix); (self.push_str)(REPLACEMENT_CHARACTER); input = remaining_input } } } } } impl Drop for LossyDecoder { #[inline] fn drop(&mut self) { if self.incomplete.buffer_len > 0 { (self.push_str)(REPLACEMENT_CHARACTER) } } }