summaryrefslogtreecommitdiffstats
path: root/vendor/utf-8/src/lossy.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/utf-8/src/lossy.rs
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/utf-8/src/lossy.rs')
-rw-r--r--vendor/utf-8/src/lossy.rs92
1 files changed, 92 insertions, 0 deletions
diff --git a/vendor/utf-8/src/lossy.rs b/vendor/utf-8/src/lossy.rs
new file mode 100644
index 000000000..00bcdecf0
--- /dev/null
+++ b/vendor/utf-8/src/lossy.rs
@@ -0,0 +1,92 @@
+use super::*;
+
+/// Lossy, push-style UTF-8 decoder.
+///
+/// Bytes are handed to the decoder chunk by chunk through `feed`; the decoder
+/// forwards the decoded text, as `&str` slices, to the callback it was built with.
+/// Wherever the input is not valid UTF-8, the U+FFFD replacement character is emitted instead.
+///
+/// As an illustration, `String::from_utf8_lossy`
+/// (returning a `String` rather than a `Cow`) could be written as:
+///
+/// ```rust
+/// fn string_from_utf8_lossy(input: &[u8]) -> String {
+///     let mut string = String::new();
+///     utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input);
+///     string
+/// }
+/// ```
+///
+/// **Note:** dropping the decoder marks the end of the input.
+/// A code point left unfinished by the last chunk is then an error,
+/// so one replacement character is passed to the callback on drop.
+/// Pass the decoder to `std::mem::forget` to suppress that.
+pub struct LossyDecoder<F>
+where
+    F: FnMut(&str),
+{
+    push_str: F,
+    incomplete: Incomplete,
+}
+
+impl<F: FnMut(&str)> LossyDecoder<F> {
+    /// Build a decoder that reports decoded text through `push_str`.
+    ///
+    /// The decoder starts out with no buffered partial code point.
+    #[inline]
+    pub fn new(push_str: F) -> Self {
+        let incomplete = Incomplete {
+            buffer: [0, 0, 0, 0],
+            buffer_len: 0,
+        };
+        LossyDecoder { push_str, incomplete }
+    }
+
+    /// Decode one more chunk of input, lossily.
+    ///
+    /// Decoded text is handed to the callback as `&str` slices, and each
+    /// invalid sequence is reported as a replacement character.
+    ///
+    /// A code point whose bytes straddle chunk boundaries is pieced back together:
+    /// bytes buffered from earlier chunks are completed with the start of `input`
+    /// before the rest of the chunk is decoded.
+    pub fn feed(&mut self, mut input: &[u8]) {
+        // Borrow the callback once; it is a separate field from `incomplete`.
+        let emit = &mut self.push_str;
+
+        let has_pending = self.incomplete.buffer_len > 0;
+        if has_pending {
+            let (completed, rest) = match self.incomplete.try_complete(input) {
+                Some(outcome) => outcome,
+                // No result yet (presumably the sequence is still incomplete):
+                // emit nothing for this chunk.
+                None => return,
+            };
+            // A failed completion is reported as one replacement character.
+            emit(completed.unwrap_or(REPLACEMENT_CHARACTER));
+            input = rest;
+        }
+
+        loop {
+            match decode(input) {
+                Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
+                    emit(valid_prefix);
+                    emit(REPLACEMENT_CHARACTER);
+                    // Resume decoding with what follows the invalid sequence.
+                    input = remaining_input;
+                }
+                Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
+                    emit(valid_prefix);
+                    // Keep the partial sequence for the next `feed` call (or for `drop`).
+                    self.incomplete = incomplete_suffix;
+                    break;
+                }
+                Ok(text) => {
+                    emit(text);
+                    break;
+                }
+            }
+        }
+    }
+}
+
+impl<F: FnMut(&str)> Drop for LossyDecoder<F> {
+    /// Treat the drop as the end of the input: if bytes of an unfinished
+    /// code point are still buffered, emit one replacement character.
+    #[inline]
+    fn drop(&mut self) {
+        let has_pending = self.incomplete.buffer_len > 0;
+        if has_pending {
+            (self.push_str)(REPLACEMENT_CHARACTER);
+        }
+    }
+}