diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/regex-automata/src/util/escape.rs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/regex-automata/src/util/escape.rs')
-rw-r--r-- | third_party/rust/regex-automata/src/util/escape.rs | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/src/util/escape.rs b/third_party/rust/regex-automata/src/util/escape.rs new file mode 100644 index 0000000000..7f6aa15f5d --- /dev/null +++ b/third_party/rust/regex-automata/src/util/escape.rs @@ -0,0 +1,84 @@ +/*! +Provides convenience routines for escaping raw bytes. + +Since this crate tends to deal with `&[u8]` everywhere and the default +`Debug` implementation just shows decimal integers, it makes debugging those +representations quite difficult. This module provides types that show `&[u8]` +as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex +representation. +*/ + +use crate::util::utf8; + +/// Provides a convenient `Debug` implementation for a `u8`. +/// +/// The `Debug` impl treats the byte as an ASCII, and emits a human readable +/// representation of it. If the byte isn't ASCII, then it's emitted as a hex +/// escape sequence. +#[derive(Clone, Copy)] +pub struct DebugByte(pub u8); + +impl core::fmt::Debug for DebugByte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // Special case ASCII space. It's too hard to read otherwise, so + // put quotes around it. I sometimes wonder whether just '\x20' would + // be better... + if self.0 == b' ' { + return write!(f, "' '"); + } + // 10 bytes is enough to cover any output from ascii::escape_default. + let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} + +/// Provides a convenient `Debug` implementation for `&[u8]`. +/// +/// This generally works best when the bytes are presumed to be mostly UTF-8, +/// but will work for anything. For any bytes that aren't UTF-8, they are +/// emitted as hex escape sequences. +pub struct DebugHaystack<'a>(pub &'a [u8]); + +impl<'a> core::fmt::Debug for DebugHaystack<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + // This is a sad re-implementation of a similar impl found in bstr. + let mut bytes = self.0; + while let Some(result) = utf8::decode(bytes) { + let ch = match result { + Ok(ch) => ch, + Err(byte) => { + write!(f, r"\x{:02x}", byte)?; + bytes = &bytes[1..]; + continue; + } + }; + bytes = &bytes[ch.len_utf8()..]; + match ch { + '\0' => write!(f, "\\0")?, + // ASCII control characters except \0, \n, \r, \t + '\x01'..='\x08' + | '\x0b' + | '\x0c' + | '\x0e'..='\x19' + | '\x7f' => { + write!(f, "\\x{:02x}", u32::from(ch))?; + } + '\n' | '\r' | '\t' | _ => { + write!(f, "{}", ch.escape_debug())?; + } + } + } + write!(f, "\"")?; + Ok(()) + } +} |