summary refs log tree commit diff stats
path: root/third_party/rust/regex-automata/src/util/escape.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex-automata/src/util/escape.rs')
-rw-r--r-- third_party/rust/regex-automata/src/util/escape.rs 84
1 files changed, 84 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/src/util/escape.rs b/third_party/rust/regex-automata/src/util/escape.rs
new file mode 100644
index 0000000000..7f6aa15f5d
--- /dev/null
+++ b/third_party/rust/regex-automata/src/util/escape.rs
@@ -0,0 +1,84 @@
+/*!
+Provides convenience routines for escaping raw bytes.
+
+Since this crate tends to deal with `&[u8]` everywhere and the default
+`Debug` implementation just shows decimal integers, it makes debugging those
+representations quite difficult. This module provides types that show `&[u8]`
+as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex
+representation.
+*/
+
+use crate::util::utf8;
+
+/// A thin wrapper around a single `u8` that supplies a readable `Debug` impl.
+///
+/// The byte is interpreted as ASCII and rendered through
+/// `core::ascii::escape_default`, so printable bytes appear as themselves and
+/// everything else appears as an escape sequence. Hex digits inside a `\xNN`
+/// escape are upper-cased, and the ASCII space is shown wrapped in single
+/// quotes.
+#[derive(Clone, Copy)]
+pub struct DebugByte(pub u8);
+
+impl core::fmt::Debug for DebugByte {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let byte = self.0;
+        // A bare space is nearly invisible in debug output, so it gets
+        // quoted instead of being printed as-is.
+        if byte == b' ' {
+            return write!(f, "' '");
+        }
+        // A 10 byte scratch buffer comfortably holds anything that
+        // ascii::escape_default can emit for one byte.
+        let mut buf = [0u8; 10];
+        let mut written = 0;
+        for (pos, raw) in core::ascii::escape_default(byte).enumerate() {
+            // Only the digits after a leading `\x` sit at position 2 or
+            // later, so this turns \xab into \xAB without touching short
+            // escapes such as \n or plain printable bytes.
+            let is_lower_hex = pos >= 2 && (b'a'..=b'f').contains(&raw);
+            buf[written] = if is_lower_hex { raw - 32 } else { raw };
+            written += 1;
+        }
+        write!(f, "{}", core::str::from_utf8(&buf[..written]).unwrap())
+    }
+}
+
+/// Provides a convenient `Debug` implementation for `&[u8]`.
+///
+/// This generally works best when the bytes are presumed to be mostly UTF-8,
+/// but will work for anything. Bytes that do not decode as UTF-8 are emitted
+/// as `\xNN` hex escape sequences, as are ASCII control characters other than
+/// `\0`, `\n`, `\r` and `\t`. The whole output is wrapped in double quotes.
+pub struct DebugHaystack<'a>(pub &'a [u8]);
+
+impl<'a> core::fmt::Debug for DebugHaystack<'a> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "\"")?;
+        // This is a sad re-implementation of a similar impl found in bstr.
+        let mut bytes = self.0;
+        // NOTE(review): presumably `utf8::decode` returns `None` once `bytes`
+        // is empty, which is what ends this loop -- confirm against its
+        // definition.
+        while let Some(result) = utf8::decode(bytes) {
+            let ch = match result {
+                Ok(ch) => ch,
+                Err(byte) => {
+                    // Not decodable: show the offending byte as a hex escape
+                    // and resume one byte further along.
+                    write!(f, r"\x{:02x}", byte)?;
+                    bytes = &bytes[1..];
+                    continue;
+                }
+            };
+            // Step past the full encoded width of the decoded scalar value.
+            bytes = &bytes[ch.len_utf8()..];
+            match ch {
+                '\0' => write!(f, "\\0")?,
+                // ASCII control characters except \0, \n, \r, \t. The C0
+                // control block runs through \x1f, so the range must extend
+                // that far; otherwise \x1a..=\x1f are printed by escape_debug
+                // as \u{1a} and so on, unlike every other control character.
+                '\x01'..='\x08'
+                | '\x0b'
+                | '\x0c'
+                | '\x0e'..='\x1f'
+                | '\x7f' => {
+                    write!(f, "\\x{:02x}", u32::from(ch))?;
+                }
+                // Everything else, including \n, \r and \t, uses the
+                // standard library's debug escaping.
+                _ => write!(f, "{}", ch.escape_debug())?,
+            }
+        }
+        write!(f, "\"")
+    }
+}