diff options
Diffstat (limited to 'vendor/similar/src/text/abstraction.rs')
-rw-r--r-- | vendor/similar/src/text/abstraction.rs | 450 |
1 file changed, 450 insertions, 0 deletions
diff --git a/vendor/similar/src/text/abstraction.rs b/vendor/similar/src/text/abstraction.rs new file mode 100644 index 0000000..99678ff --- /dev/null +++ b/vendor/similar/src/text/abstraction.rs @@ -0,0 +1,450 @@ +use std::borrow::Cow; +use std::hash::Hash; +use std::ops::Range; + +/// Reference to a [`DiffableStr`]. +/// +/// This type exists because while the library only really provides ways to +/// work with `&str` and `&[u8]` there are types that deref into those string +/// slices such as `String` and `Vec<u8>`. +/// +/// This trait is used in the library whenever it's nice to be able to pass +/// strings of different types in. +/// +/// Requires the `text` feature. +pub trait DiffableStrRef { + /// The type of the resolved [`DiffableStr`]. + type Output: DiffableStr + ?Sized; + + /// Resolves the reference. + fn as_diffable_str(&self) -> &Self::Output; +} + +impl<T: DiffableStr + ?Sized> DiffableStrRef for T { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + +impl DiffableStrRef for String { + type Output = str; + + fn as_diffable_str(&self) -> &str { + self.as_str() + } +} + +impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + +/// All supported diffable strings. +/// +/// The text module can work with different types of strings depending +/// on how the crate is compiled. Out of the box `&str` is always supported +/// but with the `bytes` feature one can also work with `[u8]` slices for +/// as long as they are ASCII compatible. +/// +/// Requires the `text` feature. +pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned { + /// Splits the value into newlines with newlines attached. + fn tokenize_lines(&self) -> Vec<&Self>; + + /// Splits the value into newlines with newlines separated. + fn tokenize_lines_and_newlines(&self) -> Vec<&Self>; + + /// Tokenizes into words. 
+ fn tokenize_words(&self) -> Vec<&Self>; + + /// Tokenizes the input into characters. + fn tokenize_chars(&self) -> Vec<&Self>; + + /// Tokenizes into unicode words. + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self>; + + /// Tokenizes into unicode graphemes. + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self>; + + /// Decodes the string (potentially) lossy. + fn as_str(&self) -> Option<&str>; + + /// Decodes the string (potentially) lossy. + fn to_string_lossy(&self) -> Cow<'_, str>; + + /// Checks if the string ends in a newline. + fn ends_with_newline(&self) -> bool; + + /// The length of the string. + fn len(&self) -> usize; + + /// Slices the string. + fn slice(&self, rng: Range<usize>) -> &Self; + + /// Returns the string as slice of raw bytes. + fn as_bytes(&self) -> &[u8]; + + /// Checks if the string is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +impl DiffableStr for str { + fn tokenize_lines(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut last_pos = 0; + let mut lines = vec![]; + + while let Some((idx, c)) = iter.next() { + if c == '\r' { + if iter.peek().map_or(false, |x| x.1 == '\n') { + lines.push(&self[last_pos..=idx + 1]); + iter.next(); + last_pos = idx + 2; + } else { + lines.push(&self[last_pos..=idx]); + last_pos = idx + 1; + } + } else if c == '\n' { + lines.push(&self[last_pos..=idx]); + last_pos = idx + 1; + } + } + + if last_pos < self.len() { + lines.push(&self[last_pos..]); + } + + lines + } + + fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { + let mut rv = vec![]; + let mut iter = self.char_indices().peekable(); + + while let Some((idx, c)) = iter.next() { + let is_newline = c == '\r' || c == '\n'; + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if (next_char == '\r' || next_char == '\n') != is_newline { + break; + } + iter.next(); + end += next_char.len_utf8(); + } + 
rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_words(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut rv = vec![]; + + while let Some((idx, c)) = iter.next() { + let is_whitespace = c.is_whitespace(); + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; + } + iter.next(); + end += next_char.len_utf8(); + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_chars(&self) -> Vec<&Self> { + self.char_indices() + .map(move |(i, c)| &self[i..i + c.len_utf8()]) + .collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self> { + unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self> { + unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect() + } + + fn as_str(&self) -> Option<&str> { + Some(self) + } + + fn to_string_lossy(&self) -> Cow<'_, str> { + Cow::Borrowed(self) + } + + fn ends_with_newline(&self) -> bool { + self.ends_with(&['\r', '\n'][..]) + } + + fn len(&self) -> usize { + str::len(self) + } + + fn slice(&self, rng: Range<usize>) -> &Self { + &self[rng] + } + + fn as_bytes(&self) -> &[u8] { + str::as_bytes(self) + } +} + +#[cfg(feature = "bytes")] +mod bytes_support { + use super::*; + + use bstr::ByteSlice; + + impl DiffableStrRef for Vec<u8> { + type Output = [u8]; + + fn as_diffable_str(&self) -> &[u8] { + self.as_slice() + } + } + + /// Allows viewing ASCII compatible byte slices as strings. + /// + /// Requires the `bytes` feature. 
+ impl DiffableStr for [u8] { + fn tokenize_lines(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut last_pos = 0; + let mut lines = vec![]; + + while let Some((_, end, c)) = iter.next() { + if c == '\r' { + if iter.peek().map_or(false, |x| x.2 == '\n') { + lines.push(&self[last_pos..end + 1]); + iter.next(); + last_pos = end + 1; + } else { + lines.push(&self[last_pos..end]); + last_pos = end; + } + } else if c == '\n' { + lines.push(&self[last_pos..end]); + last_pos = end; + } + } + + if last_pos < self.len() { + lines.push(&self[last_pos..]); + } + + lines + } + + fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { + let mut rv = vec![]; + let mut iter = self.char_indices().peekable(); + + while let Some((start, mut end, c)) = iter.next() { + let is_newline = c == '\r' || c == '\n'; + while let Some(&(_, new_end, next_char)) = iter.peek() { + if (next_char == '\r' || next_char == '\n') != is_newline { + break; + } + iter.next(); + end = new_end; + } + rv.push(&self[start..end]); + } + + rv + } + + fn tokenize_words(&self) -> Vec<&Self> { + let mut iter = self.char_indices().peekable(); + let mut rv = vec![]; + + while let Some((start, mut end, c)) = iter.next() { + let is_whitespace = c.is_whitespace(); + while let Some(&(_, new_end, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; + } + iter.next(); + end = new_end; + } + rv.push(&self[start..end]); + } + + rv + } + + #[cfg(feature = "unicode")] + fn tokenize_unicode_words(&self) -> Vec<&Self> { + self.words_with_breaks().map(|x| x.as_bytes()).collect() + } + + #[cfg(feature = "unicode")] + fn tokenize_graphemes(&self) -> Vec<&Self> { + self.graphemes().map(|x| x.as_bytes()).collect() + } + + fn tokenize_chars(&self) -> Vec<&Self> { + self.char_indices() + .map(move |(start, end, _)| &self[start..end]) + .collect() + } + + fn as_str(&self) -> Option<&str> { + std::str::from_utf8(self).ok() + } + + fn to_string_lossy(&self) -> Cow<'_, str> { 
+ String::from_utf8_lossy(self) + } + + fn ends_with_newline(&self) -> bool { + if let Some(b'\r') | Some(b'\n') = self.last_byte() { + true + } else { + false + } + } + + fn len(&self) -> usize { + <[u8]>::len(self) + } + + fn slice(&self, rng: Range<usize>) -> &Self { + &self[rng] + } + + fn as_bytes(&self) -> &[u8] { + self + } + } +} + +#[test] +fn test_split_lines() { + assert_eq!( + DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast"), + vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"] + ); + assert_eq!(DiffableStr::tokenize_lines("\n\n"), vec!["\n", "\n"]); + assert_eq!(DiffableStr::tokenize_lines("\n"), vec!["\n"]); + assert!(DiffableStr::tokenize_lines("").is_empty()); +} + +#[test] +fn test_split_words() { + assert_eq!( + DiffableStr::tokenize_words("foo bar baz\n\n aha"), + ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"] + ); +} + +#[test] +fn test_split_chars() { + assert_eq!( + DiffableStr::tokenize_chars("abcfö❄️"), + vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"] + ); +} + +#[test] +#[cfg(feature = "unicode")] +fn test_split_graphemes() { + assert_eq!( + DiffableStr::tokenize_graphemes("abcfö❄️"), + vec!["a", "b", "c", "f", "ö", "❄️"] + ); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_lines_bytes() { + assert_eq!( + DiffableStr::tokenize_lines("first\nsecond\rthird\r\nfourth\nlast".as_bytes()), + vec![ + "first\n".as_bytes(), + "second\r".as_bytes(), + "third\r\n".as_bytes(), + "fourth\n".as_bytes(), + "last".as_bytes() + ] + ); + assert_eq!( + DiffableStr::tokenize_lines("\n\n".as_bytes()), + vec!["\n".as_bytes(), "\n".as_bytes()] + ); + assert_eq!( + DiffableStr::tokenize_lines("\n".as_bytes()), + vec!["\n".as_bytes()] + ); + assert!(DiffableStr::tokenize_lines("".as_bytes()).is_empty()); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_words_bytes() { + assert_eq!( + DiffableStr::tokenize_words("foo bar baz\n\n aha".as_bytes()), + [ + &b"foo"[..], + &b" "[..], + &b"bar"[..], + &b" "[..], + &b"baz"[..], + 
&b"\n\n "[..], + &b"aha"[..] + ] + ); +} + +#[test] +#[cfg(feature = "bytes")] +fn test_split_chars_bytes() { + assert_eq!( + DiffableStr::tokenize_chars("abcfö❄️".as_bytes()), + vec![ + &b"a"[..], + &b"b"[..], + &b"c"[..], + &b"f"[..], + "ö".as_bytes(), + "❄".as_bytes(), + "\u{fe0f}".as_bytes() + ] + ); +} + +#[test] +#[cfg(all(feature = "bytes", feature = "unicode"))] +fn test_split_graphemes_bytes() { + assert_eq!( + DiffableStr::tokenize_graphemes("abcfö❄️".as_bytes()), + vec![ + &b"a"[..], + &b"b"[..], + &b"c"[..], + &b"f"[..], + "ö".as_bytes(), + "❄️".as_bytes() + ] + ); +} |