diff options
Diffstat (limited to 'vendor/dissimilar')
-rw-r--r-- | vendor/dissimilar/.cargo-checksum.json | 2 | ||||
-rw-r--r-- | vendor/dissimilar/Cargo.toml | 10 | ||||
-rw-r--r-- | vendor/dissimilar/LICENSE-APACHE | 25 | ||||
-rw-r--r-- | vendor/dissimilar/README.md | 6 | ||||
-rw-r--r-- | vendor/dissimilar/src/find.rs | 14 | ||||
-rw-r--r-- | vendor/dissimilar/src/lib.rs | 169 | ||||
-rw-r--r-- | vendor/dissimilar/src/range.rs | 49 | ||||
-rw-r--r-- | vendor/dissimilar/src/tests.rs | 131 | ||||
-rw-r--r-- | vendor/dissimilar/tests/test.rs | 16 |
9 files changed, 211 insertions, 211 deletions
diff --git a/vendor/dissimilar/.cargo-checksum.json b/vendor/dissimilar/.cargo-checksum.json index 889c53a44..cfb8c46ea 100644 --- a/vendor/dissimilar/.cargo-checksum.json +++ b/vendor/dissimilar/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"abe95bc027ce5fe4aae082e8560c12d43f015ea85453be0ca6df6ded8f29e4da","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"a9480cd29fe4eefae782c5ec20a05f88ca28d3ca1573a893fa423b931e3ca392","benches/bench.rs":"e62b50ebe922590a7251197b50047a3924be98c6193e1f0fbca552d66fd05f9d","benches/document1.txt":"92a6f5c3992d98632eea7a6c6261cf1a26ae484b34358778b774a2d58fd356d3","benches/document2.txt":"8d106ddba8bd4a85a8bb4b59e481b88f536de98046dc8e4f76f7551c265c5dd3","src/find.rs":"32f68fa18bd547f5c716895bc580c12d6fe8503f86044fc0334f1b1e3cd3ac97","src/lib.rs":"7c1bf347cb87d22dde987da421931644dea3ed84e5e48b8dad44cb5579cb9f04","src/range.rs":"8652a374da1f7959ed912891e610a1e72026ba5315362cfb529bd2724ce69fc6","src/tests.rs":"e2a68e2b724ec65a062b634b86114e3a2e445c4693e312c08d9648f6621ea9d2","tests/test.rs":"4dcc2007359d6bf6a48590fcdab9cc81787a18aac8dc9c1c4be1019d95ca690e"},"package":"8c97b9233581d84b8e1e689cdd3a47b6f69770084fc246e86a7f78b0d9c1d4a5"}
\ No newline at end of file +{"files":{"Cargo.toml":"b5579f3eb0d811d6ad116c247138911fee4404c80f49bb6020df06394d0467ec","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"e081b60ac4ad261ee3c57f32131634017044193b94ce2ebd64d134d22185e79a","benches/bench.rs":"e62b50ebe922590a7251197b50047a3924be98c6193e1f0fbca552d66fd05f9d","benches/document1.txt":"92a6f5c3992d98632eea7a6c6261cf1a26ae484b34358778b774a2d58fd356d3","benches/document2.txt":"8d106ddba8bd4a85a8bb4b59e481b88f536de98046dc8e4f76f7551c265c5dd3","src/find.rs":"4587b9fc3fc32149898c6daf50624c41ab9de9ec4de5baa3a4d3644436dccf5f","src/lib.rs":"9749cf7915c3f5682144f3eb0ec3d1b5d9c3457b740a7ced8e91211be6a8549c","src/range.rs":"9b4f5f0125d927f985cf5c3a92452e4c28273842d9ff7debc2d3584db5d7d0f6","src/tests.rs":"222464295b0558fe505f9d2d53f315dd164cc85e323e32e523f738eec771cdbe","tests/test.rs":"2a4ccfea35304fa2fc2b2b38efe9da6a1b5fd5f6c1247ba01e85232d19b70206"},"package":"210ec60ae7d710bed8683e333e9d2855a8a56a3e9892b38bad3bb0d4d29b0d5e"}
\ No newline at end of file diff --git a/vendor/dissimilar/Cargo.toml b/vendor/dissimilar/Cargo.toml index d657b1742..3fdb995a5 100644 --- a/vendor/dissimilar/Cargo.toml +++ b/vendor/dissimilar/Cargo.toml @@ -11,9 +11,9 @@ [package] edition = "2018" -rust-version = "1.31" +rust-version = "1.36" name = "dissimilar" -version = "1.0.4" +version = "1.0.6" authors = ["David Tolnay <dtolnay@gmail.com>"] description = "Diff library with semantic cleanup, based on Google's diff-match-patch" documentation = "https://docs.rs/dissimilar" @@ -28,3 +28,9 @@ repository = "https://github.com/dtolnay/dissimilar" [package.metadata.docs.rs] targets = ["x86_64-unknown-linux-gnu"] + +[lib] +doc-scrape-examples = false + +[dev-dependencies.once_cell] +version = "1" diff --git a/vendor/dissimilar/LICENSE-APACHE b/vendor/dissimilar/LICENSE-APACHE index 16fe87b06..1b5ec8b78 100644 --- a/vendor/dissimilar/LICENSE-APACHE +++ b/vendor/dissimilar/LICENSE-APACHE @@ -174,28 +174,3 @@ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/vendor/dissimilar/README.md b/vendor/dissimilar/README.md index 492bced22..82ce66995 100644 --- a/vendor/dissimilar/README.md +++ b/vendor/dissimilar/README.md @@ -3,8 +3,8 @@ Dissimilar: diff library with semantic cleanup [<img alt="github" src="https://img.shields.io/badge/github-dtolnay/dissimilar-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/dtolnay/dissimilar) [<img alt="crates.io" src="https://img.shields.io/crates/v/dissimilar.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/dissimilar) -[<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-dissimilar-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo=" height="20">](https://docs.rs/dissimilar) -[<img alt="build status" src="https://img.shields.io/github/workflow/status/dtolnay/dissimilar/CI/master?style=for-the-badge" height="20">](https://github.com/dtolnay/dissimilar/actions?query=branch%3Amaster) +[<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-dissimilar-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs" height="20">](https://docs.rs/dissimilar) +[<img alt="build status" src="https://img.shields.io/github/actions/workflow/status/dtolnay/dissimilar/ci.yml?branch=master&style=for-the-badge" height="20">](https://github.com/dtolnay/dissimilar/actions?query=branch%3Amaster) This library is a port of the Diff component of [Diff Match Patch] to Rust. The diff implementation is based on [Myers' diff algorithm] but includes some @@ -22,7 +22,7 @@ Diff Match Patch was originally built in 2006 to power Google Docs. dissimilar = "1.0" ``` -*Compiler support: requires rustc 1.31+* +*Compiler support: requires rustc 1.36+* <br> diff --git a/vendor/dissimilar/src/find.rs b/vendor/dissimilar/src/find.rs index 90ca2c6c5..4af3b8bee 100644 --- a/vendor/dissimilar/src/find.rs +++ b/vendor/dissimilar/src/find.rs @@ -1,5 +1,5 @@ // The strstr implementation in this file is extracted from the Rust standard -// library's str::find. The algorithm works for arbitrary &[u8] haystack and +// library's str::find. The algorithm works for arbitrary &[T] haystack and // needle but is only exposed by the standard library on UTF-8 strings. // // https://github.com/rust-lang/rust/blob/1.40.0/src/libcore/str/pattern.rs @@ -80,7 +80,7 @@ use std::cmp; use std::usize; -pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { +pub fn find(haystack: &[char], needle: &[char]) -> Option<usize> { assert!(!needle.is_empty()); // crit_pos: critical factorization index @@ -177,12 +177,12 @@ pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { } } -fn byteset_create(bytes: &[u8]) -> u64 { - bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a) +fn byteset_create(chars: &[char]) -> u64 { + chars.iter().fold(0, |a, &ch| (1 << (ch as u8 & 0x3f)) | a) } -fn byteset_contains(byteset: u64, byte: u8) -> bool { - (byteset >> ((byte & 0x3f) as usize)) & 1 != 0 +fn byteset_contains(byteset: u64, ch: char) -> bool { + (byteset >> ((ch as u8 & 0x3f) as usize)) & 1 != 0 } // Compute the maximal suffix of `arr`. @@ -197,7 +197,7 @@ fn byteset_contains(byteset: u64, byte: u8) -> bool { // a critical factorization. // // For long period cases, the resulting period is not exact (it is too short). -fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) { +fn maximal_suffix(arr: &[char], order_greater: bool) -> (usize, usize) { let mut left = 0; // Corresponds to i in the paper let mut right = 1; // Corresponds to j in the paper let mut offset = 0; // Corresponds to k in the paper, but starting at 0 diff --git a/vendor/dissimilar/src/lib.rs b/vendor/dissimilar/src/lib.rs index 8ce9faad3..b66434ade 100644 --- a/vendor/dissimilar/src/lib.rs +++ b/vendor/dissimilar/src/lib.rs @@ -2,7 +2,7 @@ //! //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github //! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust -//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo= +//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs //! //! <br> //! @@ -37,9 +37,10 @@ //! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf //! [semantic cleanups]: https://neil.fraser.name/writing/diff/ -#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.4")] +#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.6")] #![allow( clippy::blocks_in_if_conditions, + clippy::bool_to_int_with_if, clippy::cast_possible_wrap, clippy::cast_sign_loss, clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127 @@ -63,10 +64,10 @@ mod range; #[cfg(test)] mod tests; -use crate::range::{bytes, str, Range}; +use crate::range::{slice, Range}; use std::cmp; use std::collections::VecDeque; -use std::fmt::{self, Debug}; +use std::fmt::{self, Debug, Display, Write}; #[derive(Copy, Clone, PartialEq, Eq)] pub enum Chunk<'a> { @@ -121,20 +122,49 @@ impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> { } pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> { - let text1 = Range::new(text1, ..); - let text2 = Range::new(text2, ..); - let mut solution = main(text1, text2); + let chars1: Vec<char> = text1.chars().collect(); + let chars2: Vec<char> = text2.chars().collect(); + let range1 = Range::new(&chars1, ..); + let range2 = Range::new(&chars2, ..); + + let mut solution = main(range1, range2); cleanup_char_boundary(&mut solution); cleanup_semantic(&mut solution); cleanup_merge(&mut solution); - solution.diffs.into_iter().map(Chunk::from).collect() + + let mut chunks = Vec::new(); + let mut pos1 = 0; + let mut pos2 = 0; + for diff in solution.diffs { + chunks.push(match diff { + Diff::Equal(range, _) => { + let len = range.len_bytes(); + let chunk = Chunk::Equal(&text1[pos1..pos1 + len]); + pos1 += len; + pos2 += len; + chunk + } + Diff::Delete(range) => { + let len = range.len_bytes(); + let chunk = Chunk::Delete(&text1[pos1..pos1 + len]); + pos1 += len; + chunk + } + Diff::Insert(range) => { + let len = range.len_bytes(); + let chunk = Chunk::Insert(&text2[pos2..pos2 + len]); + pos2 += len; + chunk + } + }); + } + chunks } struct Solution<'a, 'b> { text1: Range<'a>, text2: Range<'b>, diffs: Vec<Diff<'a, 'b>>, - utf8: bool, } fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> { @@ -142,7 +172,7 @@ fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> let whole2 = text2; // Trim off common prefix. - let common_prefix_len = common_prefix_bytes(text1, text2); + let common_prefix_len = common_prefix(text1, text2); let common_prefix = Diff::Equal( text1.substring(..common_prefix_len), text2.substring(..common_prefix_len), @@ -151,7 +181,7 @@ fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> text2 = text2.substring(common_prefix_len..); // Trim off common suffix. - let common_suffix_len = common_suffix_bytes(text1, text2); + let common_suffix_len = common_suffix(text1, text2); let common_suffix = Diff::Equal( text1.substring(text1.len - common_suffix_len..), text2.substring(text2.len - common_suffix_len..), @@ -164,7 +194,6 @@ fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> text1: whole1, text2: whole2, diffs: compute(text1, text2), - utf8: false, }; // Restore the prefix and suffix. @@ -252,7 +281,7 @@ fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> { } as usize; let mut y1 = (x1 as isize - k1) as usize; if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) { - let advance = common_prefix_bytes(s1, s2); + let advance = common_prefix(s1, s2); x1 += advance; y1 += advance; } @@ -288,7 +317,7 @@ fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> { } as usize; let mut y2 = (x2 as isize - k2) as usize; if x2 < text1.len && y2 < text2.len { - let advance = common_suffix_bytes( + let advance = common_suffix( text1.substring(..text1.len - x2), text2.substring(..text2.len - y2), ); @@ -342,8 +371,8 @@ fn bisect_split<'a, 'b>( // Determine the length of the common prefix of two strings. fn common_prefix(text1: Range, text2: Range) -> usize { - for ((i, ch1), ch2) in text1.char_indices().zip(text2.chars()) { - if ch1 != ch2 { + for (i, (b1, b2)) in text1.chars().zip(text2.chars()).enumerate() { + if b1 != b2 { return i; } } @@ -352,25 +381,7 @@ fn common_prefix(text1: Range, text2: Range) -> usize { // Determine the length of the common suffix of two strings. fn common_suffix(text1: Range, text2: Range) -> usize { - for ((i, ch1), ch2) in text1.char_indices().rev().zip(text2.chars().rev()) { - if ch1 != ch2 { - return text1.len - i - ch1.len_utf8(); - } - } - cmp::min(text1.len, text2.len) -} - -fn common_prefix_bytes(text1: Range, text2: Range) -> usize { - for (i, (b1, b2)) in text1.bytes().zip(text2.bytes()).enumerate() { - if b1 != b2 { - return i; - } - } - cmp::min(text1.len, text2.len) -} - -fn common_suffix_bytes(text1: Range, text2: Range) -> usize { - for (i, (b1, b2)) in text1.bytes().rev().zip(text2.bytes().rev()).enumerate() { + for (i, (b1, b2)) in text1.chars().rev().zip(text2.chars().rev()).enumerate() { if b1 != b2 { return i; } @@ -394,7 +405,7 @@ fn common_overlap(mut text1: Range, mut text2: Range) -> usize { text2 = text2.substring(..text1.len); } // Quick check for the worst case. - if bytes(text1) == bytes(text2) { + if slice(text1) == slice(text2) { return text1.len; } @@ -411,7 +422,7 @@ fn common_overlap(mut text1: Range, mut text2: Range) -> usize { }; length += found; if found == 0 - || bytes(text1.substring(text1.len - length..)) == bytes(text2.substring(..length)) + || slice(text1.substring(text1.len - length..)) == slice(text2.substring(..length)) { best = length; length += 1; @@ -420,17 +431,24 @@ fn common_overlap(mut text1: Range, mut text2: Range) -> usize { } fn cleanup_char_boundary(solution: &mut Solution) { - fn boundary_down(doc: &str, pos: usize) -> usize { + fn is_segmentation_boundary(doc: &[char], pos: usize) -> bool { + // FIXME: use unicode-segmentation crate? + let _ = doc; + let _ = pos; + true + } + + fn boundary_down(doc: &[char], pos: usize) -> usize { let mut adjust = 0; - while !doc.is_char_boundary(pos - adjust) { + while !is_segmentation_boundary(doc, pos - adjust) { adjust += 1; } adjust } - fn boundary_up(doc: &str, pos: usize) -> usize { + fn boundary_up(doc: &[char], pos: usize) -> usize { let mut adjust = 0; - while !doc.is_char_boundary(pos + adjust) { + while !is_segmentation_boundary(doc, pos + adjust) { adjust += 1; } adjust @@ -498,7 +516,6 @@ fn cleanup_char_boundary(solution: &mut Solution) { } solution.diffs.truncate(retain); - solution.utf8 = true; } // Reduce the number of edits by eliminating semantically trivial equalities. @@ -658,14 +675,13 @@ fn cleanup_semantic_lossless(solution: &mut Solution) { && !next_equal1.is_empty() && edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap() { - let increment = edit.text().chars().next().unwrap().len_utf8(); - prev_equal1.len += increment; - prev_equal2.len += increment; - edit.shift_right(increment); - next_equal1.offset += increment; - next_equal1.len -= increment; - next_equal2.offset += increment; - next_equal2.len -= increment; + prev_equal1.len += 1; + prev_equal2.len += 1; + edit.shift_right(1); + next_equal1.offset += 1; + next_equal1.len -= 1; + next_equal2.offset += 1; + next_equal2.len -= 1; let score = cleanup_semantic_score(prev_equal1, edit.text()) + cleanup_semantic_score(edit.text(), next_equal1); // The >= encourages trailing rather than leading whitespace on edits. @@ -720,8 +736,10 @@ fn cleanup_semantic_score(one: Range, two: Range) -> usize { let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace(); let line_break1 = whitespace1 && char1.is_control(); let line_break2 = whitespace2 && char2.is_control(); - let blank_line1 = line_break1 && (one.ends_with("\n\n") || one.ends_with("\n\r\n")); - let blank_line2 = line_break2 && (two.starts_with("\n\n") || two.starts_with("\r\n\r\n")); + let blank_line1 = + line_break1 && (one.ends_with(['\n', '\n']) || one.ends_with(['\n', '\r', '\n'])); + let blank_line2 = + line_break2 && (two.starts_with(['\n', '\n']) || two.starts_with(['\r', '\n', '\r', '\n'])); if blank_line1 || blank_line2 { // Five points for blank lines. @@ -747,22 +765,7 @@ fn cleanup_semantic_score(one: Range, two: Range) -> usize { // move as long as it doesn't cross an equality. fn cleanup_merge(solution: &mut Solution) { let diffs = &mut solution.diffs; - let common_prefix = if solution.utf8 { - common_prefix - } else { - common_prefix_bytes - }; - let common_suffix = if solution.utf8 { - common_suffix - } else { - common_suffix_bytes - }; - - loop { - if diffs.is_empty() { - return; - } - + while !diffs.is_empty() { diffs.push(Diff::Equal( solution.text1.substring(solution.text1.len..), solution.text2.substring(solution.text2.len..), @@ -911,22 +914,22 @@ impl Debug for Chunk<'_> { impl Debug for Diff<'_, '_> { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - let (name, bytes) = match *self { - Diff::Equal(range, _) => ("Equal", bytes(range)), - Diff::Delete(range) => ("Delete", bytes(range)), - Diff::Insert(range) => ("Insert", bytes(range)), + let (name, range) = match *self { + Diff::Equal(range, _) => ("Equal", range), + Diff::Delete(range) => ("Delete", range), + Diff::Insert(range) => ("Insert", range), }; - let text = String::from_utf8_lossy(bytes); - write!(formatter, "{}({:?})", name, text) - } -} - -impl<'a> From<Diff<'a, 'a>> for Chunk<'a> { - fn from(diff: Diff<'a, 'a>) -> Self { - match diff { - Diff::Equal(range, _) => Chunk::Equal(str(range)), - Diff::Delete(range) => Chunk::Delete(str(range)), - Diff::Insert(range) => Chunk::Insert(str(range)), + formatter.write_str(name)?; + formatter.write_str("(\"")?; + for ch in range.chars() { + if ch == '\'' { + // escape_debug turns this into "\'" which is unnecessary. + formatter.write_char(ch)?; + } else { + Display::fmt(&ch.escape_debug(), formatter)?; + } } + formatter.write_str("\")")?; + Ok(()) } } diff --git a/vendor/dissimilar/src/range.rs b/vendor/dissimilar/src/range.rs index 565a94c06..55cbc448c 100644 --- a/vendor/dissimilar/src/range.rs +++ b/vendor/dissimilar/src/range.rs @@ -1,11 +1,10 @@ use crate::find::find; use std::fmt::Debug; use std::ops::{self, RangeFrom, RangeFull, RangeTo}; -use std::str::{CharIndices, Chars}; #[derive(Copy, Clone)] pub struct Range<'a> { - pub doc: &'a str, + pub doc: &'a [char], pub offset: usize, pub len: usize, } @@ -13,13 +12,13 @@ pub struct Range<'a> { impl<'a> Range<'a> { pub fn empty() -> Self { Range { - doc: "", + doc: &[], offset: 0, len: 0, } } - pub fn new(doc: &'a str, bounds: impl RangeBounds) -> Self { + pub fn new(doc: &'a [char], bounds: impl RangeBounds) -> Self { let (offset, len) = bounds.index(doc.len()); Range { doc, offset, len } } @@ -28,6 +27,10 @@ impl<'a> Range<'a> { self.len == 0 } + pub fn len_bytes(&self) -> usize { + self.chars().map(char::len_utf8).sum() + } + pub fn substring(&self, bounds: impl RangeBounds) -> Self { let (offset, len) = bounds.index(self.len); Range { @@ -50,32 +53,26 @@ impl<'a> Range<'a> { (self.substring(..mid), self.substring(mid..)) } - pub fn chars(&self) -> Chars<'a> { - str(*self).chars() - } - - pub fn char_indices(&self) -> CharIndices<'a> { - str(*self).char_indices() + pub fn chars( + &self, + ) -> impl Iterator<Item = char> + DoubleEndedIterator + ExactSizeIterator + 'a { + slice(*self).iter().copied() } - pub fn bytes(&self) -> impl Iterator<Item = u8> + DoubleEndedIterator + ExactSizeIterator + 'a { - bytes(*self).iter().cloned() + pub fn starts_with(&self, prefix: impl AsRef<[char]>) -> bool { + slice(*self).starts_with(prefix.as_ref()) } - pub fn starts_with(&self, prefix: impl AsRef<[u8]>) -> bool { - bytes(*self).starts_with(prefix.as_ref()) + pub fn ends_with(&self, suffix: impl AsRef<[char]>) -> bool { + slice(*self).ends_with(suffix.as_ref()) } - pub fn ends_with(&self, suffix: impl AsRef<[u8]>) -> bool { - bytes(*self).ends_with(suffix.as_ref()) - } - - pub fn find(&self, needle: impl AsRef<[u8]>) -> Option<usize> { - find(bytes(*self), needle.as_ref()) + pub fn find(&self, needle: impl AsRef<[char]>) -> Option<usize> { + find(slice(*self), needle.as_ref()) } } -pub fn str(range: Range) -> &str { +pub fn slice(range: Range) -> &[char] { if cfg!(debug) && range .doc @@ -90,13 +87,9 @@ pub fn str(range: Range) -> &str { &range.doc[range.offset..range.offset + range.len] } -pub fn bytes(range: Range) -> &[u8] { - &range.doc.as_bytes()[range.offset..range.offset + range.len] -} - -impl AsRef<[u8]> for Range<'_> { - fn as_ref(&self) -> &[u8] { - bytes(*self) +impl AsRef<[char]> for Range<'_> { + fn as_ref(&self) -> &[char] { + slice(*self) } } diff --git a/vendor/dissimilar/src/tests.rs b/vendor/dissimilar/src/tests.rs index 450d7f7e4..d2e3fd643 100644 --- a/vendor/dissimilar/src/tests.rs +++ b/vendor/dissimilar/src/tests.rs @@ -1,4 +1,13 @@ use super::*; +use once_cell::sync::OnceCell; + +macro_rules! range { + ($text:expr) => {{ + static CHARS: OnceCell<Vec<char>> = OnceCell::new(); + let chars = CHARS.get_or_init(|| $text.chars().collect()); + Range::new(chars, ..) + }}; +} macro_rules! diff_list { () => { @@ -6,50 +15,52 @@ macro_rules! diff_list { text1: Range::empty(), text2: Range::empty(), diffs: Vec::new(), - utf8: true, } }; ($($kind:ident($text:literal)),+ $(,)?) => {{ + #[allow(unused_macro_rules)] macro_rules! text1 { (Insert, $s:literal) => { "" }; (Delete, $s:literal) => { $s }; (Equal, $s:literal) => { $s }; } + #[allow(unused_macro_rules)] macro_rules! text2 { (Insert, $s:literal) => { $s }; (Delete, $s:literal) => { "" }; (Equal, $s:literal) => { $s }; } - let text1 = concat!($(text1!($kind, $text)),*); - let text2 = concat!($(text2!($kind, $text)),*); + let text1 = range!(concat!($(text1!($kind, $text)),*)); + let text2 = range!(concat!($(text2!($kind, $text)),*)); let (_i, _j) = (&mut 0, &mut 0); + #[allow(unused_macro_rules)] macro_rules! range { (Insert, $s:literal) => { - Diff::Insert(range(text2, _j, $s)) + Diff::Insert(range(text2.doc, _j, $s)) }; (Delete, $s:literal) => { - Diff::Delete(range(text1, _i, $s)) + Diff::Delete(range(text1.doc, _i, $s)) }; (Equal, $s:literal) => { - Diff::Equal(range(text1, _i, $s), range(text2, _j, $s)) + Diff::Equal(range(text1.doc, _i, $s), range(text2.doc, _j, $s)) }; } Solution { - text1: Range::new(text1, ..), - text2: Range::new(text2, ..), + text1, + text2, diffs: vec![$(range!($kind, $text)),*], - utf8: true, } }}; } -fn range<'a>(doc: &'a str, offset: &mut usize, text: &str) -> Range<'a> { +fn range<'a>(doc: &'a [char], offset: &mut usize, text: &str) -> Range<'a> { + let len = text.chars().count(); let range = Range { doc, offset: *offset, - len: text.len(), + len, }; - *offset += text.len(); + *offset += len; range } @@ -65,12 +76,16 @@ macro_rules! assert_diffs { } fn same_diffs(expected: &[Chunk], actual: &[Diff]) -> bool { + fn eq(expected: &str, actual: &Range) -> bool { + expected.chars().eq(slice(*actual).iter().copied()) + } + expected.len() == actual.len() && expected.iter().zip(actual).all(|pair| match pair { - (Chunk::Insert(expected), Diff::Insert(actual)) => *expected == str(*actual), - (Chunk::Delete(expected), Diff::Delete(actual)) => *expected == str(*actual), + (Chunk::Insert(expected), Diff::Insert(actual)) => eq(expected, actual), + (Chunk::Delete(expected), Diff::Delete(actual)) => eq(expected, actual), (Chunk::Equal(expected), Diff::Equal(actual1, actual2)) => { - *expected == str(*actual1) && *expected == str(*actual2) + eq(expected, actual1) && eq(expected, actual2) } (_, _) => false, }) @@ -78,59 +93,56 @@ fn same_diffs(expected: &[Chunk], actual: &[Diff]) -> bool { #[test] fn test_common_prefix() { - let text1 = Range::new("abc", ..); - let text2 = Range::new("xyz", ..); - assert_eq!(0, common_prefix_bytes(text1, text2), "Null case"); + let text1 = range!("abc"); + let text2 = range!("xyz"); + assert_eq!(0, common_prefix(text1, text2), "Null case"); - let text1 = Range::new("1234abcdef", ..); - let text2 = Range::new("1234xyz", ..); - assert_eq!(4, common_prefix_bytes(text1, text2), "Non-null case"); + let text1 = range!("1234abcdef"); + let text2 = range!("1234xyz"); + assert_eq!(4, common_prefix(text1, text2), "Non-null case"); - let text1 = Range::new("1234", ..); - let text2 = Range::new("1234xyz", ..); - assert_eq!(4, common_prefix_bytes(text1, text2), "Whole case"); + let text1 = range!("1234"); + let text2 = range!("1234xyz"); + assert_eq!(4, common_prefix(text1, text2), "Whole case"); } #[test] fn test_common_suffix() { - let text1 = Range::new("abc", ..); - let text2 = Range::new("xyz", ..); + let text1 = range!("abc"); + let text2 = range!("xyz"); assert_eq!(0, common_suffix(text1, text2), "Null case"); - assert_eq!(0, common_suffix_bytes(text1, text2), "Null case"); - let text1 = Range::new("abcdef1234", ..); - let text2 = Range::new("xyz1234", ..); + let text1 = range!("abcdef1234"); + let text2 = range!("xyz1234"); assert_eq!(4, common_suffix(text1, text2), "Non-null case"); - assert_eq!(4, common_suffix_bytes(text1, text2), "Non-null case"); - let text1 = Range::new("1234", ..); - let text2 = Range::new("xyz1234", ..); + let text1 = range!("1234"); + let text2 = range!("xyz1234"); assert_eq!(4, common_suffix(text1, text2), "Whole case"); - assert_eq!(4, common_suffix_bytes(text1, text2), "Whole case"); } #[test] fn test_common_overlap() { let text1 = Range::empty(); - let text2 = Range::new("abcd", ..); + let text2 = range!("abcd"); assert_eq!(0, common_overlap(text1, text2), "Null case"); - let text1 = Range::new("abc", ..); - let text2 = Range::new("abcd", ..); + let text1 = range!("abc"); + let text2 = range!("abcd"); assert_eq!(3, common_overlap(text1, text2), "Whole case"); - let text1 = Range::new("123456", ..); - let text2 = Range::new("abcd", ..); + let text1 = range!("123456"); + let text2 = range!("abcd"); assert_eq!(0, common_overlap(text1, text2), "No overlap"); - let text1 = Range::new("123456xxx", ..); - let text2 = Range::new("xxxabcd", ..); + let text1 = range!("123456xxx"); + let text2 = range!("xxxabcd"); assert_eq!(3, common_overlap(text1, text2), "Overlap"); // Some overly clever languages (C#) may treat ligatures as equal to their // component letters. E.g. U+FB01 == 'fi' - let text1 = Range::new("fi", ..); - let text2 = Range::new("\u{fb01}i", ..); + let text1 = range!("fi"); + let text2 = range!("\u{fb01}i"); assert_eq!(0, common_overlap(text1, text2), "Unicode"); } @@ -420,13 +432,12 @@ fn test_cleanup_semantic() { #[test] fn test_bisect() { - let text1 = Range::new("cat", ..); - let text2 = Range::new("map", ..); + let text1 = range!("cat"); + let text2 = range!("map"); let solution = Solution { text1, text2, diffs: bisect(text1, text2), - utf8: false, }; assert_diffs!( [ @@ -446,24 +457,24 @@ fn test_main() { let solution = main(Range::empty(), Range::empty()); assert_diffs!([], solution, "Null case"); - let solution = main(Range::new("abc", ..), Range::new("abc", ..)); + let solution = main(range!("abc"), range!("abc")); assert_diffs!([Equal("abc")], solution, "Equality"); - let solution = main(Range::new("abc", ..), Range::new("ab123c", ..)); + let solution = main(range!("abc"), range!("ab123c")); assert_diffs!( [Equal("ab"), Insert("123"), Equal("c")], solution, "Simple insertion", ); - let solution = main(Range::new("a123bc", ..), Range::new("abc", ..)); + let solution = main(range!("a123bc"), range!("abc")); assert_diffs!( [Equal("a"), Delete("123"), Equal("bc")], solution, "Simple deletion", ); - let solution = main(Range::new("abc", ..), Range::new("a123b456c", ..)); + let solution = main(range!("abc"), range!("a123b456c")); assert_diffs!( [ Equal("a"), @@ -476,7 +487,7 @@ fn test_main() { "Two insertions", ); - let solution = main(Range::new("a123b456c", ..), Range::new("abc", ..)); + let solution = main(range!("a123b456c"), range!("abc")); assert_diffs!( [ Equal("a"), @@ -489,12 +500,12 @@ fn test_main() { "Two deletions", ); - let solution = main(Range::new("a", ..), Range::new("b", ..)); + let solution = main(range!("a"), range!("b")); assert_diffs!([Delete("a"), Insert("b")], solution, "Simple case #1"); let solution = main( - Range::new("Apples are a fruit.", ..), - Range::new("Bananas are also fruit.", ..), + range!("Apples are a fruit."), + range!("Bananas are also fruit."), ); assert_diffs!( [ @@ -508,7 +519,7 @@ fn test_main() { "Simple case #2", ); - let solution = main(Range::new("ax\t", ..), Range::new("\u{0680}x\000", ..)); + let solution = main(range!("ax\t"), range!("\u{0680}x\000")); assert_diffs!( [ Delete("a"), @@ -521,7 +532,7 @@ fn test_main() { "Simple case #3", ); - let solution = main(Range::new("1ayb2", ..), Range::new("abxab", ..)); + let solution = main(range!("1ayb2"), range!("abxab")); assert_diffs!( [ Delete("1"), @@ -535,7 +546,7 @@ fn test_main() { "Overlap #1", ); - let solution = main(Range::new("abcy", ..), Range::new("xaxcxabc", ..)); + let solution = main(range!("abcy"), range!("xaxcxabc")); assert_diffs!( [Insert("xaxcx"), Equal("abc"), Delete("y")], solution, @@ -543,8 +554,8 @@ fn test_main() { ); let solution = main( - Range::new("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", ..), - Range::new("a-bcd-efghijklmnopqrs", ..), + range!("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg"), + range!("a-bcd-efghijklmnopqrs"), ); assert_diffs!( [ @@ -563,8 +574,8 @@ fn test_main() { ); let solution = main( - Range::new("a [[Pennsylvania]] and [[New", ..), - Range::new(" and [[Pennsylvania]]", ..), + range!("a [[Pennsylvania]] and [[New"), + range!(" and [[Pennsylvania]]"), ); assert_diffs!( [ diff --git a/vendor/dissimilar/tests/test.rs b/vendor/dissimilar/tests/test.rs index e68fd4f11..7debb0593 100644 --- a/vendor/dissimilar/tests/test.rs +++ b/vendor/dissimilar/tests/test.rs @@ -21,7 +21,7 @@ fn test_unicode() { } #[test] -fn test_unicode2() { +fn test_issue9() { let a = "[乀丁abcd一]"; let b = "[一abcd丁]"; let d = diff(a, b); @@ -35,6 +35,18 @@ fn test_unicode2() { Chunk::Delete("一"), Chunk::Insert("丁"), Chunk::Equal("]"), - ] + ], + ); +} + +#[test] +fn test_issue15() { + let a = "A のダ"; + let b = "A ダ"; + let d = diff(a, b); + + assert_eq!( + d, + vec![Chunk::Equal("A "), Chunk::Delete("の"), Chunk::Equal("ダ")], ); } |