summaryrefslogtreecommitdiffstats
path: root/third_party/rust/strsim
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/strsim
parentInitial commit. (diff)
downloadfirefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/strsim')
-rw-r--r--third_party/rust/strsim/.cargo-checksum.json1
-rw-r--r--third_party/rust/strsim/CHANGELOG.md118
-rw-r--r--third_party/rust/strsim/Cargo.toml28
-rw-r--r--third_party/rust/strsim/LICENSE23
-rw-r--r--third_party/rust/strsim/README.md62
-rw-r--r--third_party/rust/strsim/appveyor.yml13
-rwxr-xr-xthird_party/rust/strsim/dev41
-rw-r--r--third_party/rust/strsim/src/lib.rs698
-rw-r--r--third_party/rust/strsim/tests/lib.rs39
9 files changed, 1023 insertions, 0 deletions
diff --git a/third_party/rust/strsim/.cargo-checksum.json b/third_party/rust/strsim/.cargo-checksum.json
new file mode 100644
index 0000000000..68e81d3f5b
--- /dev/null
+++ b/third_party/rust/strsim/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"CHANGELOG.md":"1472e9bc914215d1c63d842e409e0b93d23a8b62b411ce09b7e3e5b2de212100","Cargo.toml":"41ef74c41992b1750d53de7878e1e200a36add38741303a98aa3e1321462097f","LICENSE":"1738b51502ae831fb59ffbeb22ebdd90bf17e5c72fe57c00b47552415f133fd8","README.md":"9cc6cae9758534e8cf3a0376f73983935fb7968463cd324bbef06328659f5030","appveyor.yml":"b41eae9798a9bb250f6046509d9bbd6e63bac9ad2655d342b3d9c8975584f0c0","dev":"5bd26dc2c86f777627abe96c5992b6c45e6b5dea52f42b7107fa5c106abe2ab4","src/lib.rs":"14b7916423df616e73974e984468584481bf84a8f7d24a0a3c392d48c5c711d2","tests/lib.rs":"eb2f829a86b54481ca97e17414fb1e4cec7c5e1cb27ead5913eea87d61f7f29f"},"package":"bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"} \ No newline at end of file
diff --git a/third_party/rust/strsim/CHANGELOG.md b/third_party/rust/strsim/CHANGELOG.md
new file mode 100644
index 0000000000..e73385d9f6
--- /dev/null
+++ b/third_party/rust/strsim/CHANGELOG.md
@@ -0,0 +1,118 @@
+# Change Log
+This project attempts to adhere to [Semantic Versioning](http://semver.org).
+
+## [Unreleased]
+
+## [0.7.0] - (2018-01-17)
+### Changed
+- Faster Levenshtein implementation (thanks @wdv4758h)
+
+### Removed
+- Remove the "against_vec" functions. They are one-liners now, so they don't
+ seem to add enough value to justify making the API larger. I didn't find
+ anybody using them when I skimmed through a GitHub search. If you do use them,
+ you can change the calls to something like:
+```rust
+let distances = strings.iter().map(|a| jaro(target, a)).collect();
+```
+
+## [0.6.0] - (2016-12-26)
+### Added
+- Add optimal string alignment distance
+
+### Fixed
+- Fix Damerau-Levenshtein implementation (previous implementation was actually
+ optimal string alignment; see this [Damerau-Levenshtein explanation])
+
+## [0.5.2] - (2016-11-21)
+### Changed
+- Remove Cargo generated documentation in favor of a [docs.rs] link
+
+## [0.5.1] - (2016-08-23)
+### Added
+- Add Cargo generated documentation
+
+### Fixed
+- Fix panic when Jaro or Jaro-Winkler are given strings both with a length of
+ one
+
+## [0.5.0] - (2016-08-11)
+### Changed
+- Make Hamming faster (thanks @IBUzPE9) when the two strings have the same
+ length but slower when they have different lengths
+
+## [0.4.1] - (2016-04-18)
+### Added
+- Add Vagrant setup for development
+- Add AppVeyor configuration for Windows CI
+
+### Fixed
+- Fix metrics when given strings with multibyte characters (thanks @WanzenBug)
+
+## [0.4.0] - (2015-06-10)
+### Added
+- For each metric, add a function that takes a vector of strings and returns a
+vector of results (thanks @ovarene)
+
+## [0.3.0] - (2015-04-30)
+### Changed
+- Remove usage of unstable Rust features
+
+## [0.2.5] - (2015-04-24)
+### Fixed
+- Remove unnecessary `Float` import from doc tests
+
+## [0.2.4] - (2015-04-15)
+### Fixed
+- Remove unused `core` feature flag
+
+## [0.2.3] - (2015-04-01)
+### Fixed
+- Remove now unnecessary `Float` import
+
+## [0.2.2] - (2015-03-29)
+### Fixed
+- Remove usage of `char_at` (marked as unstable)
+
+## [0.2.1] - (2015-02-20)
+### Fixed
+- Update bit vector import to match Rust update
+
+## [0.2.0] - (2015-02-19)
+### Added
+- Implement Damerau-Levenshtein
+- Add tests in docs
+
+## [0.1.1] - (2015-02-10)
+### Added
+- Configure Travis for CI
+- Add rustdoc comments
+
+### Fixed
+- Limit Jaro-Winkler return value to a maximum of 1.0
+- Fix float comparsions in tests
+
+## [0.1.0] - (2015-02-09)
+### Added
+- Implement Hamming, Jaro, Jaro-Winkler, and Levenshtein
+
+[Unreleased]: https://github.com/dguo/strsim-rs/compare/0.7.0...HEAD
+[0.7.0]: https://github.com/dguo/strsim-rs/compare/0.6.0...0.7.0
+[0.6.0]: https://github.com/dguo/strsim-rs/compare/0.5.2...0.6.0
+[0.5.2]: https://github.com/dguo/strsim-rs/compare/0.5.1...0.5.2
+[0.5.1]: https://github.com/dguo/strsim-rs/compare/0.5.0...0.5.1
+[0.5.0]: https://github.com/dguo/strsim-rs/compare/0.4.1...0.5.0
+[0.4.1]: https://github.com/dguo/strsim-rs/compare/0.4.0...0.4.1
+[0.4.0]: https://github.com/dguo/strsim-rs/compare/0.3.0...0.4.0
+[0.3.0]: https://github.com/dguo/strsim-rs/compare/0.2.5...0.3.0
+[0.2.5]: https://github.com/dguo/strsim-rs/compare/0.2.4...0.2.5
+[0.2.4]: https://github.com/dguo/strsim-rs/compare/0.2.3...0.2.4
+[0.2.3]: https://github.com/dguo/strsim-rs/compare/0.2.2...0.2.3
+[0.2.2]: https://github.com/dguo/strsim-rs/compare/0.2.1...0.2.2
+[0.2.1]: https://github.com/dguo/strsim-rs/compare/0.2.0...0.2.1
+[0.2.0]: https://github.com/dguo/strsim-rs/compare/0.1.1...0.2.0
+[0.1.1]: https://github.com/dguo/strsim-rs/compare/0.1.0...0.1.1
+[0.1.0]: https://github.com/dguo/strsim-rs/compare/fabad4...0.1.0
+[docs.rs]: https://docs.rs/strsim/
+[Damerau-Levenshtein explanation]:
+http://scarcitycomputing.blogspot.com/2013/04/damerau-levenshtein-edit-distance.html
diff --git a/third_party/rust/strsim/Cargo.toml b/third_party/rust/strsim/Cargo.toml
new file mode 100644
index 0000000000..aa92bde7e2
--- /dev/null
+++ b/third_party/rust/strsim/Cargo.toml
@@ -0,0 +1,28 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "strsim"
+version = "0.7.0"
+authors = ["Danny Guo <dannyguo91@gmail.com>"]
+description = "Implementations of string similarity metrics.\nIncludes Hamming, Levenshtein, OSA, Damerau-Levenshtein, Jaro, and Jaro-Winkler.\n"
+homepage = "https://github.com/dguo/strsim-rs"
+documentation = "https://docs.rs/strsim/"
+readme = "README.md"
+keywords = ["string", "similarity", "Hamming", "Levenshtein", "Jaro"]
+license = "MIT"
+repository = "https://github.com/dguo/strsim-rs"
+[badges.travis-ci]
+repository = "dguo/strsim-rs"
+
+[badges.appveyor]
+repository = "dguo/strsim-rs"
diff --git a/third_party/rust/strsim/LICENSE b/third_party/rust/strsim/LICENSE
new file mode 100644
index 0000000000..1aacdb8642
--- /dev/null
+++ b/third_party/rust/strsim/LICENSE
@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Danny Guo
+Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/third_party/rust/strsim/README.md b/third_party/rust/strsim/README.md
new file mode 100644
index 0000000000..747debbc3b
--- /dev/null
+++ b/third_party/rust/strsim/README.md
@@ -0,0 +1,62 @@
+# strsim-rs [![Crates.io](https://img.shields.io/crates/v/strsim.svg)](https://crates.io/crates/strsim) [![Crates.io](https://img.shields.io/crates/l/strsim.svg?maxAge=2592000)](https://github.com/dguo/strsim-rs/blob/master/LICENSE) [![Linux build status](https://travis-ci.org/dguo/strsim-rs.svg?branch=master)](https://travis-ci.org/dguo/strsim-rs) [![Windows build status](https://ci.appveyor.com/api/projects/status/ggue6i785618a39w?svg=true)](https://ci.appveyor.com/project/dguo/strsim-rs)
+
+[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]:
+ - [Hamming]
+ - [Levenshtein]
+ - [Optimal string alignment]
+ - [Damerau-Levenshtein]
+ - [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length
+
+### Installation
+```toml
+# Cargo.toml
+[dependencies]
+strsim = "0.7.0"
+```
+
+### [Documentation](https://docs.rs/strsim/)
+You can change the version in the url to see the documentation for an older
+version in the
+[changelog](https://github.com/dguo/strsim-rs/blob/master/CHANGELOG.md).
+
+### Usage
+```rust
+extern crate strsim;
+
+use strsim::{hamming, levenshtein, osa_distance, damerau_levenshtein, jaro,
+ jaro_winkler};
+
+fn main() {
+ match hamming("hamming", "hammers") {
+ Ok(distance) => assert_eq!(3, distance),
+ Err(why) => panic!("{:?}", why)
+ }
+
+ assert_eq!(3, levenshtein("kitten", "sitting"));
+
+ assert_eq!(3, osa_distance("ac", "cba"));
+
+ assert_eq!(2, damerau_levenshtein("ac", "cba"));
+
+ assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
+ 0.001);
+
+ assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
+ 0.001);
+}
+```
+
+### Development
+If you don't want to install Rust itself, you can run `$ ./dev` for a
+development CLI if you have [Docker] installed.
+
+### License
+[MIT](https://github.com/dguo/strsim-rs/blob/master/LICENSE)
+
+[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric
+[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
+[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance
+[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance
+[Optimal string alignment]:https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
+[Docker]:https://docs.docker.com/engine/installation/
diff --git a/third_party/rust/strsim/appveyor.yml b/third_party/rust/strsim/appveyor.yml
new file mode 100644
index 0000000000..fb992db82b
--- /dev/null
+++ b/third_party/rust/strsim/appveyor.yml
@@ -0,0 +1,13 @@
+install:
+ - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-beta-x86_64-pc-windows-gnu.exe'
+ - rust-beta-x86_64-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
+ - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
+ - rustc -V
+ - cargo -V
+
+build: false
+
+test_script:
+ - cargo build
+ - cargo test --verbose
+
diff --git a/third_party/rust/strsim/dev b/third_party/rust/strsim/dev
new file mode 100755
index 0000000000..4c85a494b7
--- /dev/null
+++ b/third_party/rust/strsim/dev
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# ./dev --help
+
+import argparse
+import os
+from subprocess import run
+import sys
+
+parser = argparse.ArgumentParser(prog='./dev')
+subparsers = parser.add_subparsers(metavar='<command>', title='commands')
+command = [
+ 'docker', 'run', '-it', '--rm', '-v', os.getcwd() + ':/src:cached',
+ '-w=/src', 'rust:1.21.0'
+]
+
+def cargo(args, remaining):
+ sys.exit(run(command + ['cargo'] + remaining or []).returncode)
+
+parser_cargo = subparsers.add_parser('cargo', help='run a cargo command')
+parser_cargo.set_defaults(func=cargo)
+
+def sh(args, remaining):
+ sys.exit(run(command + ['bash']).returncode)
+
+parser_sh = subparsers.add_parser('sh', help='bring up a shell')
+parser_sh.set_defaults(func=sh)
+
+def test(args, remaining):
+ sys.exit(run(command + ['cargo', 'test']).returncode)
+
+parser_test = subparsers.add_parser('test', help='run tests')
+parser_test.set_defaults(func=test)
+
+if len(sys.argv) > 1:
+ args, remaining = parser.parse_known_args()
+ try:
+ args.func(args, remaining)
+ except FileNotFoundError:
+ sys.exit('Please install Docker.')
+else:
+ parser.print_help()
diff --git a/third_party/rust/strsim/src/lib.rs b/third_party/rust/strsim/src/lib.rs
new file mode 100644
index 0000000000..e6da05bad2
--- /dev/null
+++ b/third_party/rust/strsim/src/lib.rs
@@ -0,0 +1,698 @@
+//! This library implements string similarity metrics.
+
+use std::char;
+use std::cmp::{max, min};
+use std::collections::HashMap;
+
+#[derive(Debug, PartialEq)]
+pub enum StrSimError {
+ DifferentLengthArgs
+}
+
+pub type HammingResult = Result<usize, StrSimError>;
+
+/// Calculates the number of positions in the two strings where the characters
+/// differ. Returns an error if the strings have different lengths.
+///
+/// ```
+/// use strsim::hamming;
+///
+/// match hamming("hamming", "hammers") {
+/// Ok(distance) => assert_eq!(3, distance),
+/// Err(why) => panic!("{:?}", why)
+/// }
+/// ```
+pub fn hamming(a: &str, b: &str) -> HammingResult {
+ let (mut ita, mut itb, mut count) = (a.chars(), b.chars(), 0);
+ loop {
+ match (ita.next(), itb.next()){
+ (Some(x), Some(y)) => if x != y { count += 1 },
+ (None, None) => return Ok(count),
+ _ => return Err(StrSimError::DifferentLengthArgs),
+ }
+ }
+}
+
+/// Calculates the Jaro similarity between two strings. The returned value
+/// is between 0.0 and 1.0 (higher value means more similar).
+///
+/// ```
+/// use strsim::jaro;
+///
+/// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
+/// 0.001);
+/// ```
+pub fn jaro(a: &str, b: &str) -> f64 {
+ if a == b { return 1.0; }
+
+ let a_len = a.chars().count();
+ let b_len = b.chars().count();
+
+ // The check for lengths of one here is to prevent integer overflow when
+ // calculating the search range.
+ if a_len == 0 || b_len == 0 || (a_len == 1 && b_len == 1) {
+ return 0.0;
+ }
+
+ let search_range = (max(a_len, b_len) / 2) - 1;
+
+ let mut b_consumed = Vec::with_capacity(b_len);
+ for _ in 0..b_len {
+ b_consumed.push(false);
+ }
+ let mut matches = 0.0;
+
+ let mut transpositions = 0.0;
+ let mut b_match_index = 0;
+
+ for (i, a_char) in a.chars().enumerate() {
+ let min_bound =
+ // prevent integer wrapping
+ if i > search_range {
+ max(0, i - search_range)
+ } else {
+ 0
+ };
+
+ let max_bound = min(b_len - 1, i + search_range);
+
+ if min_bound > max_bound {
+ continue;
+ }
+
+ for (j, b_char) in b.chars().enumerate() {
+ if min_bound <= j && j <= max_bound && a_char == b_char &&
+ !b_consumed[j] {
+ b_consumed[j] = true;
+ matches += 1.0;
+
+ if j < b_match_index {
+ transpositions += 1.0;
+ }
+ b_match_index = j;
+
+ break;
+ }
+ }
+ }
+
+ if matches == 0.0 {
+ 0.0
+ } else {
+ (1.0 / 3.0) * ((matches / a_len as f64) +
+ (matches / b_len as f64) +
+ ((matches - transpositions) / matches))
+ }
+}
+
+/// Like Jaro but gives a boost to strings that have a common prefix.
+///
+/// ```
+/// use strsim::jaro_winkler;
+///
+/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
+/// 0.001);
+/// ```
+pub fn jaro_winkler(a: &str, b: &str) -> f64 {
+ let jaro_distance = jaro(a, b);
+
+ // Don't limit the length of the common prefix
+ let prefix_length = a.chars()
+ .zip(b.chars())
+ .take_while(|&(a_char, b_char)| a_char == b_char)
+ .count();
+
+ let jaro_winkler_distance =
+ jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));
+
+ if jaro_winkler_distance <= 1.0 {
+ jaro_winkler_distance
+ } else {
+ 1.0
+ }
+}
+
+/// Calculates the minimum number of insertions, deletions, and substitutions
+/// required to change one string into the other.
+///
+/// ```
+/// use strsim::levenshtein;
+///
+/// assert_eq!(3, levenshtein("kitten", "sitting"));
+/// ```
+pub fn levenshtein(a: &str, b: &str) -> usize {
+ if a == b { return 0; }
+
+ let a_len = a.chars().count();
+ let b_len = b.chars().count();
+
+ if a_len == 0 { return b_len; }
+ if b_len == 0 { return a_len; }
+
+ let mut cache: Vec<usize> = (1..b_len+1).collect();
+
+ let mut result = 0;
+ let mut distance_a;
+ let mut distance_b;
+
+ for (i, a_char) in a.chars().enumerate() {
+ result = i;
+ distance_b = i;
+
+ for (j, b_char) in b.chars().enumerate() {
+ let cost = if a_char == b_char { 0 } else { 1 };
+ distance_a = distance_b + cost;
+ distance_b = cache[j];
+ result = min(result + 1, min(distance_a, distance_b + 1));
+ cache[j] = result;
+ }
+ }
+
+ result
+}
+
+/// Like Levenshtein but allows for adjacent transpositions. Each substring can
+/// only be edited once.
+///
+/// ```
+/// use strsim::osa_distance;
+///
+/// assert_eq!(3, osa_distance("ab", "bca"));
+/// ```
+pub fn osa_distance(a: &str, b: &str) -> usize {
+ let a_len = a.chars().count();
+ let b_len = b.chars().count();
+ if a == b { return 0; }
+ else if a_len == 0 { return b_len; }
+ else if b_len == 0 { return a_len; }
+
+ let mut prev_two_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
+ let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
+ let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
+
+ let mut prev_a_char = char::MAX;
+ let mut prev_b_char = char::MAX;
+
+ for i in 0..(b_len + 1) {
+ prev_two_distances.push(i);
+ prev_distances.push(i);
+ curr_distances.push(0);
+ }
+
+ for (i, a_char) in a.chars().enumerate() {
+ curr_distances[0] = i + 1;
+
+ for (j, b_char) in b.chars().enumerate() {
+ let cost = if a_char == b_char { 0 } else { 1 };
+ curr_distances[j + 1] = min(curr_distances[j] + 1,
+ min(prev_distances[j + 1] + 1,
+ prev_distances[j] + cost));
+ if i > 0 && j > 0 && a_char != b_char &&
+ a_char == prev_b_char && b_char == prev_a_char {
+ curr_distances[j + 1] = min(curr_distances[j + 1],
+ prev_two_distances[j - 1] + 1);
+ }
+
+ prev_b_char = b_char;
+ }
+
+ prev_two_distances.clone_from(&prev_distances);
+ prev_distances.clone_from(&curr_distances);
+ prev_a_char = a_char;
+ }
+
+ curr_distances[b_len]
+
+}
+
+/// Like optimal string alignment, but substrings can be edited an unlimited
+/// number of times, and the triangle inequality holds.
+///
+/// ```
+/// use strsim::damerau_levenshtein;
+///
+/// assert_eq!(2, damerau_levenshtein("ab", "bca"));
+/// ```
+pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
+ if a == b { return 0; }
+
+ let a_chars: Vec<char> = a.chars().collect();
+ let b_chars: Vec<char> = b.chars().collect();
+ let a_len = a_chars.len();
+ let b_len = b_chars.len();
+
+ if a_len == 0 { return b_len; }
+ if b_len == 0 { return a_len; }
+
+ let mut distances = vec![vec![0; b_len + 2]; a_len + 2];
+ let max_distance = a_len + b_len;
+ distances[0][0] = max_distance;
+
+ for i in 0..(a_len + 1) {
+ distances[i + 1][0] = max_distance;
+ distances[i + 1][1] = i;
+ }
+
+ for j in 0..(b_len + 1) {
+ distances[0][j + 1] = max_distance;
+ distances[1][j + 1] = j;
+ }
+
+ let mut chars: HashMap<char, usize> = HashMap::new();
+
+ for i in 1..(a_len + 1) {
+ let mut db = 0;
+
+ for j in 1..(b_len + 1) {
+ let k = match chars.get(&b_chars[j - 1]) {
+ Some(value) => value.clone(),
+ None => 0
+ };
+
+ let l = db;
+
+ let mut cost = 1;
+ if a_chars[i - 1] == b_chars[j - 1] {
+ cost = 0;
+ db = j;
+ }
+
+ let substitution_cost = distances[i][j] + cost;
+ let insertion_cost = distances[i][j + 1] + 1;
+ let deletion_cost = distances[i + 1][j] + 1;
+ let transposition_cost = distances[k][l] + (i - k - 1) + 1 +
+ (j - l - 1);
+
+ distances[i + 1][j + 1] = min(substitution_cost,
+ min(insertion_cost,
+ min(deletion_cost,
+ transposition_cost)));
+ }
+
+ chars.insert(a_chars[i - 1], i);
+ }
+
+ distances[a_len + 1][b_len + 1]
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn hamming_empty() {
+ match hamming("", "") {
+ Ok(distance) => { assert_eq!(0, distance); },
+ Err(why) => { panic!("{:?}", why); }
+ }
+ }
+
+ #[test]
+ fn hamming_same() {
+ match hamming("hamming", "hamming") {
+ Ok(distance) => { assert_eq!(0, distance); },
+ Err(why) => { panic!("{:?}", why); }
+ }
+ }
+
+ #[test]
+ fn hamming_diff() {
+ match hamming("hamming", "hammers") {
+ Ok(distance) => { assert_eq!(3, distance); },
+ Err(why) => { panic!("{:?}", why); }
+ }
+ }
+
+ #[test]
+ fn hamming_diff_multibyte() {
+ match hamming("hamming", "h香mmüng") {
+ Ok(distance) => { assert_eq!(2, distance); },
+ Err(why) => { panic!("{:?}", why); }
+ }
+ }
+
+ #[test]
+ fn hamming_unequal_length() {
+ match hamming("ham", "hamming") {
+ Ok(_) => { panic!(); },
+ Err(why) => { assert_eq!(why, StrSimError::DifferentLengthArgs); }
+ }
+ }
+
+ #[test]
+ fn hamming_names() {
+ match hamming("Friedrich Nietzs", "Jean-Paul Sartre") {
+ Ok(distance) => { assert_eq!(14, distance); },
+ Err(why) => { panic!("{:?}", why); }
+ }
+ }
+
+ #[test]
+ fn jaro_both_empty() {
+ assert_eq!(1.0, jaro("", ""));
+ }
+
+ #[test]
+ fn jaro_first_empty() {
+ assert_eq!(0.0, jaro("", "jaro"));
+ }
+
+ #[test]
+ fn jaro_second_empty() {
+ assert_eq!(0.0, jaro("distance", ""));
+ }
+
+ #[test]
+ fn jaro_same() {
+ assert_eq!(1.0, jaro("jaro", "jaro"));
+ }
+
+ #[test]
+ fn jaro_multibyte() {
+ assert!((0.818 - jaro("testabctest", "testöঙ香test")) < 0.001);
+ assert!((0.818 - jaro("testöঙ香test", "testabctest")) < 0.001);
+ }
+
+ #[test]
+ fn jaro_diff_short() {
+ assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_diff_one_character() {
+ assert_eq!(0.0, jaro("a", "b"));
+ }
+
+ #[test]
+ fn jaro_diff_one_and_two() {
+ assert!((0.83 - jaro("a", "ab")).abs() < 0.01);
+ }
+
+ #[test]
+ fn jaro_diff_two_and_one() {
+ assert!((0.83 - jaro("ab", "a")).abs() < 0.01);
+ }
+
+ #[test]
+ fn jaro_diff_no_transposition() {
+ assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_diff_with_transposition() {
+ assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_names() {
+ assert!((0.392 - jaro("Friedrich Nietzsche",
+ "Jean-Paul Sartre")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_both_empty() {
+ assert_eq!(1.0, jaro_winkler("", ""));
+ }
+
+ #[test]
+ fn jaro_winkler_first_empty() {
+ assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
+ }
+
+ #[test]
+ fn jaro_winkler_second_empty() {
+ assert_eq!(0.0, jaro_winkler("distance", ""));
+ }
+
+ #[test]
+ fn jaro_winkler_same() {
+ assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
+ }
+
+ #[test]
+ fn jaro_winkler_multibyte() {
+ assert!((0.89 - jaro_winkler("testabctest", "testöঙ香test")).abs() <
+ 0.001);
+ assert!((0.89 - jaro_winkler("testöঙ香test", "testabctest")).abs() <
+ 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_diff_short() {
+ assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001);
+ assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_diff_one_character() {
+ assert_eq!(0.0, jaro_winkler("a", "b"));
+ }
+
+ #[test]
+ fn jaro_winkler_diff_no_transposition() {
+ assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_diff_with_transposition() {
+ assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_names() {
+ assert!((0.562 - jaro_winkler("Friedrich Nietzsche",
+ "Fran-Paul Sartre")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_long_prefix() {
+ assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
+ 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_more_names() {
+ assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_length_of_one() {
+ assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001);
+ }
+
+ #[test]
+ fn jaro_winkler_very_long_prefix() {
+ assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
+ "thequickbrownfoxjumpedovery")).abs() <
+ 0.001);
+ }
+
+ #[test]
+ fn levenshtein_empty() {
+ assert_eq!(0, levenshtein("", ""));
+ }
+
+ #[test]
+ fn levenshtein_same() {
+ assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
+ }
+
+ #[test]
+ fn levenshtein_diff_short() {
+ assert_eq!(3, levenshtein("kitten", "sitting"));
+ }
+
+ #[test]
+ fn levenshtein_diff_with_space() {
+ assert_eq!(5, levenshtein("hello, world", "bye, world"));
+ }
+
+ #[test]
+ fn levenshtein_diff_multibyte() {
+ assert_eq!(3, levenshtein("öঙ香", "abc"));
+ assert_eq!(3, levenshtein("abc", "öঙ香"));
+ }
+
+ #[test]
+ fn levenshtein_diff_longer() {
+ let a = "The quick brown fox jumped over the angry dog.";
+ let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
+ assert_eq!(37, levenshtein(a, b));
+ }
+
+ #[test]
+ fn levenshtein_first_empty() {
+ assert_eq!(7, levenshtein("", "sitting"));
+ }
+
+ #[test]
+ fn levenshtein_second_empty() {
+ assert_eq!(6, levenshtein("kitten", ""));
+ }
+
+ #[test]
+ fn osa_distance_empty() {
+ assert_eq!(0, osa_distance("", ""));
+ }
+
+ #[test]
+ fn osa_distance_same() {
+ assert_eq!(0, osa_distance("damerau", "damerau"));
+ }
+
+ #[test]
+ fn osa_distance_first_empty() {
+ assert_eq!(7, osa_distance("", "damerau"));
+ }
+
+ #[test]
+ fn osa_distance_second_empty() {
+ assert_eq!(7, osa_distance("damerau", ""));
+ }
+
+ #[test]
+ fn osa_distance_diff() {
+ assert_eq!(3, osa_distance("ca", "abc"));
+ }
+
+ #[test]
+ fn osa_distance_diff_short() {
+ assert_eq!(3, osa_distance("damerau", "aderua"));
+ }
+
+ #[test]
+ fn osa_distance_diff_reversed() {
+ assert_eq!(3, osa_distance("aderua", "damerau"));
+ }
+
+ #[test]
+ fn osa_distance_diff_multibyte() {
+ assert_eq!(3, osa_distance("öঙ香", "abc"));
+ assert_eq!(3, osa_distance("abc", "öঙ香"));
+ }
+
+ #[test]
+ fn osa_distance_diff_unequal_length() {
+ assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
+ }
+
+ #[test]
+ fn osa_distance_diff_unequal_length_reversed() {
+ assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
+ }
+
+ #[test]
+ fn osa_distance_diff_comedians() {
+ assert_eq!(5, osa_distance("Stewart", "Colbert"));
+ }
+
+ #[test]
+ fn osa_distance_many_transpositions() {
+ assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
+ }
+
+ #[test]
+ fn osa_distance_diff_longer() {
+ let a = "The quick brown fox jumped over the angry dog.";
+ let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
+ assert_eq!(36, osa_distance(a, b));
+ }
+
+ #[test]
+ fn osa_distance_beginning_transposition() {
+ assert_eq!(1, osa_distance("foobar", "ofobar"));
+ }
+
+ #[test]
+ fn osa_distance_end_transposition() {
+ assert_eq!(1, osa_distance("specter", "spectre"));
+ }
+
+ #[test]
+ fn osa_distance_restricted_edit() {
+ assert_eq!(4, osa_distance("a cat", "an abct"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_empty() {
+ assert_eq!(0, damerau_levenshtein("", ""));
+ }
+
+ #[test]
+ fn damerau_levenshtein_same() {
+ assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_first_empty() {
+ assert_eq!(7, damerau_levenshtein("", "damerau"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_second_empty() {
+ assert_eq!(7, damerau_levenshtein("damerau", ""));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff() {
+ assert_eq!(2, damerau_levenshtein("ca", "abc"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_short() {
+ assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_reversed() {
+ assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_multibyte() {
+ assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
+ assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_unequal_length() {
+ assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_unequal_length_reversed() {
+ assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_comedians() {
+ assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_many_transpositions() {
+ assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_diff_longer() {
+ let a = "The quick brown fox jumped over the angry dog.";
+ let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
+ assert_eq!(36, damerau_levenshtein(a, b));
+ }
+
+ #[test]
+ fn damerau_levenshtein_beginning_transposition() {
+ assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_end_transposition() {
+ assert_eq!(1, damerau_levenshtein("specter", "spectre"));
+ }
+
+ #[test]
+ fn damerau_levenshtein_unrestricted_edit() {
+ assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
+ }
+}
diff --git a/third_party/rust/strsim/tests/lib.rs b/third_party/rust/strsim/tests/lib.rs
new file mode 100644
index 0000000000..46478828f4
--- /dev/null
+++ b/third_party/rust/strsim/tests/lib.rs
@@ -0,0 +1,39 @@
+extern crate strsim;
+
+use strsim::{hamming, levenshtein, osa_distance, damerau_levenshtein, jaro,
+ jaro_winkler};
+
+#[test]
+fn hamming_works() {
+ match hamming("hamming", "hammers") {
+ Ok(distance) => assert_eq!(3, distance),
+ Err(why) => panic!("{:?}", why)
+ }
+}
+
+#[test]
+fn levenshtein_works() {
+ assert_eq!(3, levenshtein("kitten", "sitting"));
+}
+
+#[test]
+fn osa_distance_works() {
+ assert_eq!(3, osa_distance("ac", "cba"));
+}
+
+#[test]
+fn damerau_levenshtein_works() {
+ assert_eq!(2, damerau_levenshtein("ac", "cba"));
+}
+
+#[test]
+fn jaro_works() {
+ assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
+ 0.001);
+}
+
+#[test]
+fn jaro_winkler_works() {
+ assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
+ 0.001);
+}