diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/strsim | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/strsim')
-rw-r--r-- | third_party/rust/strsim/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/strsim/CHANGELOG.md | 118 | ||||
-rw-r--r-- | third_party/rust/strsim/Cargo.toml | 28 | ||||
-rw-r--r-- | third_party/rust/strsim/LICENSE | 23 | ||||
-rw-r--r-- | third_party/rust/strsim/README.md | 62 | ||||
-rw-r--r-- | third_party/rust/strsim/appveyor.yml | 13 | ||||
-rwxr-xr-x | third_party/rust/strsim/dev | 41 | ||||
-rw-r--r-- | third_party/rust/strsim/src/lib.rs | 698 | ||||
-rw-r--r-- | third_party/rust/strsim/tests/lib.rs | 39 |
9 files changed, 1023 insertions, 0 deletions
diff --git a/third_party/rust/strsim/.cargo-checksum.json b/third_party/rust/strsim/.cargo-checksum.json new file mode 100644 index 0000000000..68e81d3f5b --- /dev/null +++ b/third_party/rust/strsim/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"1472e9bc914215d1c63d842e409e0b93d23a8b62b411ce09b7e3e5b2de212100","Cargo.toml":"41ef74c41992b1750d53de7878e1e200a36add38741303a98aa3e1321462097f","LICENSE":"1738b51502ae831fb59ffbeb22ebdd90bf17e5c72fe57c00b47552415f133fd8","README.md":"9cc6cae9758534e8cf3a0376f73983935fb7968463cd324bbef06328659f5030","appveyor.yml":"b41eae9798a9bb250f6046509d9bbd6e63bac9ad2655d342b3d9c8975584f0c0","dev":"5bd26dc2c86f777627abe96c5992b6c45e6b5dea52f42b7107fa5c106abe2ab4","src/lib.rs":"14b7916423df616e73974e984468584481bf84a8f7d24a0a3c392d48c5c711d2","tests/lib.rs":"eb2f829a86b54481ca97e17414fb1e4cec7c5e1cb27ead5913eea87d61f7f29f"},"package":"bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"}
\ No newline at end of file diff --git a/third_party/rust/strsim/CHANGELOG.md b/third_party/rust/strsim/CHANGELOG.md new file mode 100644 index 0000000000..e73385d9f6 --- /dev/null +++ b/third_party/rust/strsim/CHANGELOG.md @@ -0,0 +1,118 @@ +# Change Log +This project attempts to adhere to [Semantic Versioning](http://semver.org). + +## [Unreleased] + +## [0.7.0] - (2018-01-17) +### Changed +- Faster Levenshtein implementation (thanks @wdv4758h) + +### Removed +- Remove the "against_vec" functions. They are one-liners now, so they don't + seem to add enough value to justify making the API larger. I didn't find + anybody using them when I skimmed through a GitHub search. If you do use them, + you can change the calls to something like: +```rust +let distances = strings.iter().map(|a| jaro(target, a)).collect(); +``` + +## [0.6.0] - (2016-12-26) +### Added +- Add optimal string alignment distance + +### Fixed +- Fix Damerau-Levenshtein implementation (previous implementation was actually + optimal string alignment; see this [Damerau-Levenshtein explanation]) + +## [0.5.2] - (2016-11-21) +### Changed +- Remove Cargo generated documentation in favor of a [docs.rs] link + +## [0.5.1] - (2016-08-23) +### Added +- Add Cargo generated documentation + +### Fixed +- Fix panic when Jaro or Jaro-Winkler are given strings both with a length of + one + +## [0.5.0] - (2016-08-11) +### Changed +- Make Hamming faster (thanks @IBUzPE9) when the two strings have the same + length but slower when they have different lengths + +## [0.4.1] - (2016-04-18) +### Added +- Add Vagrant setup for development +- Add AppVeyor configuration for Windows CI + +### Fixed +- Fix metrics when given strings with multibyte characters (thanks @WanzenBug) + +## [0.4.0] - (2015-06-10) +### Added +- For each metric, add a function that takes a vector of strings and returns a +vector of results (thanks @ovarene) + +## [0.3.0] - (2015-04-30) +### Changed +- Remove usage of unstable Rust features + +## [0.2.5] - (2015-04-24) +### Fixed +- Remove unnecessary `Float` import from doc tests + +## [0.2.4] - (2015-04-15) +### Fixed +- Remove unused `core` feature flag + +## [0.2.3] - (2015-04-01) +### Fixed +- Remove now unnecessary `Float` import + +## [0.2.2] - (2015-03-29) +### Fixed +- Remove usage of `char_at` (marked as unstable) + +## [0.2.1] - (2015-02-20) +### Fixed +- Update bit vector import to match Rust update + +## [0.2.0] - (2015-02-19) +### Added +- Implement Damerau-Levenshtein +- Add tests in docs + +## [0.1.1] - (2015-02-10) +### Added +- Configure Travis for CI +- Add rustdoc comments + +### Fixed +- Limit Jaro-Winkler return value to a maximum of 1.0 +- Fix float comparsions in tests + +## [0.1.0] - (2015-02-09) +### Added +- Implement Hamming, Jaro, Jaro-Winkler, and Levenshtein + +[Unreleased]: https://github.com/dguo/strsim-rs/compare/0.7.0...HEAD +[0.7.0]: https://github.com/dguo/strsim-rs/compare/0.6.0...0.7.0 +[0.6.0]: https://github.com/dguo/strsim-rs/compare/0.5.2...0.6.0 +[0.5.2]: https://github.com/dguo/strsim-rs/compare/0.5.1...0.5.2 +[0.5.1]: https://github.com/dguo/strsim-rs/compare/0.5.0...0.5.1 +[0.5.0]: https://github.com/dguo/strsim-rs/compare/0.4.1...0.5.0 +[0.4.1]: https://github.com/dguo/strsim-rs/compare/0.4.0...0.4.1 +[0.4.0]: https://github.com/dguo/strsim-rs/compare/0.3.0...0.4.0 +[0.3.0]: https://github.com/dguo/strsim-rs/compare/0.2.5...0.3.0 +[0.2.5]: https://github.com/dguo/strsim-rs/compare/0.2.4...0.2.5 +[0.2.4]: https://github.com/dguo/strsim-rs/compare/0.2.3...0.2.4 +[0.2.3]: https://github.com/dguo/strsim-rs/compare/0.2.2...0.2.3 +[0.2.2]: https://github.com/dguo/strsim-rs/compare/0.2.1...0.2.2 +[0.2.1]: https://github.com/dguo/strsim-rs/compare/0.2.0...0.2.1 +[0.2.0]: https://github.com/dguo/strsim-rs/compare/0.1.1...0.2.0 +[0.1.1]: https://github.com/dguo/strsim-rs/compare/0.1.0...0.1.1 +[0.1.0]: https://github.com/dguo/strsim-rs/compare/fabad4...0.1.0 +[docs.rs]: https://docs.rs/strsim/ +[Damerau-Levenshtein explanation]: +http://scarcitycomputing.blogspot.com/2013/04/damerau-levenshtein-edit-distance.html diff --git a/third_party/rust/strsim/Cargo.toml b/third_party/rust/strsim/Cargo.toml new file mode 100644 index 0000000000..aa92bde7e2 --- /dev/null +++ b/third_party/rust/strsim/Cargo.toml @@ -0,0 +1,28 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g. crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "strsim" +version = "0.7.0" +authors = ["Danny Guo <dannyguo91@gmail.com>"] +description = "Implementations of string similarity metrics.\nIncludes Hamming, Levenshtein, OSA, Damerau-Levenshtein, Jaro, and Jaro-Winkler.\n" +homepage = "https://github.com/dguo/strsim-rs" +documentation = "https://docs.rs/strsim/" +readme = "README.md" +keywords = ["string", "similarity", "Hamming", "Levenshtein", "Jaro"] +license = "MIT" +repository = "https://github.com/dguo/strsim-rs" +[badges.travis-ci] +repository = "dguo/strsim-rs" + +[badges.appveyor] +repository = "dguo/strsim-rs" diff --git a/third_party/rust/strsim/LICENSE b/third_party/rust/strsim/LICENSE new file mode 100644 index 0000000000..1aacdb8642 --- /dev/null +++ b/third_party/rust/strsim/LICENSE @@ -0,0 +1,23 @@ +The MIT License (MIT) + +Copyright (c) 2015 Danny Guo +Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/third_party/rust/strsim/README.md b/third_party/rust/strsim/README.md new file mode 100644 index 0000000000..747debbc3b --- /dev/null +++ b/third_party/rust/strsim/README.md @@ -0,0 +1,62 @@ +# strsim-rs [![Crates.io](https://img.shields.io/crates/v/strsim.svg)](https://crates.io/crates/strsim) [![Crates.io](https://img.shields.io/crates/l/strsim.svg?maxAge=2592000)](https://github.com/dguo/strsim-rs/blob/master/LICENSE) [![Linux build status](https://travis-ci.org/dguo/strsim-rs.svg?branch=master)](https://travis-ci.org/dguo/strsim-rs) [![Windows build status](https://ci.appveyor.com/api/projects/status/ggue6i785618a39w?svg=true)](https://ci.appveyor.com/project/dguo/strsim-rs) + +[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]: + - [Hamming] + - [Levenshtein] + - [Optimal string alignment] + - [Damerau-Levenshtein] + - [Jaro and Jaro-Winkler] - this implementation of Jaro-Winkler does not limit the common prefix length + +### Installation +```toml +# Cargo.toml +[dependencies] +strsim = "0.7.0" +``` + +### [Documentation](https://docs.rs/strsim/) +You can change the version in the url to see the documentation for an older +version in the +[changelog](https://github.com/dguo/strsim-rs/blob/master/CHANGELOG.md). + +### Usage +```rust +extern crate strsim; + +use strsim::{hamming, levenshtein, osa_distance, damerau_levenshtein, jaro, + jaro_winkler}; + +fn main() { + match hamming("hamming", "hammers") { + Ok(distance) => assert_eq!(3, distance), + Err(why) => panic!("{:?}", why) + } + + assert_eq!(3, levenshtein("kitten", "sitting")); + + assert_eq!(3, osa_distance("ac", "cba")); + + assert_eq!(2, damerau_levenshtein("ac", "cba")); + + assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() < + 0.001); + + assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < + 0.001); +} +``` + +### Development +If you don't want to install Rust itself, you can run `$ ./dev` for a +development CLI if you have [Docker] installed. + +### License +[MIT](https://github.com/dguo/strsim-rs/blob/master/LICENSE) + +[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric +[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance +[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance +[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance +[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance +[Optimal string alignment]:https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance +[Docker]:https://docs.docker.com/engine/installation/ diff --git a/third_party/rust/strsim/appveyor.yml b/third_party/rust/strsim/appveyor.yml new file mode 100644 index 0000000000..fb992db82b --- /dev/null +++ b/third_party/rust/strsim/appveyor.yml @@ -0,0 +1,13 @@ +install: + - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-beta-x86_64-pc-windows-gnu.exe' + - rust-beta-x86_64-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin + - rustc -V + - cargo -V + +build: false + +test_script: + - cargo build + - cargo test --verbose + diff --git a/third_party/rust/strsim/dev b/third_party/rust/strsim/dev new file mode 100755 index 0000000000..4c85a494b7 --- /dev/null +++ b/third_party/rust/strsim/dev @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +# ./dev --help + +import argparse +import os +from subprocess import run +import sys + +parser = argparse.ArgumentParser(prog='./dev') +subparsers = parser.add_subparsers(metavar='<command>', title='commands') +command = [ + 'docker', 'run', '-it', '--rm', '-v', os.getcwd() + ':/src:cached', + '-w=/src', 'rust:1.21.0' +] + +def cargo(args, remaining): + sys.exit(run(command + ['cargo'] + remaining or []).returncode) + +parser_cargo = subparsers.add_parser('cargo', help='run a cargo command') +parser_cargo.set_defaults(func=cargo) + +def sh(args, remaining): + sys.exit(run(command + ['bash']).returncode) + +parser_sh = subparsers.add_parser('sh', help='bring up a shell') +parser_sh.set_defaults(func=sh) + +def test(args, remaining): + sys.exit(run(command + ['cargo', 'test']).returncode) + +parser_test = subparsers.add_parser('test', help='run tests') +parser_test.set_defaults(func=test) + +if len(sys.argv) > 1: + args, remaining = parser.parse_known_args() + try: + args.func(args, remaining) + except FileNotFoundError: + sys.exit('Please install Docker.') +else: + parser.print_help() diff --git a/third_party/rust/strsim/src/lib.rs b/third_party/rust/strsim/src/lib.rs new file mode 100644 index 0000000000..e6da05bad2 --- /dev/null +++ b/third_party/rust/strsim/src/lib.rs @@ -0,0 +1,698 @@ +//! This library implements string similarity metrics. + +use std::char; +use std::cmp::{max, min}; +use std::collections::HashMap; + +#[derive(Debug, PartialEq)] +pub enum StrSimError { + DifferentLengthArgs +} + +pub type HammingResult = Result<usize, StrSimError>; + +/// Calculates the number of positions in the two strings where the characters +/// differ. Returns an error if the strings have different lengths. +/// +/// ``` +/// use strsim::hamming; +/// +/// match hamming("hamming", "hammers") { +/// Ok(distance) => assert_eq!(3, distance), +/// Err(why) => panic!("{:?}", why) +/// } +/// ``` +pub fn hamming(a: &str, b: &str) -> HammingResult { + let (mut ita, mut itb, mut count) = (a.chars(), b.chars(), 0); + loop { + match (ita.next(), itb.next()){ + (Some(x), Some(y)) => if x != y { count += 1 }, + (None, None) => return Ok(count), + _ => return Err(StrSimError::DifferentLengthArgs), + } + } +} + +/// Calculates the Jaro similarity between two strings. The returned value +/// is between 0.0 and 1.0 (higher value means more similar). +/// +/// ``` +/// use strsim::jaro; +/// +/// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() < +/// 0.001); +/// ``` +pub fn jaro(a: &str, b: &str) -> f64 { + if a == b { return 1.0; } + + let a_len = a.chars().count(); + let b_len = b.chars().count(); + + // The check for lengths of one here is to prevent integer overflow when + // calculating the search range. + if a_len == 0 || b_len == 0 || (a_len == 1 && b_len == 1) { + return 0.0; + } + + let search_range = (max(a_len, b_len) / 2) - 1; + + let mut b_consumed = Vec::with_capacity(b_len); + for _ in 0..b_len { + b_consumed.push(false); + } + let mut matches = 0.0; + + let mut transpositions = 0.0; + let mut b_match_index = 0; + + for (i, a_char) in a.chars().enumerate() { + let min_bound = + // prevent integer wrapping + if i > search_range { + max(0, i - search_range) + } else { + 0 + }; + + let max_bound = min(b_len - 1, i + search_range); + + if min_bound > max_bound { + continue; + } + + for (j, b_char) in b.chars().enumerate() { + if min_bound <= j && j <= max_bound && a_char == b_char && + !b_consumed[j] { + b_consumed[j] = true; + matches += 1.0; + + if j < b_match_index { + transpositions += 1.0; + } + b_match_index = j; + + break; + } + } + } + + if matches == 0.0 { + 0.0 + } else { + (1.0 / 3.0) * ((matches / a_len as f64) + + (matches / b_len as f64) + + ((matches - transpositions) / matches)) + } +} + +/// Like Jaro but gives a boost to strings that have a common prefix. +/// +/// ``` +/// use strsim::jaro_winkler; +/// +/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < +/// 0.001); +/// ``` +pub fn jaro_winkler(a: &str, b: &str) -> f64 { + let jaro_distance = jaro(a, b); + + // Don't limit the length of the common prefix + let prefix_length = a.chars() + .zip(b.chars()) + .take_while(|&(a_char, b_char)| a_char == b_char) + .count(); + + let jaro_winkler_distance = + jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance)); + + if jaro_winkler_distance <= 1.0 { + jaro_winkler_distance + } else { + 1.0 + } +} + +/// Calculates the minimum number of insertions, deletions, and substitutions +/// required to change one string into the other. +/// +/// ``` +/// use strsim::levenshtein; +/// +/// assert_eq!(3, levenshtein("kitten", "sitting")); +/// ``` +pub fn levenshtein(a: &str, b: &str) -> usize { + if a == b { return 0; } + + let a_len = a.chars().count(); + let b_len = b.chars().count(); + + if a_len == 0 { return b_len; } + if b_len == 0 { return a_len; } + + let mut cache: Vec<usize> = (1..b_len+1).collect(); + + let mut result = 0; + let mut distance_a; + let mut distance_b; + + for (i, a_char) in a.chars().enumerate() { + result = i; + distance_b = i; + + for (j, b_char) in b.chars().enumerate() { + let cost = if a_char == b_char { 0 } else { 1 }; + distance_a = distance_b + cost; + distance_b = cache[j]; + result = min(result + 1, min(distance_a, distance_b + 1)); + cache[j] = result; + } + } + + result +} + +/// Like Levenshtein but allows for adjacent transpositions. Each substring can +/// only be edited once. +/// +/// ``` +/// use strsim::osa_distance; +/// +/// assert_eq!(3, osa_distance("ab", "bca")); +/// ``` +pub fn osa_distance(a: &str, b: &str) -> usize { + let a_len = a.chars().count(); + let b_len = b.chars().count(); + if a == b { return 0; } + else if a_len == 0 { return b_len; } + else if b_len == 0 { return a_len; } + + let mut prev_two_distances: Vec<usize> = Vec::with_capacity(b_len + 1); + let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1); + let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1); + + let mut prev_a_char = char::MAX; + let mut prev_b_char = char::MAX; + + for i in 0..(b_len + 1) { + prev_two_distances.push(i); + prev_distances.push(i); + curr_distances.push(0); + } + + for (i, a_char) in a.chars().enumerate() { + curr_distances[0] = i + 1; + + for (j, b_char) in b.chars().enumerate() { + let cost = if a_char == b_char { 0 } else { 1 }; + curr_distances[j + 1] = min(curr_distances[j] + 1, + min(prev_distances[j + 1] + 1, + prev_distances[j] + cost)); + if i > 0 && j > 0 && a_char != b_char && + a_char == prev_b_char && b_char == prev_a_char { + curr_distances[j + 1] = min(curr_distances[j + 1], + prev_two_distances[j - 1] + 1); + } + + prev_b_char = b_char; + } + + prev_two_distances.clone_from(&prev_distances); + prev_distances.clone_from(&curr_distances); + prev_a_char = a_char; + } + + curr_distances[b_len] + +} + +/// Like optimal string alignment, but substrings can be edited an unlimited +/// number of times, and the triangle inequality holds. +/// +/// ``` +/// use strsim::damerau_levenshtein; +/// +/// assert_eq!(2, damerau_levenshtein("ab", "bca")); +/// ``` +pub fn damerau_levenshtein(a: &str, b: &str) -> usize { + if a == b { return 0; } + + let a_chars: Vec<char> = a.chars().collect(); + let b_chars: Vec<char> = b.chars().collect(); + let a_len = a_chars.len(); + let b_len = b_chars.len(); + + if a_len == 0 { return b_len; } + if b_len == 0 { return a_len; } + + let mut distances = vec![vec![0; b_len + 2]; a_len + 2]; + let max_distance = a_len + b_len; + distances[0][0] = max_distance; + + for i in 0..(a_len + 1) { + distances[i + 1][0] = max_distance; + distances[i + 1][1] = i; + } + + for j in 0..(b_len + 1) { + distances[0][j + 1] = max_distance; + distances[1][j + 1] = j; + } + + let mut chars: HashMap<char, usize> = HashMap::new(); + + for i in 1..(a_len + 1) { + let mut db = 0; + + for j in 1..(b_len + 1) { + let k = match chars.get(&b_chars[j - 1]) { + Some(value) => value.clone(), + None => 0 + }; + + let l = db; + + let mut cost = 1; + if a_chars[i - 1] == b_chars[j - 1] { + cost = 0; + db = j; + } + + let substitution_cost = distances[i][j] + cost; + let insertion_cost = distances[i][j + 1] + 1; + let deletion_cost = distances[i + 1][j] + 1; + let transposition_cost = distances[k][l] + (i - k - 1) + 1 + + (j - l - 1); + + distances[i + 1][j + 1] = min(substitution_cost, + min(insertion_cost, + min(deletion_cost, + transposition_cost))); + } + + chars.insert(a_chars[i - 1], i); + } + + distances[a_len + 1][b_len + 1] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hamming_empty() { + match hamming("", "") { + Ok(distance) => { assert_eq!(0, distance); }, + Err(why) => { panic!("{:?}", why); } + } + } + + #[test] + fn hamming_same() { + match hamming("hamming", "hamming") { + Ok(distance) => { assert_eq!(0, distance); }, + Err(why) => { panic!("{:?}", why); } + } + } + + #[test] + fn hamming_diff() { + match hamming("hamming", "hammers") { + Ok(distance) => { assert_eq!(3, distance); }, + Err(why) => { panic!("{:?}", why); } + } + } + + #[test] + fn hamming_diff_multibyte() { + match hamming("hamming", "h香mmüng") { + Ok(distance) => { assert_eq!(2, distance); }, + Err(why) => { panic!("{:?}", why); } + } + } + + #[test] + fn hamming_unequal_length() { + match hamming("ham", "hamming") { + Ok(_) => { panic!(); }, + Err(why) => { assert_eq!(why, StrSimError::DifferentLengthArgs); } + } + } + + #[test] + fn hamming_names() { + match hamming("Friedrich Nietzs", "Jean-Paul Sartre") { + Ok(distance) => { assert_eq!(14, distance); }, + Err(why) => { panic!("{:?}", why); } + } + } + + #[test] + fn jaro_both_empty() { + assert_eq!(1.0, jaro("", "")); + } + + #[test] + fn jaro_first_empty() { + assert_eq!(0.0, jaro("", "jaro")); + } + + #[test] + fn jaro_second_empty() { + assert_eq!(0.0, jaro("distance", "")); + } + + #[test] + fn jaro_same() { + assert_eq!(1.0, jaro("jaro", "jaro")); + } + + #[test] + fn jaro_multibyte() { + assert!((0.818 - jaro("testabctest", "testöঙ香test")) < 0.001); + assert!((0.818 - jaro("testöঙ香test", "testabctest")) < 0.001); + } + + #[test] + fn jaro_diff_short() { + assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001); + } + + #[test] + fn jaro_diff_one_character() { + assert_eq!(0.0, jaro("a", "b")); + } + + #[test] + fn jaro_diff_one_and_two() { + assert!((0.83 - jaro("a", "ab")).abs() < 0.01); + } + + #[test] + fn jaro_diff_two_and_one() { + assert!((0.83 - jaro("ab", "a")).abs() < 0.01); + } + + #[test] + fn jaro_diff_no_transposition() { + assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001); + } + + #[test] + fn jaro_diff_with_transposition() { + assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001); + } + + #[test] + fn jaro_names() { + assert!((0.392 - jaro("Friedrich Nietzsche", + "Jean-Paul Sartre")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_both_empty() { + assert_eq!(1.0, jaro_winkler("", "")); + } + + #[test] + fn jaro_winkler_first_empty() { + assert_eq!(0.0, jaro_winkler("", "jaro-winkler")); + } + + #[test] + fn jaro_winkler_second_empty() { + assert_eq!(0.0, jaro_winkler("distance", "")); + } + + #[test] + fn jaro_winkler_same() { + assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler")); + } + + #[test] + fn jaro_winkler_multibyte() { + assert!((0.89 - jaro_winkler("testabctest", "testöঙ香test")).abs() < + 0.001); + assert!((0.89 - jaro_winkler("testöঙ香test", "testabctest")).abs() < + 0.001); + } + + #[test] + fn jaro_winkler_diff_short() { + assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001); + assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_diff_one_character() { + assert_eq!(0.0, jaro_winkler("a", "b")); + } + + #[test] + fn jaro_winkler_diff_no_transposition() { + assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_diff_with_transposition() { + assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_names() { + assert!((0.562 - jaro_winkler("Friedrich Nietzsche", + "Fran-Paul Sartre")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_long_prefix() { + assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < + 0.001); + } + + #[test] + fn jaro_winkler_more_names() { + assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_length_of_one() { + assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001); + } + + #[test] + fn jaro_winkler_very_long_prefix() { + assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx", + "thequickbrownfoxjumpedovery")).abs() < + 0.001); + } + + #[test] + fn levenshtein_empty() { + assert_eq!(0, levenshtein("", "")); + } + + #[test] + fn levenshtein_same() { + assert_eq!(0, levenshtein("levenshtein", "levenshtein")); + } + + #[test] + fn levenshtein_diff_short() { + assert_eq!(3, levenshtein("kitten", "sitting")); + } + + #[test] + fn levenshtein_diff_with_space() { + assert_eq!(5, levenshtein("hello, world", "bye, world")); + } + + #[test] + fn levenshtein_diff_multibyte() { + assert_eq!(3, levenshtein("öঙ香", "abc")); + assert_eq!(3, levenshtein("abc", "öঙ香")); + } + + #[test] + fn levenshtein_diff_longer() { + let a = "The quick brown fox jumped over the angry dog."; + let b = "Lorem ipsum dolor sit amet, dicta latine an eam."; + assert_eq!(37, levenshtein(a, b)); + } + + #[test] + fn levenshtein_first_empty() { + assert_eq!(7, levenshtein("", "sitting")); + } + + #[test] + fn levenshtein_second_empty() { + assert_eq!(6, levenshtein("kitten", "")); + } + + #[test] + fn osa_distance_empty() { + assert_eq!(0, osa_distance("", "")); + } + + #[test] + fn osa_distance_same() { + assert_eq!(0, osa_distance("damerau", "damerau")); + } + + #[test] + fn osa_distance_first_empty() { + assert_eq!(7, osa_distance("", "damerau")); + } + + #[test] + fn osa_distance_second_empty() { + assert_eq!(7, osa_distance("damerau", "")); + } + + #[test] + fn osa_distance_diff() { + assert_eq!(3, osa_distance("ca", "abc")); + } + + #[test] + fn osa_distance_diff_short() { + assert_eq!(3, osa_distance("damerau", "aderua")); + } + + #[test] + fn osa_distance_diff_reversed() { + assert_eq!(3, osa_distance("aderua", "damerau")); + } + + #[test] + fn osa_distance_diff_multibyte() { + assert_eq!(3, osa_distance("öঙ香", "abc")); + assert_eq!(3, osa_distance("abc", "öঙ香")); + } + + #[test] + fn osa_distance_diff_unequal_length() { + assert_eq!(6, osa_distance("damerau", "aderuaxyz")); + } + + #[test] + fn osa_distance_diff_unequal_length_reversed() { + assert_eq!(6, osa_distance("aderuaxyz", "damerau")); + } + + #[test] + fn osa_distance_diff_comedians() { + assert_eq!(5, osa_distance("Stewart", "Colbert")); + } + + #[test] + fn osa_distance_many_transpositions() { + assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk")); + } + + #[test] + fn osa_distance_diff_longer() { + let a = "The quick brown fox jumped over the angry dog."; + let b = "Lehem ipsum dolor sit amet, dicta latine an eam."; + assert_eq!(36, osa_distance(a, b)); + } + + #[test] + fn osa_distance_beginning_transposition() { + assert_eq!(1, osa_distance("foobar", "ofobar")); + } + + #[test] + fn osa_distance_end_transposition() { + assert_eq!(1, osa_distance("specter", "spectre")); + } + + #[test] + fn osa_distance_restricted_edit() { + assert_eq!(4, osa_distance("a cat", "an abct")); + } + + #[test] + fn damerau_levenshtein_empty() { + assert_eq!(0, damerau_levenshtein("", "")); + } + + #[test] + fn damerau_levenshtein_same() { + assert_eq!(0, damerau_levenshtein("damerau", "damerau")); + } + + #[test] + fn damerau_levenshtein_first_empty() { + assert_eq!(7, damerau_levenshtein("", "damerau")); + } + + #[test] + fn damerau_levenshtein_second_empty() { + assert_eq!(7, damerau_levenshtein("damerau", "")); + } + + #[test] + fn damerau_levenshtein_diff() { + assert_eq!(2, damerau_levenshtein("ca", "abc")); + } + + #[test] + fn damerau_levenshtein_diff_short() { + assert_eq!(3, damerau_levenshtein("damerau", "aderua")); + } + + #[test] + fn damerau_levenshtein_diff_reversed() { + assert_eq!(3, damerau_levenshtein("aderua", "damerau")); + } + + #[test] + fn damerau_levenshtein_diff_multibyte() { + assert_eq!(3, damerau_levenshtein("öঙ香", "abc")); + assert_eq!(3, damerau_levenshtein("abc", "öঙ香")); + } + + #[test] + fn damerau_levenshtein_diff_unequal_length() { + assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz")); + } + + #[test] + fn damerau_levenshtein_diff_unequal_length_reversed() { + assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau")); + } + + #[test] + fn damerau_levenshtein_diff_comedians() { + assert_eq!(5, damerau_levenshtein("Stewart", "Colbert")); + } + + #[test] + fn damerau_levenshtein_many_transpositions() { + assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk")); + } + + #[test] + fn damerau_levenshtein_diff_longer() { + let a = "The quick brown fox jumped over the angry dog."; + let b = "Lehem ipsum dolor sit amet, dicta latine an eam."; + assert_eq!(36, damerau_levenshtein(a, b)); + } + + #[test] + fn damerau_levenshtein_beginning_transposition() { + assert_eq!(1, damerau_levenshtein("foobar", "ofobar")); + } + + #[test] + fn damerau_levenshtein_end_transposition() { + assert_eq!(1, damerau_levenshtein("specter", "spectre")); + } + + #[test] + fn damerau_levenshtein_unrestricted_edit() { + assert_eq!(3, damerau_levenshtein("a cat", "an abct")); + } +} diff --git a/third_party/rust/strsim/tests/lib.rs b/third_party/rust/strsim/tests/lib.rs new file mode 100644 index 0000000000..46478828f4 --- /dev/null +++ b/third_party/rust/strsim/tests/lib.rs @@ -0,0 +1,39 @@ +extern crate strsim; + +use strsim::{hamming, levenshtein, osa_distance, damerau_levenshtein, jaro, + jaro_winkler}; + +#[test] +fn hamming_works() { + match hamming("hamming", "hammers") { + Ok(distance) => assert_eq!(3, distance), + Err(why) => panic!("{:?}", why) + } +} + +#[test] +fn levenshtein_works() { + assert_eq!(3, levenshtein("kitten", "sitting")); +} + +#[test] +fn osa_distance_works() { + assert_eq!(3, osa_distance("ac", "cba")); +} + +#[test] +fn damerau_levenshtein_works() { + assert_eq!(2, damerau_levenshtein("ac", "cba")); +} + +#[test] +fn jaro_works() { + assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() < + 0.001); +} + +#[test] +fn jaro_winkler_works() { + assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() < + 0.001); +} |