summaryrefslogtreecommitdiffstats
path: root/vendor/gsgdt/src/levenshtein.rs
blob: 8acfefc83aa650a9a2b7693777d495355fca8fca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
use std::cmp::min;

/// Calculate the levenshtein distance between two strings.
pub(crate) fn distance(s1: &str, s2: &str) -> usize {
    let v1: Vec<char> = s1.chars().collect();
    let v2: Vec<char> = s2.chars().collect();

    let l_v1 = v1.len();
    let l_v2 = v2.len();

    if l_v1 == 0 {
        return l_v2;
    }
    if l_v2 == 0 {
        return l_v1;
    }
    if l_v1 > l_v2 {
        return distance(s2, s1);
    }

    let mut col: Vec<usize> = (0..=l_v1).collect();

    for i in 1..=l_v2 {
        let mut last_diag = col[0];
        col[0] += 1;
        for j in 1..=l_v1 {
            let last_diag_temp = col[j];
            if v1[j-1] == v2[i-1] {
                col[j] = last_diag;
            } else {
                col[j] = min(last_diag, min(col[j-1], col[j])) + 1;
            }
            last_diag = last_diag_temp;
        }
    }

    col[l_v1]
}


#[cfg(test)]
mod tests {
    use crate::levenshtein::*;
    #[test]
    fn test1() {
        assert_eq!(distance("kitten", "sitting"), 3);
    }

    #[test]
    fn test_diff_len() {
        assert_eq!(distance("kit", "kitten"), 3);
    }

    #[test]
    fn test_equal() {
        assert_eq!(distance("test", "test"), 0);
        assert_eq!(distance("", ""), 0);
        assert_eq!(distance("long string with space", "long string with space"), 0);
        assert_eq!(distance("unicode 😀", "unicode 😀"), 0);
    }

    #[test]
    fn test_one_empty() {
        assert_eq!(distance("test", ""), 4);
        assert_eq!(distance("", "test"), 4);
        assert_eq!(distance("", ""), 0);
        assert_eq!(distance("long string", ""), 11);
        assert_eq!(distance("😀", ""), 1);
    }
}