blob: 8e6c1cc1ff38bc60e852ee2f0ed4e775d69bb649 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
#!/bin/bash
# Check common misspellings
# Input file format (one entry per line):
#   misspelling->correction1, correction2, ...
# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
input=${INPUT:-List_of_common_misspellings.txt}
function check() {
cat $1 | awk 'BEGIN{maxord=0;FS="\t"}FILENAME=="-"{for (i=1; i<=NF; i++){a[NR,$(i)]=i};max=NR;next}{x1=a[NR-max,$2];x2=a[NR-max,$3];sug++;if($3)sug++;if (!x1&&!x2){mis2++;misrow=misrow"\n"$0};if(!x1||($3 && !x2))mis++;ord+=x1+x2;}END{
print "Missed rows", misrow;
print "======================================="
print maxord, "max. suggestion for a word";
print max, "input rows";
print mis2, "missing rows";
print sug, "expected suggestions";
print mis, "missing suggestions";
print ord/(sug-mis), "average ranking";
}' - $2
}
test -f $input.4 && check $input.4 $input.3 >result.aspell
check $input.5 $input.3 >result.hunspell
test -f result.aspell && tail -6 result.aspell
tail -6 result.hunspell
|