#!/bin/bash
# Check common misspellings
# input file format:
# word->word1, ...
# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
#
# Files used (all derived from the base name in $INPUT):
#   $input.3  expected corrections, tab-separated; columns 2 and 3 are checked
#   $input.4  optional suggestion lists, reported in result.aspell
#   $input.5  suggestion lists, reported in result.hunspell
# Suggestion files hold one tab-separated row of suggestions per row of
# $input.3 (rows are matched by line number).

input=${INPUT:-List_of_common_misspellings.txt}

#######################################
# Compare a file of suggestion lists against the expected corrections and
# print a report (missed rows followed by six summary lines).
# Arguments:
#   $1 - suggestion file: one row per word, suggestions separated by tabs
#   $2 - expected file: tab-separated, expected corrections in columns 2
#        and (optionally) 3
# Outputs:
#   report on stdout, diagnostics on stderr
# Returns:
#   0 on success, 2 on wrong usage, 1 if an input file is not readable,
#   otherwise the exit status of awk
#######################################
check() {
  if (( $# != 2 )); then
    printf 'usage: check SUGGESTION_FILE EXPECTED_FILE\n' >&2
    return 2
  fi

  local suggestions=$1
  local expected=$2
  local f

  for f in "$suggestions" "$expected"; do
    if [[ ! -r "$f" ]]; then
      printf 'check: cannot read %s\n' "$f" >&2
      return 1
    fi
  done

  # The suggestion file is fed through stdin ("-") so the awk program can
  # tell the two inputs apart by FILENAME.
  awk '
    BEGIN {
      FS = "\t"
      # Initialise every counter so that zero is printed as 0, not as "".
      maxord = 0; max = 0; sug = 0; mis = 0; mis2 = 0; ord = 0
      misrow = ""
    }

    # First input (stdin): remember the 1-based position of each suggestion
    # within its row.
    FILENAME == "-" {
      for (i = 1; i <= NF; i++)
        a[NR, $(i)] = i
      max = NR          # number of suggestion rows read so far
      next
    }

    # Second input: look up the expected corrections of the matching row.
    {
      row = NR - max
      has2 = ($3 != "")
      x1 = a[row, $2]                  # rank of 1st correction, 0 if absent
      x2 = has2 ? a[row, $3] : 0       # rank of 2nd correction, 0 if absent

      sug++
      if (has2) sug++

      # A row is "missing" when none of its corrections was suggested.
      if (!x1 && !x2) {
        mis2++
        misrow = misrow "\n" $0
      }

      # Count every missing correction separately so that sug - mis is the
      # exact number of corrections that were found.
      if (!x1) mis++
      if (has2 && !x2) mis++

      ord += x1 + x2
    }

    END {
      found = sug - mis
      print "Missed rows", misrow
      print "======================================="
      # NOTE(review): maxord is never updated, so this line always reports 0;
      # the intended statistic is not clear from this script - confirm.
      print maxord, "max. suggestion for a word"
      print max, "input rows"
      print mis2, "missing rows"
      print sug, "expected suggestions"
      print mis, "missing suggestions"
      # Avoid a fatal division by zero when nothing was found.
      if (found > 0)
        print ord / found, "average ranking"
      else
        print "n/a", "average ranking"
    }
  ' - "$expected" < "$suggestions"
}

# The first comparison is optional: it only runs when $input.4 exists.
if [[ -f "$input.4" ]]; then
  check "$input.4" "$input.3" > result.aspell
fi
check "$input.5" "$input.3" > result.hunspell

# Show the six summary lines of each report.
if [[ -f result.aspell ]]; then
  tail -6 result.aspell
fi
tail -6 result.hunspell