summary refs log tree commit diff stats
path: root/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree a68f146d7fa01f0134297619fbe7e33db084e0aa /extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
parent Initial commit. (diff)
download thunderbird-upstream.tar.xz
thunderbird-upstream.zip
Adding upstream version 1:115.7.0. upstream/1%115.7.0 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare')
-rw-r--r-- extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare 40
1 files changed, 40 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
new file mode 100644
index 0000000000..a72d931b8b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Filter a list of common misspellings and collect Aspell/Hunspell pipe-mode output.
+# Input line format: word->word1, ...   (INPUT names the file; HUNSPELL/ASPELL the dictionaries)
+# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
+# Writes $input.1 .. $input.5 and the scratch files x.1 and x.2 in the current directory.
+
+hunspell=../../src/tools/hunspell
+hlang=${HUNSPELL:-en_US}
+alang=${ASPELL:-en_US}
+input=${INPUT:-List_of_common_misspellings.txt}
+tab=$'\t'  # field separator, written as an escape so it cannot be lost to whitespace conversion
+# remove bad words recognised by Hunspell as good (-1: only the first tab-separated field is checked)
+sed "s/[-]>/${tab}/" <"$input" | "$hunspell" -d "$hlang" -1 -L |
+
+# remove items with dash for Aspell
+grep "^[^-]*${tab}" |
+
+# remove spaces from end of lines
+sed 's/ *$//' >"$input.1"
+
+# remove bad words recognised by Aspell as good: keep lines whose first word Aspell lists
+cut -f 1 -d "$tab" "$input.1" | aspell -l "$alang" --list |
+awk 'FILENAME=="-"{a[$1]=1;next}a[$1]{print$0}' - "$input.1" |
+
+# replace commas (and any following spaces) with tabs
+sed "s/, */${tab}/g" >"$input.2"
+
+# remove lines whose 1st or 2nd suggestion is unrecognised (suggestions with spaces are not checked)
+cut -d "$tab" -f 2- "$input.2" | tr "\t" "\n" | grep -v ' ' >x.1
+"$hunspell" -l -d "$hlang" <x.1 >x.2
+aspell -l "$alang" --list <x.1 >>x.2
+awk 'BEGIN{FS="\t"}
+FILENAME=="-"{a[$1]=1;next}a[$2]!=1 && a[$3]!=1{print $0}' - "$input.2" <x.2 >"$input.3"
+
+cut -f 1 -d "$tab" "$input.3" | aspell -l "$alang" -a | grep -v '^$' | sed -n '2,$p' |
+sed "s/^.*: //;s/, /${tab}/g" >"$input.4"
+
+"$hunspell" -d "$hlang" -a -1 <"$input.3" | grep -v '^$' | sed -n '2,$p' |
+sed "s/^.*: //;s/, /${tab}/g" >"$input.5"
+