#!/bin/bash
# Prepare the suggestion-test data files from a list of common misspellings.
#
# Recovered from the patch that adds
# extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare
# (commit 36d22d82aa202bb199967e9512281e9a53db42c9, "Adding upstream version
# 115.7.0esr."). The patch framing has been dropped and the line structure
# restored so that the file is a runnable script again.
#
# Input file format, one entry per line:
#   word->word1, ...
# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
#
# Environment:
#   INPUT         list to process (default: List_of_common_misspellings.txt)
#   HUNSPELL      dictionary passed to "hunspell -d" (default: en_US)
#   ASPELL        language passed to "aspell -l" (default: en_US)
#   HUNSPELL_BIN  hunspell executable (default: ../../src/tools/hunspell)
#
# Files written:
#   $INPUT.1  entries whose misspelling hunspell reports as bad and whose
#             misspelled word contains no dash; "->" replaced by a tab
#   $INPUT.2  the subset aspell reports as bad too; suggestions tab-separated
#   $INPUT.3  the subset whose 2nd and 3rd fields were flagged by neither
#             checker
#   $INPUT.4  aspell -a output for the words of $INPUT.3, reduced to
#             tab-separated suggestions
#   $INPUT.5  hunspell -a output for the same words, in the same shape
#   x.1, x.2  scratch lists in the current directory (left in place)
#
# The script does not use "set -e": a failing stage (for example a missing
# aspell) leaves an empty output file instead of aborting the run.

main() {
  local hunspell=${HUNSPELL_BIN:-../../src/tools/hunspell}
  local hlang=${HUNSPELL:-en_US}
  local alang=${ASPELL:-en_US}
  local input=${INPUT:-List_of_common_misspellings.txt}
  # Every field delimiter below is a TAB. It is spelled via $'\t' so that it
  # cannot be silently turned into a space by whitespace normalisation.
  local tab=$'\t'

  # Remove bad words recognised by Hunspell as good: turn "->" into a tab,
  # then let hunspell look at the first field only (-1) and print the lines
  # it considers misspelled (-L).
  # Remove items with a dash (for Aspell): keep only lines with no "-"
  # before the first tab.
  # Remove spaces from the end of lines.
  sed "s/[-]>/${tab}/" <"$input" |
    "$hunspell" -d "$hlang" -1 -L |
    grep "^[^-]*${tab}" |
    sed 's/ *$//' >"$input.1"

  # Remove bad words recognised by Aspell as good: awk first reads the words
  # aspell lists as misspelled from stdin ("-"), then prints the lines of
  # $input.1 whose first word is among them.
  # Finally replace the ", " between suggestions with tabs.
  cut -f 1 -d "$tab" "$input.1" |
    aspell -l "$alang" --list |
    awk 'FILENAME=="-"{a[$1]=1;next}a[$1]{print$0}' - "$input.1" |
    sed "s/, */${tab}/g" >"$input.2"

  # Remove lines with unrecognised suggestions (except suggestions with
  # spaces): x.1 gets one space-free suggestion per line, x.2 collects the
  # ones hunspell or aspell lists as misspelled.
  cut -d "$tab" -f 2- "$input.2" | tr "\t" "\n" | grep -v ' ' >x.1
  "$hunspell" -l -d "$hlang" <x.1 >x.2
  aspell -l "$alang" --list <x.1 >>x.2
  # Only the 2nd and 3rd tab-separated fields are checked against x.2.
  awk 'BEGIN{FS="\t"}
FILENAME=="-"{a[$1]=1;next}a[$2]!=1 && a[$3]!=1{print $0}' \
    - "$input.2" <x.2 >"$input.3"

  # Collect each checker's suggestions for the remaining misspellings:
  # drop empty lines, skip the first remaining line (sed -n '2,$p'), strip
  # everything up to the last ": ", and tab-separate the suggestions.
  cut -f 1 -d "$tab" "$input.3" |
    aspell -l "$alang" -a |
    grep -v '^$' |
    sed -n '2,$p' |
    sed "s/^.*: //;s/, /${tab}/g" >"$input.4"

  "$hunspell" -d "$hlang" -a -1 <"$input.3" |
    grep -v '^$' |
    sed -n '2,$p' |
    sed "s/^.*: //;s/, /${tab}/g" >"$input.5"
}

main "$@"