diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh')
-rwxr-xr-x | extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh | 95 |
1 files changed, 95 insertions, 0 deletions
#! /usr/bin/env sh

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# File: extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh
#
# Interactively adds words to the en-US Hunspell dictionary.
#
# Run from the directory containing this script. Requirements:
#   - ./scowl/speller/munch-list must exist (the 'scowl' checkout);
#   - $EDITOR must be set;
#   - ../en-US.dic, ../en-US.aff and utf8/en-US-utf8.dic must exist.
#
# Outputs: rewrites ../en-US.dic (ISO-8859-1) and utf8/en-US-utf8.dic (UTF-8).
# Intermediate files are created in, and removed from, the current directory.
#
# The script only relies on POSIX sh features and POSIX options of sed, grep
# and awk, so it behaves the same with dash/bash and with GNU/BSD userlands.

set -e

WKDIR="$(pwd)"
SPELLER="$WKDIR/scowl/speller"

# Compress the word list read from stdin using the affix file given as $1.
# Writes the sorted, de-duplicated result to stdout.
munch() {
  "$SPELLER/munch-list" munch "$1" | sort -u
}

# Expand the dictionary read from stdin into a plain word list using the
# affix file given as $1. Lines made only of digits are dropped first.
# Writes the sorted, de-duplicated result to stdout.
expand() {
  # '[0-9][0-9]*' is the POSIX BRE spelling of the GNU-only '[0-9]\+'.
  grep -v '^[0-9][0-9]*$' | "$SPELLER/munch-list" expand "$1" | sort -u
}

if [ ! -d "$SPELLER" ]; then
  echo "The 'scowl' folder is missing. Check the documentation at" >&2
  echo "https://firefox-source-docs.mozilla.org/extensions/spellcheck/index.html" >&2
  exit 1
fi

if [ -z "$EDITOR" ]; then
  echo 'Need to set the $EDITOR environment variable to your favorite editor.' >&2
  exit 1
fi

# Open the editor and allow the user to type or paste words
echo "Editor is going to open, you can add the list of words. Quit the editor to finish editing."
echo "Press Enter to begin."
read -r _reply
# $EDITOR is deliberately left unquoted so that values carrying arguments
# (for example "code -w") are split into command and options.
$EDITOR temp-list.txt

if [ ! -f temp-list.txt ]; then
  echo "The content of the editor hasn't been saved." >&2
  exit 1
fi

# Remove empty lines. 'sed -i' takes different arguments on GNU and BSD sed,
# so filter into a scratch file and move it back instead.
sed '/^$/d' temp-list.txt > temp-list.stripped
mv temp-list.stripped temp-list.txt

# Copy the current en-US dictionary and strip the first line that contains
# the count.
tail -n +2 ../en-US.dic > en-US.stripped

# Convert the file to UTF-8
iconv -f iso-8859-1 -t utf-8 en-US.stripped > en-US.utf8
rm en-US.stripped

# Save to a temporary file words excluded from suggestions, and numerals,
# since the munched result is different for both.
grep '!$' < utf8/en-US-utf8.dic > en-US-nosug.txt
grep '^[0-9][a-z/]' < utf8/en-US-utf8.dic > en-US-numerals.txt

# Expand the dictionary to a word list
expand ../en-US.aff < en-US.utf8 > en-US-wordlist.txt
rm en-US.utf8

# Add the new words
cat temp-list.txt >> en-US-wordlist.txt
rm temp-list.txt

# Remove numerals from the expanded wordlist
grep -v '^[0-9]' < en-US-wordlist.txt > en-US-wordlist-nonum.txt
rm en-US-wordlist.txt

# Run the wordlist through the munch script, to compress the dictionary where
# possible (using affix rules).
munch ../en-US.aff < en-US-wordlist-nonum.txt > en-US-munched.dic
rm en-US-wordlist-nonum.txt

# Remove words that should not be suggested: drop every munched entry whose
# word part (the text before the first '/') also appears as the word part of
# a line in en-US-nosug.txt. A single awk pass compares the words literally,
# so characters such as '.' in a word are not treated as regex operators.
# FILENAME is compared with ARGV[1] (rather than testing NR == FNR) so that
# an empty en-US-nosug.txt does not cause the dictionary to be read as the
# exclusion list.
awk -F/ '
  FILENAME == ARGV[1] { nosug[$1] = 1; next }
  !($1 in nosug)
' en-US-nosug.txt en-US-munched.dic > en-US-munched.filtered
mv en-US-munched.filtered en-US-munched.dic

# Add back suggestion exclusions and numerals from the original .dic file
cat en-US-nosug.txt >> en-US-munched.dic
cat en-US-numerals.txt >> en-US-munched.dic
rm en-US-nosug.txt
rm en-US-numerals.txt

# Add back the line count and sort the lines
wc -l < en-US-munched.dic | tr -d '[:blank:]' > en-US.dic
LC_ALL=C sort en-US-munched.dic >> en-US.dic
rm -f en-US-munched.dic

# Convert back to ISO-8859-1. Convert into a scratch file first: redirecting
# straight into ../en-US.dic would truncate it before iconv runs, leaving an
# empty dictionary if the conversion fails.
iconv -f utf-8 -t iso-8859-1 en-US.dic > en-US.latin1
mv en-US.latin1 ../en-US.dic

# Keep a copy of the UTF-8 file in /utf8
mv en-US.dic utf8/en-US-utf8.dic