summaryrefslogtreecommitdiffstats
path: root/extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
commit43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree620249daf56c0258faa40cbdcf9cfba06de2a846 /extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh
parentInitial commit. (diff)
downloadfirefox-upstream.tar.xz
firefox-upstream.zip
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh')
-rwxr-xr-xextensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh95
1 files changed, 95 insertions, 0 deletions
diff --git a/extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh b/extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh
new file mode 100755
index 0000000000..e72654e84d
--- /dev/null
+++ b/extensions/spellcheck/locales/en-US/hunspell/dictionary-sources/edit-dictionary.sh
@@ -0,0 +1,95 @@
+#! /usr/bin/env sh
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Abort on the first command that exits with a non-zero status.
+set -e
+
+# Directory the script is started from; the scowl speller tools are looked up
+# below it.
+WKDIR="$(pwd)"
+SPELLER="${WKDIR}/scowl/speller"
+
+# Compress a word list read from stdin by running scowl's `munch-list munch`
+# with the given affix rules; the result is written to stdout, sorted and with
+# duplicate lines removed.
+#   $1 - path to the affix (.aff) file handed to munch-list
+munch() {
+ # Quoted so that a working directory (and therefore $SPELLER) or an affix
+ # path containing spaces or glob characters is passed through intact.
+ "$SPELLER/munch-list" munch "$1" | sort -u
+}
+
+# Expand a dictionary read from stdin into a plain word list by running
+# scowl's `munch-list expand` with the given affix rules; the result is
+# written to stdout, sorted and with duplicate lines removed.
+#   $1 - path to the affix (.aff) file handed to munch-list
+expand() {
+ # Lines made up only of digits are dropped before expanding. -E with a plain
+ # `+` is used because `\+` is not part of POSIX basic regular expressions.
+ # The expansions are quoted so paths containing spaces survive.
+ grep -Ev '^[0-9]+$' | "$SPELLER/munch-list" expand "$1" | sort -u
+}
+
+if [ ! -d "$SPELLER" ]; then
+ echo "The 'scowl' folder is missing. Check the documentation at"
+ echo "https://firefox-source-docs.mozilla.org/extensions/spellcheck/index.html"
+ exit 1
+fi
+
+if [ -z "$EDITOR" ]; then
+ echo 'Need to set the $EDITOR environment variable to your favorite editor.'
+ exit 1
+fi
+
+# Open the editor and allow the user to type or paste words
+echo "Editor is going to open, you can add the list of words. Quit the editor to finish editing."
+echo "Press Enter to begin."
+# The typed line itself is discarded; -r stops read from interpreting
+# backslashes in it.
+read -r foo
+# Left unquoted so that an $EDITOR value carrying arguments still works.
+$EDITOR temp-list.txt
+
+if [ ! -f temp-list.txt ]; then
+ echo "The content of the editor hasn't been saved."
+ exit 1
+fi
+# Remove empty lines. `sed -i ""` is BSD-only syntax (GNU sed takes the empty
+# string as the script and the real script as a file name), so filter into a
+# scratch file and move it into place instead.
+sed '/^$/d' temp-list.txt > temp-list.txt.tmp
+mv temp-list.txt.tmp temp-list.txt
+
+# Drop the leading count line from the current en-US dictionary, then
+# re-encode what is left from ISO-8859-1 to UTF-8.
+tail -n +2 < ../en-US.dic > en-US.stripped
+iconv -f iso-8859-1 -t utf-8 < en-US.stripped > en-US.utf8
+rm en-US.stripped
+
+# The munched result is different for words excluded from suggestions and
+# for numerals, so both groups are set aside in temporary files, taken from
+# the UTF-8 copy of the dictionary.
+grep '!$' utf8/en-US-utf8.dic > en-US-nosug.txt
+grep '^[0-9][a-z/]' utf8/en-US-utf8.dic > en-US-numerals.txt
+
+aff_file=../en-US.aff
+wordlist=en-US-wordlist.txt
+wordlist_nonum=en-US-wordlist-nonum.txt
+
+# Turn the dictionary into a flat word list, then append the new words
+expand "$aff_file" < en-US.utf8 > "$wordlist"
+rm en-US.utf8
+cat temp-list.txt >> "$wordlist"
+rm temp-list.txt
+
+# Leave out every entry of the expanded word list that starts with a digit
+grep -v '^[0-9]' "$wordlist" > "$wordlist_nonum"
+rm "$wordlist"
+
+# Compress the word list again with the munch script, which folds entries
+# together through the affix rules where possible.
+munch "$aff_file" < "$wordlist_nonum" > en-US-munched.dic
+rm "$wordlist_nonum"
+
+# Remove words that should not be suggested
+while IFS='/' read -ra line
+do
+ sed -E -i "" "\:^$line($|/.*):d" en-US-munched.dic
+done < "en-US-nosug.txt"
+
+# Re-append the suggestion exclusions and the numerals that were set aside
+# from the original .dic file, then discard the temporary copies
+cat en-US-nosug.txt en-US-numerals.txt >> en-US-munched.dic
+rm en-US-nosug.txt en-US-numerals.txt
+
+# Write the line count first, followed by the entries sorted in the C locale
+{
+ wc -l < en-US-munched.dic | tr -d '[:blank:]'
+ LC_ALL=C sort en-US-munched.dic
+} > en-US.dic
+rm -f en-US-munched.dic
+
+# Re-encode to ISO-8859-1 for the shipped dictionary
+iconv -f utf-8 -t iso-8859-1 < en-US.dic > ../en-US.dic
+
+# The UTF-8 version is kept as a copy in /utf8
+mv en-US.dic utf8/en-US-utf8.dic