diff options
Diffstat (limited to 'extensions/spellcheck/hunspell/glue/mozHunspell.cpp')
-rw-r--r-- | extensions/spellcheck/hunspell/glue/mozHunspell.cpp | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/glue/mozHunspell.cpp b/extensions/spellcheck/hunspell/glue/mozHunspell.cpp new file mode 100644 index 0000000000..a69dcd6b05 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspell.cpp @@ -0,0 +1,594 @@ +/******* BEGIN LICENSE BLOCK ******* + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) + * and László Németh (Hunspell). Portions created by the Initial Developers + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. + * + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) + * David Einstein (deinst@world.std.com) + * Michiel van Leeuwen (mvl@exedo.nl) + * Caolan McNamara (cmc@openoffice.org) + * László Németh (nemethl@gyorsposta.hu) + * Davide Prina + * Giuseppe Modugno + * Gianluca Turconi + * Simon Brouwer + * Noll Janos + * Biro Arpad + * Goldman Eleonora + * Sarlos Tamas + * Bencsath Boldizsar + * Halacsy Peter + * Dvornik Laszlo + * Gefferth Andras + * Nagy Viktor + * Varga Daniel + * Chris Halls + * Rene Engelhard + * Bram Moolenaar + * Dafydd Jones + * Harri Pitkanen + * Andras Timar + * Tor Lillqvist + * Jesper Kristensen (mail@jesperkristensen.dk) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + ******* END LICENSE BLOCK *******/ + +#include "mozHunspell.h" +#include "nsReadableUtils.h" +#include "nsString.h" +#include "nsIObserverService.h" +#include "nsIDirectoryEnumerator.h" +#include "nsIFile.h" +#include "nsUnicharUtils.h" +#include "nsCRT.h" +#include "mozInlineSpellChecker.h" +#include "nsIPrefBranch.h" +#include "nsIPrefService.h" +#include "nsNetUtil.h" +#include "mozilla/dom/ContentParent.h" +#include "mozilla/Components.h" +#include "mozilla/Services.h" + +#include <stdlib.h> +#include <tuple> + +using mozilla::dom::ContentParent; +using namespace mozilla; + +NS_IMPL_CYCLE_COLLECTING_ADDREF(mozHunspell) +NS_IMPL_CYCLE_COLLECTING_RELEASE(mozHunspell) + +NS_INTERFACE_MAP_BEGIN(mozHunspell) + NS_INTERFACE_MAP_ENTRY(mozISpellCheckingEngine) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) + NS_INTERFACE_MAP_ENTRY(nsIMemoryReporter) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellCheckingEngine) + NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozHunspell) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTION_WEAK(mozHunspell, mPersonalDictionary) + +NS_IMPL_COMPONENT_FACTORY(mozHunspell) { + auto hunspell = MakeRefPtr<mozHunspell>(); + if (NS_SUCCEEDED(hunspell->Init())) { + return hunspell.forget().downcast<mozISpellCheckingEngine>(); + } + return nullptr; +} + +mozHunspell::mozHunspell() { +#ifdef DEBUG + // There must be only one instance of this class: it reports memory based on + // a single static count in HunspellAllocator. + static bool hasRun = false; + MOZ_ASSERT(!hasRun); + hasRun = true; +#endif +} + +nsresult mozHunspell::Init() { + LoadDictionaryList(false); + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->AddObserver(this, "profile-do-change", true); + obs->AddObserver(this, "profile-after-change", true); + } + + mozilla::RegisterWeakMemoryReporter(this); + + return NS_OK; +} + +mozHunspell::~mozHunspell() { + mozilla::UnregisterWeakMemoryReporter(this); + + mPersonalDictionary = nullptr; + mHunspells.Clear(); +} + +NS_IMETHODIMP +mozHunspell::GetDictionaries(nsTArray<nsCString>& aDictionaries) { + MOZ_ASSERT(aDictionaries.IsEmpty()); + for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) { + if (iter.Data().mEnabled) { + aDictionaries.AppendElement(iter.Key()); + } + } + return NS_OK; +} + +/* Set the Dictionaries. + * This also Loads the dictionaries and initializes the converter using the + * dictionaries converter + */ +NS_IMETHODIMP +mozHunspell::SetDictionaries(const nsTArray<nsCString>& aDictionaries) { + if (aDictionaries.IsEmpty()) { + mHunspells.Clear(); + return NS_OK; + } + + // Disable any dictionaries we've already loaded that we're not + // going to use. + for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { + if (!aDictionaries.Contains(iter.Key())) { + iter.Data().mEnabled = false; + } + } + + bool firstDictionary = true; + for (const auto& dictionary : aDictionaries) { + NS_ConvertUTF8toUTF16 dict(dictionary); + nsIURI* affFile = mDictionaries.GetWeak(dict); + if (!affFile) { + return NS_ERROR_FILE_NOT_FOUND; + } + + nsAutoCString affFileName; + nsresult rv = affFile->GetSpec(affFileName); + NS_ENSURE_SUCCESS(rv, rv); + + if (auto entry = mHunspells.Lookup(dictionary)) { + if (entry.Data().mAffixFileName == affFileName) { + entry.Data().mEnabled = true; + continue; + } + } + + DictionaryData dictionaryData; + dictionaryData.mAffixFileName = affFileName; + + // Load the first dictionary now, we'll load the others lazily during + // checking. + if (firstDictionary) { + rv = dictionaryData.LoadIfNecessary(); + NS_ENSURE_SUCCESS(rv, rv); + firstDictionary = false; + } + + mHunspells.InsertOrUpdate(dictionary, std::move(dictionaryData)); + } + + // If we have a large number of dictionaries loaded, try freeing any disabled + // dictionaries to limit memory use. + if (mHunspells.Count() > 10) { + mHunspells.RemoveIf([](const auto& iter) { return !iter.Data().mEnabled; }); + } + + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::GetPersonalDictionary( + mozIPersonalDictionary** aPersonalDictionary) { + *aPersonalDictionary = mPersonalDictionary; + NS_IF_ADDREF(*aPersonalDictionary); + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::SetPersonalDictionary( + mozIPersonalDictionary* aPersonalDictionary) { + mPersonalDictionary = aPersonalDictionary; + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::GetDictionaryList( + nsTArray<nsCString>& aDictionaries) { + MOZ_ASSERT(aDictionaries.IsEmpty()); + for (const auto& key : mDictionaries.Keys()) { + aDictionaries.AppendElement(NS_ConvertUTF16toUTF8(key)); + } + + return NS_OK; +} + +void mozHunspell::LoadDictionaryList(bool aNotifyChildProcesses) { + mDictionaries.Clear(); + + nsresult rv; + + // find built in dictionaries, or dictionaries specified in + // spellchecker.dictionary_path in prefs + nsCOMPtr<nsIFile> dictDir; + + // check preferences first + nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID)); + if (prefs) { + nsAutoCString extDictPath; + rv = prefs->GetCharPref("spellchecker.dictionary_path", extDictPath); + if (NS_SUCCEEDED(rv)) { + // set the spellchecker.dictionary_path + rv = NS_NewNativeLocalFile(extDictPath, true, getter_AddRefs(dictDir)); + } + if (dictDir) { + LoadDictionariesFromDir(dictDir); + } + } + + // find dictionaries in DICPATH + char* dicEnv = PR_GetEnv("DICPATH"); + if (dicEnv) { + // do a two-pass dance so dictionaries are loaded right-to-left as + // preference + nsTArray<nsCOMPtr<nsIFile>> dirs; + nsAutoCString env(dicEnv); // assume dicEnv is UTF-8 + + char* currPath = nullptr; + char* nextPaths = env.BeginWriting(); + while ((currPath = NS_strtok(":", &nextPaths))) { + nsCOMPtr<nsIFile> dir; + rv = + NS_NewNativeLocalFile(nsCString(currPath), true, getter_AddRefs(dir)); + if (NS_SUCCEEDED(rv)) { + dirs.AppendElement(dir); + } + } + + // load them in reverse order so they override each other properly + for (int32_t i = dirs.Length() - 1; i >= 0; i--) { + LoadDictionariesFromDir(dirs[i]); + } + } + + // find dictionaries from restartless extensions + for (int32_t i = 0; i < mDynamicDirectories.Count(); i++) { + LoadDictionariesFromDir(mDynamicDirectories[i]); + } + + for (const auto& dictionaryEntry : mDynamicDictionaries) { + mDictionaries.InsertOrUpdate(dictionaryEntry.GetKey(), + dictionaryEntry.GetData()); + } + + DictionariesChanged(aNotifyChildProcesses); +} + +void mozHunspell::DictionariesChanged(bool aNotifyChildProcesses) { + // Now we have finished updating the list of dictionaries, update the current + // dictionary and any editors which may use it. + mozInlineSpellChecker::UpdateCanEnableInlineSpellChecking(); + + if (aNotifyChildProcesses) { + ContentParent::NotifyUpdatedDictionaries(); + } + + // Check if the current dictionaries are still available. + // If not, try to replace it with other dictionaries of the same language. + if (!mHunspells.IsEmpty()) { + nsTArray<nsCString> dictionaries; + for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) { + if (iter.Data().mEnabled) { + dictionaries.AppendElement(iter.Key()); + } + } + nsresult rv = SetDictionaries(dictionaries); + if (NS_SUCCEEDED(rv)) return; + } + + // If the current dictionaries are gone, and we don't have a good replacement, + // set no current dictionary. + if (!mHunspells.IsEmpty()) { + nsTArray<nsCString> empty; + SetDictionaries(empty); + } +} + +NS_IMETHODIMP +mozHunspell::LoadDictionariesFromDir(nsIFile* aDir) { + nsresult rv; + + bool check = false; + rv = aDir->Exists(&check); + if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED; + + rv = aDir->IsDirectory(&check); + if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED; + + nsCOMPtr<nsIDirectoryEnumerator> files; + rv = aDir->GetDirectoryEntries(getter_AddRefs(files)); + if (NS_FAILED(rv)) return NS_ERROR_UNEXPECTED; + + nsCOMPtr<nsIFile> file; + while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) { + nsAutoString leafName; + file->GetLeafName(leafName); + if (!StringEndsWith(leafName, u".dic"_ns)) continue; + + nsAutoString dict(leafName); + dict.SetLength(dict.Length() - 4); // magic length of ".dic" + + // check for the presence of the .aff file + leafName = dict; + leafName.AppendLiteral(".aff"); + file->SetLeafName(leafName); + rv = file->Exists(&check); + if (NS_FAILED(rv) || !check) continue; + + // Replace '_' separator with '-' + dict.ReplaceChar('_', '-'); + + nsCOMPtr<nsIURI> uri; + rv = NS_NewFileURI(getter_AddRefs(uri), file); + NS_ENSURE_SUCCESS(rv, rv); + + mDictionaries.InsertOrUpdate(dict, uri); + } + + return NS_OK; +} + +nsresult mozHunspell::DictionaryData::ConvertCharset(const nsAString& aStr, + std::string& aDst) { + if (NS_WARN_IF(!mEncoder)) { + return NS_ERROR_NOT_INITIALIZED; + } + + auto src = Span(aStr.BeginReading(), aStr.Length()); + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(src.Length()); + if (!needed.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + + aDst.resize(needed.value()); + + char* dstPtr = &aDst[0]; + auto dst = Span(reinterpret_cast<uint8_t*>(dstPtr), needed.value()); + + uint32_t result; + size_t written; + std::tie(result, std::ignore, written) = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); + MOZ_ASSERT(result != kOutputFull); + if (result != kInputEmpty) { + return NS_ERROR_UENC_NOMAPPING; + } + aDst.resize(written); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +nsresult mozHunspell::DictionaryData::LoadIfNecessary() { + if (mHunspell && mEncoder && mDecoder) { + return NS_OK; + } + + if (mLoadFailed) { + return NS_ERROR_FAILURE; + } + + nsCString dictFileName = mAffixFileName; + int32_t dotPos = dictFileName.RFindChar('.'); + if (dotPos == -1) { + mLoadFailed = true; + return NS_ERROR_FAILURE; + } + dictFileName.SetLength(dotPos); + dictFileName.AppendLiteral(".dic"); + + UniquePtr<RLBoxHunspell> hunspell( + RLBoxHunspell::Create(mAffixFileName, dictFileName)); + if (!hunspell) { + mLoadFailed = true; + // TODO Bug 1788857: Verify error propagation in case of inaccessible file + return NS_ERROR_OUT_OF_MEMORY; + } + mHunspell = std::move(hunspell); + auto encoding = + Encoding::ForLabelNoReplacement(mHunspell->get_dict_encoding()); + if (!encoding) { + mLoadFailed = true; + return NS_ERROR_UCONV_NOCONV; + } + mEncoder = encoding->NewEncoder(); + mDecoder = encoding->NewDecoderWithoutBOMHandling(); + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::CollectReports(nsIHandleReportCallback* aHandleReport, + nsISupports* aData, bool aAnonymize) { + MOZ_COLLECT_REPORT("explicit/spell-check", KIND_HEAP, UNITS_BYTES, + HunspellAllocator::MemoryAllocated(), + "Memory used by the spell-checking engine."); + + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::Check(const nsAString& aWord, bool* aResult) { + if (NS_WARN_IF(!aResult)) { + return NS_ERROR_INVALID_ARG; + } + + if (NS_WARN_IF(mHunspells.IsEmpty())) { + return NS_ERROR_FAILURE; + } + + *aResult = true; + for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { + if (!iter.Data().mEnabled) { + continue; + } + + nsresult rv = iter.Data().LoadIfNecessary(); + if (NS_FAILED(rv)) { + continue; + } + + std::string charsetWord; + rv = iter.Data().ConvertCharset(aWord, charsetWord); + if (NS_FAILED(rv)) { + continue; + } + + // Depending upon the encoding, we might end up with a string that begins + // with the null byte. Since the hunspell interface uses C-style strings, + // this appears like an empty string, and hunspell marks empty strings as + // spelled correctly. Skip these cases to allow another dictionary to have + // the chance to spellcheck them. + if (charsetWord.empty() || charsetWord[0] == 0) { + continue; + } + + *aResult = iter.Data().mHunspell->spell(charsetWord); + if (*aResult) { + break; + } + } + + if (!*aResult && mPersonalDictionary) { + return mPersonalDictionary->Check(aWord, aResult); + } + + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::Suggest(const nsAString& aWord, nsTArray<nsString>& aSuggestions) { + if (NS_WARN_IF(mHunspells.IsEmpty())) { + return NS_ERROR_FAILURE; + } + + MOZ_ASSERT(aSuggestions.IsEmpty()); + + for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { + if (!iter.Data().mEnabled) { + continue; + } + + nsresult rv = iter.Data().LoadIfNecessary(); + if (NS_FAILED(rv)) { + continue; + } + + std::string charsetWord; + rv = iter.Data().ConvertCharset(aWord, charsetWord); + NS_ENSURE_SUCCESS(rv, rv); + + std::vector<std::string> suggestions = + iter.Data().mHunspell->suggest(charsetWord); + if (!suggestions.empty()) { + aSuggestions.SetCapacity(aSuggestions.Length() + suggestions.size()); + for (Span<const char> charSrc : suggestions) { + // Convert the suggestion to utf16 + auto src = AsBytes(charSrc); + nsresult rv = + iter.Data().mDecoder->Encoding()->DecodeWithoutBOMHandling( + src, *aSuggestions.AppendElement()); + NS_ENSURE_SUCCESS(rv, rv); + iter.Data().mDecoder->Encoding()->NewDecoderWithoutBOMHandlingInto( + *iter.Data().mDecoder); + } + } + } + + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::Observe(nsISupports* aSubj, const char* aTopic, + const char16_t* aData) { + NS_ASSERTION(!strcmp(aTopic, "profile-do-change") || + !strcmp(aTopic, "profile-after-change"), + "Unexpected observer topic"); + + LoadDictionaryList(false); + + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::AddDirectory(nsIFile* aDir) { + mDynamicDirectories.AppendObject(aDir); + LoadDictionaryList(true); + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::RemoveDirectory(nsIFile* aDir) { + mDynamicDirectories.RemoveObject(aDir); + LoadDictionaryList(true); + +#ifdef MOZ_THUNDERBIRD + /* + * This notification is needed for Thunderbird. Thunderbird derives the + * dictionary from the document's "lang" attribute. If a dictionary is + * removed, we need to change the "lang" attribute. + */ + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->NotifyObservers(nullptr, SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION, + nullptr); + } +#endif + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::AddDictionary(const nsAString& aLang, + nsIURI* aFile) { + NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG); + + mDynamicDictionaries.InsertOrUpdate(aLang, aFile); + mDictionaries.InsertOrUpdate(aLang, aFile); + DictionariesChanged(true); + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::RemoveDictionary(const nsAString& aLang, + nsIURI* aFile, bool* aRetVal) { + NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG); + *aRetVal = false; + + nsCOMPtr<nsIURI> file = mDynamicDictionaries.Get(aLang); + bool equal; + if (file && NS_SUCCEEDED(file->Equals(aFile, &equal)) && equal) { + mDynamicDictionaries.Remove(aLang); + LoadDictionaryList(true); + *aRetVal = true; + } + return NS_OK; +} |