summaryrefslogtreecommitdiffstats
path: root/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
commit6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp
parentInitial commit. (diff)
downloadthunderbird-upstream.tar.xz
thunderbird-upstream.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp')
-rw-r--r--extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp256
1 files changed, 256 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp b/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp
new file mode 100644
index 0000000000..b3a1990ceb
--- /dev/null
+++ b/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp
@@ -0,0 +1,256 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#ifdef MOZ_WASM_SANDBOXING_HUNSPELL
+# include "mozilla/ipc/LibrarySandboxPreload.h"
+#endif
+#include "RLBoxHunspell.h"
+#include "mozHunspellRLBoxGlue.h"
+#include "mozHunspellRLBoxHost.h"
+#include "nsThread.h"
+
+using namespace rlbox;
+using namespace mozilla;
+
+// Duplicates `str` into memory owned by `aSandbox`.
+//
+// The copy covers str.size() + 1 bytes, i.e. the characters plus the
+// terminating NUL provided by c_str(). If the sandbox allocation fails the
+// null tainted pointer is returned unchanged and nothing is copied; callers in
+// this file release a successful allocation with free_in_sandbox().
+static tainted_hunspell<char*> allocStrInSandbox(
+    rlbox_sandbox_hunspell& aSandbox, const std::string& str) {
+  const size_t bytesWithNul = str.size() + 1;
+  tainted_hunspell<char*> t_copy =
+      aSandbox.malloc_in_sandbox<char>(bytesWithNul);
+  if (!t_copy) {
+    // Allocation failed inside the sandbox; let the caller decide what to do.
+    return t_copy;
+  }
+
+  rlbox::memcpy(aSandbox, t_copy, str.c_str(), bytesWithNul);
+  return t_copy;
+}
+
+// Factory for RLBoxHunspell. Must run on the main thread.
+//
+// Creates the sandbox, adds the (non-empty) affix and dictionary paths to the
+// mozHunspellCallbacks allow list and returns a RLBoxHunspell allocated with
+// `new`. Returns nullptr when the sandbox cannot be created in wasm builds or,
+// on 32-bit wasm builds, when the dictionary size cannot be obtained.
+/* static */
+RLBoxHunspell* RLBoxHunspell::Create(const nsCString& affpath,
+                                     const nsCString& dpath) {
+  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
+
+  auto rawSandbox = mozilla::MakeUnique<rlbox_sandbox_hunspell>();
+
+#if defined(MOZ_WASM_SANDBOXING_HUNSPELL)
+#  if !defined(HAVE_64BIT_BUILD)
+  // On 32-bit builds the default rlbox sandbox is smaller than the 4GB
+  // maximum, and some locales need more room for spell checking, so a larger
+  // sandbox is requested explicitly. See Bug 1739669 for more details.
+
+  // Reading the dictionary size is the first access to dpath, so it may fail
+  // for any filesystem reason (invalid path, inaccessible file, ...).
+  Result<int64_t, nsresult> sizeOrError =
+      mozHunspellFileMgrHost::GetSize(dpath);
+  NS_ENSURE_TRUE(sizeOrError.isOk(), nullptr);
+
+  const int64_t dictBytes = sizeOrError.unwrap();
+  NS_ENSURE_TRUE(dictBytes >= 0, nullptr);
+
+  // Hunspell's memory use scales with the dictionary file, which differs per
+  // locale, so the estimate is the file size multiplied by 4.8. With that
+  // factor the 1.5MB en_US dictionary fits in an 8MB sandbox. The analysis is
+  // in bug 1739669 and bug 1739761.
+  const uint64_t memoryEstimate = static_cast<uint64_t>(4.8 * dictBytes);
+
+  // Ask for a valid capacity that is at least as large as the estimate.
+  const w2c_mem_capacity wasmCapacity = get_valid_wasm2c_memory_capacity(
+      memoryEstimate, true /* wasm's 32-bit memory */);
+
+  const bool created = rawSandbox->create_sandbox(
+      /* shouldAbortOnFailure = */ false, &wasmCapacity);
+#  else
+  const bool created =
+      rawSandbox->create_sandbox(/* shouldAbortOnFailure = */ false);
+#  endif
+#else
+  // Non-wasm build: the result of create_sandbox() is not inspected.
+  rawSandbox->create_sandbox();
+  const bool created = true;
+#endif
+
+  NS_ENSURE_TRUE(created, nullptr);
+
+  // From here on the sandbox is initialized, so hand it to a pointer that uses
+  // RLBoxDeleter.
+  mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> liveSandbox(
+      rawSandbox.release());
+
+  // Only non-empty paths are added to the allow list.
+  const auto allowIfNonEmpty = [](const nsCString& aPath) {
+    if (!aPath.IsEmpty()) {
+      mozHunspellCallbacks::AllowFile(aPath);
+    }
+  };
+  allowIfNonEmpty(affpath);
+  allowIfNonEmpty(dpath);
+
+  // TODO Bug 1788857: Verify error handling in case of inaccessible file
+  return new RLBoxHunspell(std::move(liveSandbox), affpath, dpath);
+}
+
+// Takes ownership of an already created sandbox and builds the Hunspell
+// instance inside it.
+//
+// Steps, in order:
+//   1. register the mozHunspellCallbacks host callbacks with the sandbox and
+//      pass them to the sandboxed code via RegisterHunspellCallbacks;
+//   2. copy `affpath` and `dpath` into sandbox memory;
+//   3. call Hunspell_create and store the resulting handle in mHandle;
+//   4. free the temporary path copies;
+//   5. copy the dictionary encoding out of the sandbox into mDicEncoding.
+//
+// Every failure here (path allocation, handle creation, missing encoding
+// string) is fatal through MOZ_RELEASE_ASSERT.
+RLBoxHunspell::RLBoxHunspell(
+    mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> aSandbox,
+    const nsCString& affpath, const nsCString& dpath)
+    : mSandbox(std::move(aSandbox)), mHandle(nullptr) {
+  // Register callbacks
+  mCreateFilemgr =
+      mSandbox->register_callback(mozHunspellCallbacks::CreateFilemgr);
+  mGetLine = mSandbox->register_callback(mozHunspellCallbacks::GetLine);
+  mGetLineNum = mSandbox->register_callback(mozHunspellCallbacks::GetLineNum);
+  mDestructFilemgr =
+      mSandbox->register_callback(mozHunspellCallbacks::DestructFilemgr);
+  mHunspellToUpperCase =
+      mSandbox->register_callback(mozHunspellCallbacks::ToUpperCase);
+  mHunspellToLowerCase =
+      mSandbox->register_callback(mozHunspellCallbacks::ToLowerCase);
+  mHunspellGetCurrentCS =
+      mSandbox->register_callback(mozHunspellCallbacks::GetCurrentCS);
+
+  mSandbox->invoke_sandbox_function(RegisterHunspellCallbacks, mCreateFilemgr,
+                                    mGetLine, mGetLineNum, mDestructFilemgr,
+                                    mHunspellToUpperCase, mHunspellToLowerCase,
+                                    mHunspellGetCurrentCS);
+
+  // Copy the affpath and dpath into the sandbox
+  // These allocations should definitely succeed as these are first allocations
+  // inside the sandbox.
+  tainted_hunspell<char*> t_affpath =
+      allocStrInSandbox(*mSandbox, affpath.get());
+  MOZ_RELEASE_ASSERT(t_affpath);
+
+  tainted_hunspell<char*> t_dpath = allocStrInSandbox(*mSandbox, dpath.get());
+  MOZ_RELEASE_ASSERT(t_dpath);
+
+  // Create handle
+  mHandle = mSandbox->invoke_sandbox_function(
+      Hunspell_create, rlbox::sandbox_const_cast<const char*>(t_affpath),
+      rlbox::sandbox_const_cast<const char*>(t_dpath));
+  MOZ_RELEASE_ASSERT(mHandle);
+
+  // The path copies were only needed for the Hunspell_create call.
+  mSandbox->free_in_sandbox(t_dpath);
+  mSandbox->free_in_sandbox(t_affpath);
+
+  // Get dictionary encoding
+  tainted_hunspell<char*> t_enc =
+      mSandbox->invoke_sandbox_function(Hunspell_get_dic_encoding, mHandle);
+  t_enc.copy_and_verify_string([&](std::unique_ptr<char[]> enc) {
+    // The pointer comes from the untrusted sandbox. RLBox hands the verifier
+    // a null unique_ptr when the tainted string is null, and measuring a null
+    // string is undefined behavior, so fail deterministically instead, like
+    // the other checks in this constructor.
+    MOZ_RELEASE_ASSERT(enc != nullptr,
+                       "Hunspell returned a null dictionary encoding");
+    mDicEncoding.assign(enc.get());
+  });
+}
+
+RLBoxHunspell::~RLBoxHunspell() {
+ MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
+ // Tear down the Hunspell instance first: Hunspell_destroy is invoked in the sandbox and frees mHandle.
+ mSandbox->invoke_sandbox_function(Hunspell_destroy, mHandle);
+ mHandle = nullptr;
+
+ // Then unregister each of the seven host callbacks that the constructor registered with the sandbox.
+ mDestructFilemgr.unregister();
+ mGetLineNum.unregister();
+ mGetLine.unregister();
+ mCreateFilemgr.unregister();
+ mHunspellToUpperCase.unregister();
+ mHunspellToLowerCase.unregister();
+ mHunspellGetCurrentCS.unregister();
+
+ // Finally clear any callback data and the file allow list held by mozHunspellCallbacks.
+ mozHunspellCallbacks::Clear();
+}
+
+// Exclusive upper bound on the length of a word that spell() and suggest() pass to the sandboxed Hunspell.
+// Words beyond a certain size make the Hunspell sandbox run out of memory, so longer words never reach it.
+// The value 200000 is an arbitrary limit picked to ensure this doesn't happen.
+static const size_t gWordSizeLimit = 200000;
+
+// Checks the spelling of `stdWord` with the sandboxed Hunspell. Must run on
+// the main thread.
+//
+// Returns the value produced by Hunspell_spell. When the check cannot be
+// performed (the word is at least gWordSizeLimit long, or the sandbox has no
+// memory left for a copy of the word) the word is assumed to be spelt
+// correctly and 1 is returned.
+int RLBoxHunspell::spell(const std::string& stdWord) {
+  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
+
+  // Result used for every graceful failure below.
+  constexpr int kAssumeCorrect = 1;
+
+  if (!(stdWord.length() < gWordSizeLimit)) {
+    // Too long to hand to the sandbox safely.
+    return kAssumeCorrect;
+  }
+
+  // The sandboxed code can only read its own memory, so copy the word in.
+  tainted_hunspell<char*> t_input = allocStrInSandbox(*mSandbox, stdWord);
+  if (!t_input) {
+    // The hunspell sandbox is out of memory.
+    return kAssumeCorrect;
+  }
+
+  // Run the check inside the sandbox.
+  auto t_verdict = mSandbox->invoke_sandbox_function(
+      Hunspell_spell, mHandle,
+      rlbox::sandbox_const_cast<const char*>(t_input));
+
+  // The integer is passed through as-is.
+  const int verdict =
+      t_verdict.copy_and_verify([](int aVerdict) { return aVerdict; });
+
+  mSandbox->free_in_sandbox(t_input);
+  return verdict;
+}
+
+// Returns the dictionary encoding name cached in mDicEncoding. The reference
+// points at the member, so it is only valid while this object is alive.
+const std::string& RLBoxHunspell::get_dict_encoding() const {
+  return this->mDicEncoding;
+}
+
+// Asks the sandboxed Hunspell for spelling suggestions for `stdWord`. Must run
+// on the main thread.
+//
+// Fails gracefully: an empty list is returned when the word is at least
+// gWordSizeLimit long or when the sandbox cannot allocate the word copy or the
+// output slot. A negative suggestion count coming back from the sandbox is
+// fatal (MOZ_RELEASE_ASSERT).
+std::vector<std::string> RLBoxHunspell::suggest(const std::string& stdWord) {
+  MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread());
+
+  // Stays empty on every early exit.
+  std::vector<std::string> results;
+
+  if (stdWord.length() >= gWordSizeLimit) {
+    return results;
+  }
+
+  // The sandboxed code needs its own copy of the word.
+  tainted_hunspell<char*> t_input = allocStrInSandbox(*mSandbox, stdWord);
+  if (!t_input) {
+    return results;
+  }
+
+  // Slot inside the sandbox that receives the pointer to the suggestion list.
+  tainted_hunspell<char***> t_outList = mSandbox->malloc_in_sandbox<char**>();
+  if (!t_outList) {
+    // Do not leak the word copy made above.
+    mSandbox->free_in_sandbox(t_input);
+    return results;
+  }
+
+  *t_outList = nullptr;
+
+  // Run the query inside the sandbox.
+  auto t_count = mSandbox->invoke_sandbox_function(
+      Hunspell_suggest, mHandle, t_outList,
+      rlbox::sandbox_const_cast<const char*>(t_input));
+  const int count = t_count.copy_and_verify([](int aCount) {
+    MOZ_RELEASE_ASSERT(aCount >= 0);
+    return aCount;
+  });
+
+  tainted_hunspell<char**> t_list = *t_outList;
+
+  if (count > 0 && t_list != nullptr) {
+    // Copy each suggestion out of the sandbox.
+    results.reserve(count);
+
+    for (int idx = 0; idx != count; ++idx) {
+      tainted_hunspell<char*> t_entry = t_list[idx];
+      if (!t_entry) {
+        continue;
+      }
+
+      t_entry.copy_and_verify_string([&results](std::string aSuggestion) {
+        results.push_back(std::move(aSuggestion));
+      });
+      // This string was allocated by the sandboxed hunspell.
+      mSandbox->free_in_sandbox(t_entry);
+    }
+
+    // The list itself was allocated by the sandboxed hunspell as well.
+    mSandbox->free_in_sandbox(t_list);
+  }
+
+  // Release the two allocations made by this function.
+  mSandbox->free_in_sandbox(t_input);
+  mSandbox->free_in_sandbox(t_outList);
+  return results;
+}