summaryrefslogtreecommitdiffstats
path: root/intl/hyphenation
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/hyphenation
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/hyphenation')
-rw-r--r--intl/hyphenation/README.mozilla13
-rw-r--r--intl/hyphenation/glue/moz.build22
-rw-r--r--intl/hyphenation/glue/nsHyphenationManager.cpp369
-rw-r--r--intl/hyphenation/glue/nsHyphenationManager.h55
-rw-r--r--intl/hyphenation/glue/nsHyphenator.cpp502
-rw-r--r--intl/hyphenation/glue/nsHyphenator.h61
6 files changed, 1022 insertions, 0 deletions
diff --git a/intl/hyphenation/README.mozilla b/intl/hyphenation/README.mozilla
new file mode 100644
index 0000000000..dc0718f704
--- /dev/null
+++ b/intl/hyphenation/README.mozilla
@@ -0,0 +1,13 @@
+About the hyphenation code in this directory
+============================================
+
+The hyphen directory comes from the Hyphen library, part of the hunspell project.
+ https://github.com/hunspell/hyphen
+
+This code is distributed under the GPL 2.0/LGPL 2.1/MPL 1.1 tri-license, as
+detailed in the associated README and COPYING files.
+
+Note that we do not include other tools and resources found in the complete
+Hyphen package from upstream, so the original README.* files may refer to
+additional files that are not present in the Mozilla source tree.
+
diff --git a/intl/hyphenation/glue/moz.build b/intl/hyphenation/glue/moz.build
new file mode 100644
index 0000000000..306edca2eb
--- /dev/null
+++ b/intl/hyphenation/glue/moz.build
@@ -0,0 +1,22 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+EXPORTS += [
+ "nsHyphenationManager.h",
+ "nsHyphenator.h",
+]
+
+UNIFIED_SOURCES += [
+ "nsHyphenationManager.cpp",
+ "nsHyphenator.cpp",
+]
+
+include("/ipc/chromium/chromium-config.mozbuild")
+
+FINAL_LIBRARY = "xul"
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+ CbindgenHeader("mapped_hyph.h", inputs=["/third_party/rust/mapped_hyph"])
diff --git a/intl/hyphenation/glue/nsHyphenationManager.cpp b/intl/hyphenation/glue/nsHyphenationManager.cpp
new file mode 100644
index 0000000000..f475d2b027
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenationManager.cpp
@@ -0,0 +1,369 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHyphenationManager.h"
+#include "nsHyphenator.h"
+#include "nsAtom.h"
+#include "nsIFile.h"
+#include "nsIURI.h"
+#include "nsIJARURI.h"
+#include "nsIProperties.h"
+#include "nsIDirectoryEnumerator.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsNetUtil.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/CountingAllocatorBase.h"
+#include "mozilla/Preferences.h"
+#include "nsZipArchive.h"
+#include "mozilla/Services.h"
+#include "nsIObserverService.h"
+#include "nsCRT.h"
+#include "nsAppDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsXULAppAPI.h"
+
+using namespace mozilla;
+
+static const char kIntlHyphenationAliasPrefix[] = "intl.hyphenation-alias.";
+static const char kMemoryPressureNotification[] = "memory-pressure";
+
+class HyphenReporter final : public nsIMemoryReporter {
+ private:
+ ~HyphenReporter() = default;
+
+ public:
+ NS_DECL_ISUPPORTS
+
+ // For telemetry, we report the memory rounded up to the nearest KB.
+ static uint32_t MemoryAllocatedInKB() {
+ size_t total = 0;
+ if (nsHyphenationManager::Instance()) {
+ total = nsHyphenationManager::Instance()->SizeOfIncludingThis(
+ moz_malloc_size_of);
+ }
+ return (total + 1023) / 1024;
+ }
+
+ NS_IMETHOD CollectReports(nsIHandleReportCallback* aHandleReport,
+ nsISupports* aData, bool aAnonymize) override {
+ size_t total = 0;
+ if (nsHyphenationManager::Instance()) {
+ total = nsHyphenationManager::Instance()->SizeOfIncludingThis(
+ moz_malloc_size_of);
+ }
+ MOZ_COLLECT_REPORT("explicit/hyphenation", KIND_HEAP, UNITS_BYTES, total,
+ "Memory used by hyphenation data.");
+ return NS_OK;
+ }
+};
+
+NS_IMPL_ISUPPORTS(HyphenReporter, nsIMemoryReporter)
+
+nsHyphenationManager* nsHyphenationManager::sInstance = nullptr;
+
+NS_IMPL_ISUPPORTS(nsHyphenationManager, nsIObserver)
+
+NS_IMETHODIMP
+nsHyphenationManager::Observe(nsISupports* aSubject, const char* aTopic,
+ const char16_t* aData) {
+ if (!nsCRT::strcmp(aTopic, kMemoryPressureNotification)) {
+ nsHyphenationManager::sInstance->mHyphenators.Clear();
+ }
+ return NS_OK;
+}
+
+nsHyphenationManager* nsHyphenationManager::Instance() {
+ if (sInstance == nullptr) {
+ sInstance = new nsHyphenationManager();
+
+ nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
+ if (obs) {
+ obs->AddObserver(sInstance, kMemoryPressureNotification, false);
+ }
+
+ RegisterStrongMemoryReporter(new HyphenReporter());
+ }
+ return sInstance;
+}
+
+void nsHyphenationManager::Shutdown() {
+ if (sInstance) {
+ nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
+ if (obs) {
+ obs->RemoveObserver(sInstance, kMemoryPressureNotification);
+ }
+ delete sInstance;
+ sInstance = nullptr;
+ }
+}
+
+nsHyphenationManager::nsHyphenationManager() {
+ LoadPatternList();
+ LoadAliases();
+}
+
+nsHyphenationManager::~nsHyphenationManager() { sInstance = nullptr; }
+
+already_AddRefed<nsHyphenator> nsHyphenationManager::GetHyphenator(
+ nsAtom* aLocale) {
+ RefPtr<nsHyphenator> hyph;
+ mHyphenators.Get(aLocale, getter_AddRefs(hyph));
+ if (hyph) {
+ return hyph.forget();
+ }
+ nsCOMPtr<nsIURI> uri = mPatternFiles.Get(aLocale);
+ if (!uri) {
+ RefPtr<nsAtom> alias = mHyphAliases.Get(aLocale);
+ if (alias) {
+ mHyphenators.Get(alias, getter_AddRefs(hyph));
+ if (hyph) {
+ return hyph.forget();
+ }
+ uri = mPatternFiles.Get(alias);
+ if (uri) {
+ aLocale = alias;
+ }
+ }
+ if (!uri) {
+ // In the case of a locale such as "de-DE-1996", we try replacing
+ // successive trailing subtags with "-*" to find fallback patterns,
+ // so "de-DE-1996" -> "de-DE-*" (and then recursively -> "de-*")
+ nsAtomCString localeStr(aLocale);
+ if (StringEndsWith(localeStr, "-*"_ns)) {
+ localeStr.Truncate(localeStr.Length() - 2);
+ }
+ int32_t i = localeStr.RFindChar('-');
+ if (i > 1) {
+ localeStr.ReplaceLiteral(i, localeStr.Length() - i, "-*");
+ RefPtr<nsAtom> fuzzyLocale = NS_Atomize(localeStr);
+ return GetHyphenator(fuzzyLocale);
+ }
+ return nullptr;
+ }
+ }
+ nsAutoCString hyphCapPref("intl.hyphenate-capitalized.");
+ hyphCapPref.Append(nsAtomCString(aLocale));
+ hyph = new nsHyphenator(uri, Preferences::GetBool(hyphCapPref.get()));
+ if (hyph->IsValid()) {
+ mHyphenators.InsertOrUpdate(aLocale, RefPtr{hyph});
+ return hyph.forget();
+ }
+#ifdef DEBUG
+ nsCString msg("failed to load patterns from ");
+ msg += uri->GetSpecOrDefault();
+ NS_WARNING(msg.get());
+#endif
+ mPatternFiles.Remove(aLocale);
+ return nullptr;
+}
+
+void nsHyphenationManager::LoadPatternList() {
+ mPatternFiles.Clear();
+ mHyphenators.Clear();
+
+ LoadPatternListFromOmnijar(Omnijar::GRE);
+ LoadPatternListFromOmnijar(Omnijar::APP);
+
+ nsCOMPtr<nsIProperties> dirSvc =
+ do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID);
+ if (!dirSvc) {
+ return;
+ }
+
+ nsresult rv;
+ nsCOMPtr<nsIFile> greDir;
+ rv = dirSvc->Get(NS_GRE_DIR, NS_GET_IID(nsIFile), getter_AddRefs(greDir));
+ if (NS_SUCCEEDED(rv)) {
+ greDir->AppendNative("hyphenation"_ns);
+ LoadPatternListFromDir(greDir);
+ }
+
+ nsCOMPtr<nsIFile> appDir;
+ rv = dirSvc->Get(NS_XPCOM_CURRENT_PROCESS_DIR, NS_GET_IID(nsIFile),
+ getter_AddRefs(appDir));
+ if (NS_SUCCEEDED(rv)) {
+ appDir->AppendNative("hyphenation"_ns);
+ bool equals;
+ if (NS_SUCCEEDED(appDir->Equals(greDir, &equals)) && !equals) {
+ LoadPatternListFromDir(appDir);
+ }
+ }
+
+ nsCOMPtr<nsIFile> profileDir;
+ rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
+ getter_AddRefs(profileDir));
+ if (NS_SUCCEEDED(rv)) {
+ profileDir->AppendNative("hyphenation"_ns);
+ LoadPatternListFromDir(profileDir);
+ }
+}
+
+// Extract the locale code we'll use to identify a given hyphenation resource
+// from the path name as found in omnijar or on disk.
+static already_AddRefed<nsAtom> LocaleAtomFromPath(const nsCString& aPath) {
+ MOZ_ASSERT(StringEndsWith(aPath, ".hyf"_ns) ||
+ StringEndsWith(aPath, ".dic"_ns));
+ nsCString locale(aPath);
+ locale.Truncate(locale.Length() - 4); // strip ".hyf" or ".dic"
+ locale.Cut(0, locale.RFindChar('/') + 1); // strip directory
+ ToLowerCase(locale);
+ if (StringBeginsWith(locale, "hyph_"_ns)) {
+ locale.Cut(0, 5);
+ }
+ for (uint32_t i = 0; i < locale.Length(); ++i) {
+ if (locale[i] == '_') {
+ locale.Replace(i, 1, '-');
+ }
+ }
+ return NS_Atomize(locale);
+}
+
+void nsHyphenationManager::LoadPatternListFromOmnijar(Omnijar::Type aType) {
+ nsCString base;
+ nsresult rv = Omnijar::GetURIString(aType, base);
+ if (NS_FAILED(rv)) {
+ return;
+ }
+
+ RefPtr<nsZipArchive> zip = Omnijar::GetReader(aType);
+ if (!zip) {
+ return;
+ }
+
+ nsZipFind* find;
+ zip->FindInit("hyphenation/hyph_*.*", &find);
+ if (!find) {
+ return;
+ }
+
+ const char* result;
+ uint16_t len;
+ while (NS_SUCCEEDED(find->FindNext(&result, &len))) {
+ nsCString uriString(base);
+ uriString.Append(result, len);
+ nsCOMPtr<nsIURI> uri;
+ rv = NS_NewURI(getter_AddRefs(uri), uriString);
+ if (NS_FAILED(rv)) {
+ continue;
+ }
+ nsCString locale;
+ rv = uri->GetPathQueryRef(locale);
+ if (NS_FAILED(rv)) {
+ continue;
+ }
+ RefPtr<nsAtom> localeAtom = LocaleAtomFromPath(locale);
+ mPatternFiles.InsertOrUpdate(localeAtom, uri);
+ }
+
+ delete find;
+}
+
+void nsHyphenationManager::LoadPatternListFromDir(nsIFile* aDir) {
+ nsresult rv;
+
+ bool check = false;
+ rv = aDir->Exists(&check);
+ if (NS_FAILED(rv) || !check) {
+ return;
+ }
+
+ rv = aDir->IsDirectory(&check);
+ if (NS_FAILED(rv) || !check) {
+ return;
+ }
+
+ nsCOMPtr<nsIDirectoryEnumerator> files;
+ rv = aDir->GetDirectoryEntries(getter_AddRefs(files));
+ if (NS_FAILED(rv)) {
+ return;
+ }
+
+ nsCOMPtr<nsIFile> file;
+ while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) {
+ nsAutoString dictName;
+ file->GetLeafName(dictName);
+ NS_ConvertUTF16toUTF8 path(dictName);
+ if (!(StringEndsWith(path, ".hyf"_ns) || StringEndsWith(path, ".dic"_ns))) {
+ continue;
+ }
+ RefPtr<nsAtom> localeAtom = LocaleAtomFromPath(path);
+ nsCOMPtr<nsIURI> uri;
+ nsresult rv = NS_NewFileURI(getter_AddRefs(uri), file);
+ if (NS_SUCCEEDED(rv)) {
+#ifdef DEBUG_hyph
+ printf("adding hyphenation patterns for %s: %s\n",
+ nsAtomCString(localeAtom).get(), path.get());
+#endif
+ mPatternFiles.InsertOrUpdate(localeAtom, uri);
+ }
+ }
+}
+
+void nsHyphenationManager::LoadAliases() {
+ nsIPrefBranch* prefRootBranch = Preferences::GetRootBranch();
+ if (!prefRootBranch) {
+ return;
+ }
+ nsTArray<nsCString> prefNames;
+ nsresult rv =
+ prefRootBranch->GetChildList(kIntlHyphenationAliasPrefix, prefNames);
+ if (NS_SUCCEEDED(rv)) {
+ for (auto& prefName : prefNames) {
+ nsAutoCString value;
+ rv = Preferences::GetCString(prefName.get(), value);
+ if (NS_SUCCEEDED(rv)) {
+ nsAutoCString alias(prefName);
+ alias.Cut(0, sizeof(kIntlHyphenationAliasPrefix) - 1);
+ ToLowerCase(alias);
+ ToLowerCase(value);
+ RefPtr<nsAtom> aliasAtom = NS_Atomize(alias);
+ RefPtr<nsAtom> valueAtom = NS_Atomize(value);
+ mHyphAliases.InsertOrUpdate(aliasAtom, std::move(valueAtom));
+ }
+ }
+ }
+}
+
+void nsHyphenationManager::ShareHyphDictToProcess(
+ nsIURI* aURI, base::ProcessId aPid, base::SharedMemoryHandle* aOutHandle,
+ uint32_t* aOutSize) {
+ MOZ_ASSERT(XRE_IsParentProcess());
+ // aURI will be referring to an omnijar resource (otherwise just bail).
+ *aOutHandle = base::SharedMemory::NULLHandle();
+ *aOutSize = 0;
+
+ // Extract the locale code from the URI, and get the corresponding
+ // hyphenator (loading it into shared memory if necessary).
+ nsCString path;
+ nsCOMPtr<nsIJARURI> jar = do_QueryInterface(aURI);
+ if (jar) {
+ jar->GetJAREntry(path);
+ } else {
+ aURI->GetFilePath(path);
+ }
+
+ RefPtr<nsAtom> localeAtom = LocaleAtomFromPath(path);
+ RefPtr<nsHyphenator> hyph = GetHyphenator(localeAtom);
+ if (!hyph) {
+ MOZ_ASSERT_UNREACHABLE("failed to find hyphenator");
+ return;
+ }
+
+ hyph->CloneHandle(aOutHandle, aOutSize);
+}
+
+size_t nsHyphenationManager::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) {
+ size_t result = aMallocSizeOf(this);
+
+ result += mHyphAliases.ShallowSizeOfExcludingThis(aMallocSizeOf);
+
+ result += mPatternFiles.ShallowSizeOfExcludingThis(aMallocSizeOf);
+ // Measurement of the URIs stored in mPatternFiles may be added later if DMD
+ // finds it is worthwhile.
+
+ result += mHyphenators.ShallowSizeOfExcludingThis(aMallocSizeOf);
+
+ return result;
+}
diff --git a/intl/hyphenation/glue/nsHyphenationManager.h b/intl/hyphenation/glue/nsHyphenationManager.h
new file mode 100644
index 0000000000..8937ca5808
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenationManager.h
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHyphenationManager_h__
+#define nsHyphenationManager_h__
+
+#include "base/shared_memory.h"
+#include "mozilla/Omnijar.h"
+#include "nsHashKeys.h"
+#include "nsInterfaceHashtable.h"
+#include "nsIObserver.h"
+#include "nsRefPtrHashtable.h"
+
+class nsHyphenator;
+class nsAtom;
+class nsIURI;
+
+class nsHyphenationManager : public nsIObserver {
+ public:
+ NS_DECL_ISUPPORTS
+ NS_DECL_NSIOBSERVER
+
+ nsHyphenationManager();
+
+ already_AddRefed<nsHyphenator> GetHyphenator(nsAtom* aLocale);
+
+ void ShareHyphDictToProcess(nsIURI* aURI, base::ProcessId aPid,
+ base::SharedMemoryHandle* aOutHandle,
+ uint32_t* aOutSize);
+
+ static nsHyphenationManager* Instance();
+
+ static void Shutdown();
+
+ size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf);
+
+ private:
+ virtual ~nsHyphenationManager();
+
+ protected:
+ void LoadPatternList();
+ void LoadPatternListFromOmnijar(mozilla::Omnijar::Type aType);
+ void LoadPatternListFromDir(nsIFile* aDir);
+ void LoadAliases();
+
+ nsRefPtrHashtable<nsRefPtrHashKey<nsAtom>, nsAtom> mHyphAliases;
+ nsInterfaceHashtable<nsRefPtrHashKey<nsAtom>, nsIURI> mPatternFiles;
+ nsRefPtrHashtable<nsRefPtrHashKey<nsAtom>, nsHyphenator> mHyphenators;
+
+ static nsHyphenationManager* sInstance;
+};
+
+#endif // nsHyphenationManager_h__
diff --git a/intl/hyphenation/glue/nsHyphenator.cpp b/intl/hyphenation/glue/nsHyphenator.cpp
new file mode 100644
index 0000000000..129f30f9d5
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenator.cpp
@@ -0,0 +1,502 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHyphenator.h"
+
+#include "mozilla/dom/ContentChild.h"
+#include "mozilla/Omnijar.h"
+#include "nsContentUtils.h"
+#include "nsIChannel.h"
+#include "nsIFile.h"
+#include "nsIFileURL.h"
+#include "nsIInputStream.h"
+#include "nsIJARURI.h"
+#include "nsIURI.h"
+#include "nsNetUtil.h"
+#include "nsUnicodeProperties.h"
+#include "nsUTF8Utils.h"
+#include "nsZipArchive.h"
+
+#include "mapped_hyph.h"
+
+using namespace mozilla;
+
+void DefaultDelete<const HyphDic>::operator()(const HyphDic* aHyph) const {
+ mapped_hyph_free_dictionary(const_cast<HyphDic*>(aHyph));
+}
+
+void DefaultDelete<const CompiledData>::operator()(
+ const CompiledData* aData) const {
+ mapped_hyph_free_compiled_data(const_cast<CompiledData*>(aData));
+}
+
+static const void* GetItemPtrFromJarURI(nsIJARURI* aJAR, uint32_t* aLength) {
+ // Try to get the jarfile's nsZipArchive, find the relevant item, and return
+ // a pointer to its data provided it is stored uncompressed.
+ nsCOMPtr<nsIURI> jarFile;
+ if (NS_FAILED(aJAR->GetJARFile(getter_AddRefs(jarFile)))) {
+ return nullptr;
+ }
+ nsCOMPtr<nsIFileURL> fileUrl = do_QueryInterface(jarFile);
+ if (!fileUrl) {
+ return nullptr;
+ }
+ nsCOMPtr<nsIFile> file;
+ fileUrl->GetFile(getter_AddRefs(file));
+ if (!file) {
+ return nullptr;
+ }
+ RefPtr<nsZipArchive> archive = Omnijar::GetReader(file);
+ if (archive) {
+ nsCString path;
+ aJAR->GetJAREntry(path);
+ nsZipItem* item = archive->GetItem(path.get());
+ if (item && item->Compression() == 0 && item->Size() > 0) {
+ // We do NOT own this data, but it won't go away until the omnijar
+ // file is closed during shutdown.
+ const uint8_t* data = archive->GetData(item);
+ if (data) {
+ *aLength = item->Size();
+ return data;
+ }
+ }
+ }
+ return nullptr;
+}
+
+static UniquePtr<base::SharedMemory> GetHyphDictFromParent(nsIURI* aURI,
+ uint32_t* aLength) {
+ MOZ_ASSERT(!XRE_IsParentProcess());
+ base::SharedMemoryHandle handle = base::SharedMemory::NULLHandle();
+ uint32_t size;
+ MOZ_ASSERT(aURI);
+ if (!dom::ContentChild::GetSingleton()->SendGetHyphDict(aURI, &handle,
+ &size)) {
+ return nullptr;
+ }
+ UniquePtr<base::SharedMemory> shm = MakeUnique<base::SharedMemory>();
+ if (!shm->IsHandleValid(handle)) {
+ return nullptr;
+ }
+ if (!shm->SetHandle(std::move(handle), true)) {
+ return nullptr;
+ }
+ if (!shm->Map(size)) {
+ return nullptr;
+ }
+ char* addr = static_cast<char*>(shm->memory());
+ if (!addr) {
+ return nullptr;
+ }
+ *aLength = size;
+ return shm;
+}
+
+static UniquePtr<base::SharedMemory> CopyToShmem(const CompiledData* aData) {
+ MOZ_ASSERT(XRE_IsParentProcess());
+
+ // The shm-related calls here are not expected to fail, but if they do,
+ // we'll just return null (as if the resource was unavailable) and proceed
+ // without hyphenation.
+ uint32_t size = mapped_hyph_compiled_data_size(aData);
+ UniquePtr<base::SharedMemory> shm = MakeUnique<base::SharedMemory>();
+ if (!shm->CreateFreezeable(size)) {
+ return nullptr;
+ }
+ if (!shm->Map(size)) {
+ return nullptr;
+ }
+ char* buffer = static_cast<char*>(shm->memory());
+ if (!buffer) {
+ return nullptr;
+ }
+
+ memcpy(buffer, mapped_hyph_compiled_data_ptr(aData), size);
+ if (!shm->Freeze()) {
+ return nullptr;
+ }
+
+ return shm;
+}
+
+static UniquePtr<base::SharedMemory> LoadFromURI(nsIURI* aURI,
+ uint32_t* aLength,
+ bool aPrecompiled) {
+ MOZ_ASSERT(XRE_IsParentProcess());
+ nsCOMPtr<nsIChannel> channel;
+ if (NS_FAILED(NS_NewChannel(
+ getter_AddRefs(channel), aURI, nsContentUtils::GetSystemPrincipal(),
+ nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL,
+ nsIContentPolicy::TYPE_OTHER))) {
+ return nullptr;
+ }
+ nsCOMPtr<nsIInputStream> instream;
+ if (NS_FAILED(channel->Open(getter_AddRefs(instream)))) {
+ return nullptr;
+ }
+ // Check size, bail out if it is excessively large (the largest of the
+ // hyphenation files currently shipped with Firefox is around 1MB
+ // uncompressed).
+ uint64_t available;
+ if (NS_FAILED(instream->Available(&available)) || !available ||
+ available > 16 * 1024 * 1024) {
+ return nullptr;
+ }
+
+ if (aPrecompiled) {
+ UniquePtr<base::SharedMemory> shm = MakeUnique<base::SharedMemory>();
+ if (!shm->CreateFreezeable(available)) {
+ return nullptr;
+ }
+ if (!shm->Map(available)) {
+ return nullptr;
+ }
+ char* buffer = static_cast<char*>(shm->memory());
+ if (!buffer) {
+ return nullptr;
+ }
+
+ uint32_t bytesRead = 0;
+ if (NS_FAILED(instream->Read(buffer, available, &bytesRead)) ||
+ bytesRead != available) {
+ return nullptr;
+ }
+
+ if (!mapped_hyph_is_valid_hyphenator(
+ reinterpret_cast<const uint8_t*>(buffer), bytesRead)) {
+ return nullptr;
+ }
+
+ if (!shm->Freeze()) {
+ return nullptr;
+ }
+
+ *aLength = bytesRead;
+ return shm;
+ }
+
+ // Read from the URI into a temporary buffer, compile it, then copy the
+ // compiled resource to a shared memory region.
+ auto buffer = MakeUnique<char[]>(available);
+ uint32_t bytesRead = 0;
+ if (NS_FAILED(instream->Read(buffer.get(), available, &bytesRead)) ||
+ bytesRead != available) {
+ return nullptr;
+ }
+
+ UniquePtr<const CompiledData> data(mapped_hyph_compile_buffer(
+ reinterpret_cast<const uint8_t*>(buffer.get()), bytesRead, false));
+ if (data) {
+ *aLength = mapped_hyph_compiled_data_size(data.get());
+ return CopyToShmem(data.get());
+ }
+
+ return nullptr;
+}
+
+nsHyphenator::nsHyphenator(nsIURI* aURI, bool aHyphenateCapitalized)
+ : mDict(static_cast<const void*>(nullptr)),
+ mDictSize(0),
+ mHyphenateCapitalized(aHyphenateCapitalized) {
+ // Files with extension ".hyf" are expected to be precompiled mapped_hyph
+ // tables; we also support uncompiled ".dic" files, but they are more
+ // expensive to process on first load.
+ nsAutoCString path;
+ aURI->GetFilePath(path);
+ bool precompiled = StringEndsWith(path, ".hyf"_ns);
+
+ // Content processes don't do compilation; they depend on the parent giving
+ // them a compiled version of the resource, so that we only pay the cost of
+ // compilation once per language per session.
+ if (!precompiled && !XRE_IsParentProcess()) {
+ uint32_t length;
+ UniquePtr<base::SharedMemory> shm = GetHyphDictFromParent(aURI, &length);
+ if (shm) {
+ // We don't need to validate mDict because the parent process
+ // will have done so.
+ mDictSize = length;
+ mDict = AsVariant(std::move(shm));
+ }
+ return;
+ }
+
+ nsCOMPtr<nsIJARURI> jar = do_QueryInterface(aURI);
+ if (jar) {
+ // This gives us a raw pointer into the omnijar's data (if uncompressed);
+ // we do not own it and must not attempt to free it!
+ uint32_t length;
+ const void* ptr = GetItemPtrFromJarURI(jar, &length);
+ if (ptr) {
+ if (precompiled) {
+ // The data should be directly usable by mapped_hyph; validate that it
+ // looks correct, and save the pointer.
+ if (mapped_hyph_is_valid_hyphenator(static_cast<const uint8_t*>(ptr),
+ length)) {
+ mDictSize = length;
+ mDict = AsVariant(ptr);
+ return;
+ }
+ } else {
+ // The data is an uncompiled pattern file, so we need to compile it.
+ // We then move it to shared memory so we can expose it to content
+ // processes.
+ MOZ_ASSERT(XRE_IsParentProcess());
+ UniquePtr<const CompiledData> data(mapped_hyph_compile_buffer(
+ static_cast<const uint8_t*>(ptr), length, false));
+ if (data) {
+ UniquePtr<base::SharedMemory> shm = CopyToShmem(data.get());
+ if (shm) {
+ mDictSize = mapped_hyph_compiled_data_size(data.get());
+ mDict = AsVariant(std::move(shm));
+ return;
+ }
+ }
+ }
+ } else {
+ // Omnijar must be compressed (currently this is the case on Android).
+ // If we're the parent process, decompress the resource into a shmem
+ // buffer; if we're a child, send a request to the parent for the
+ // shared-memory copy (which it will load if not already available).
+ if (XRE_IsParentProcess()) {
+ UniquePtr<base::SharedMemory> shm =
+ LoadFromURI(aURI, &length, precompiled);
+ if (shm) {
+ mDictSize = length;
+ mDict = AsVariant(std::move(shm));
+ return;
+ }
+ } else {
+ UniquePtr<base::SharedMemory> shm =
+ GetHyphDictFromParent(aURI, &length);
+ if (shm) {
+ // We don't need to validate mDict because the parent process
+ // will have done so.
+ mDictSize = length;
+ mDict = AsVariant(std::move(shm));
+ return;
+ }
+ }
+ }
+ }
+
+ // We get file:// URIs when running an unpackaged build; they could also
+ // occur if we support adding hyphenation dictionaries by putting files in
+ // a directory of the profile, for example.
+ if (net::SchemeIsFile(aURI)) {
+ // Ask the Rust lib to mmap the file. In this case our mDictSize field
+ // remains zero; mDict is not a pointer to the raw data but an opaque
+ // reference to a Rust object, and can only be freed by passing it to
+ // mapped_hyph_free_dictionary().
+ // (This case occurs in unpackaged developer builds.)
+#if XP_WIN
+ // GetFilePath returns the path with an unexpected leading slash (like
+ // "/c:/path/to/firefox/...") that may prevent it being found if it's an
+ // absolute Windows path starting with a drive letter.
+ // So check for this case and strip the slash.
+ if (path.Length() > 2 && path[0] == '/' && path[2] == ':') {
+ path.Cut(0, 1);
+ }
+#endif
+ if (precompiled) {
+ // If the file is compiled, we can just map it directly.
+ UniquePtr<const HyphDic> dic(mapped_hyph_load_dictionary(path.get()));
+ if (dic) {
+ mDict = AsVariant(std::move(dic));
+ return;
+ }
+ } else {
+ // For an uncompiled .dic file, the parent process is responsible for
+ // compiling it and storing the result in a shmem block that can be
+ // shared to content processes.
+ MOZ_ASSERT(XRE_IsParentProcess());
+ MOZ_ASSERT(StringEndsWith(path, ".dic"_ns));
+ UniquePtr<const CompiledData> data(
+ mapped_hyph_compile_file(path.get(), false));
+ if (data) {
+ UniquePtr<base::SharedMemory> shm = CopyToShmem(data.get());
+ if (shm) {
+ mDictSize = mapped_hyph_compiled_data_size(data.get());
+ mDict = AsVariant(std::move(shm));
+ return;
+ }
+ }
+ }
+ }
+
+ // Each loading branch above will return if successful. So if we get here,
+ // whichever load type we attempted must have failed because something about
+ // the resource is broken.
+ nsAutoCString msg;
+ aURI->GetSpec(msg);
+ msg.Insert("Invalid hyphenation resource: ", 0);
+ NS_ASSERTION(false, msg.get());
+}
+
+bool nsHyphenator::IsValid() {
+ return mDict.match(
+ [](const void*& ptr) { return ptr != nullptr; },
+ [](UniquePtr<base::SharedMemory>& shm) { return shm != nullptr; },
+ [](UniquePtr<const HyphDic>& hyph) { return hyph != nullptr; });
+}
+
+nsresult nsHyphenator::Hyphenate(const nsAString& aString,
+ nsTArray<bool>& aHyphens) {
+ if (!aHyphens.SetLength(aString.Length(), fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ memset(aHyphens.Elements(), false, aHyphens.Length() * sizeof(bool));
+
+ bool inWord = false;
+ uint32_t wordStart = 0, wordLimit = 0;
+ uint32_t chLen;
+ for (uint32_t i = 0; i < aString.Length(); i += chLen) {
+ uint32_t ch = aString[i];
+ chLen = 1;
+
+ if (NS_IS_HIGH_SURROGATE(ch)) {
+ if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i + 1])) {
+ ch = SURROGATE_TO_UCS4(ch, aString[i + 1]);
+ chLen = 2;
+ } else {
+ NS_WARNING("unpaired surrogate found during hyphenation");
+ }
+ }
+
+ nsUGenCategory cat = unicode::GetGenCategory(ch);
+ if (cat == nsUGenCategory::kLetter || cat == nsUGenCategory::kMark) {
+ if (!inWord) {
+ inWord = true;
+ wordStart = i;
+ }
+ wordLimit = i + chLen;
+ if (i + chLen < aString.Length()) {
+ continue;
+ }
+ }
+
+ if (inWord) {
+ HyphenateWord(aString, wordStart, wordLimit, aHyphens);
+ inWord = false;
+ }
+ }
+
+ return NS_OK;
+}
+
+void nsHyphenator::HyphenateWord(const nsAString& aString, uint32_t aStart,
+ uint32_t aLimit, nsTArray<bool>& aHyphens) {
+ // Convert word from aStart and aLimit in aString to utf-8 for mapped_hyph,
+ // lowercasing it as we go so that it will match the (lowercased) patterns
+ // (bug 1105644).
+ nsAutoCString utf8;
+ const char16_t* cur = aString.BeginReading() + aStart;
+ const char16_t* end = aString.BeginReading() + aLimit;
+ bool firstLetter = true;
+ while (cur < end) {
+ uint32_t ch = *cur++;
+
+ if (NS_IS_HIGH_SURROGATE(ch)) {
+ if (cur < end && NS_IS_LOW_SURROGATE(*cur)) {
+ ch = SURROGATE_TO_UCS4(ch, *cur++);
+ } else {
+ return; // unpaired surrogate: bail out, don't hyphenate broken text
+ }
+ } else if (NS_IS_LOW_SURROGATE(ch)) {
+ return; // unpaired surrogate
+ }
+
+ // XXX What about language-specific casing? Consider Turkish I/i...
+ // In practice, it looks like the current patterns will not be
+ // affected by this, as they treat dotted and undotted i similarly.
+ uint32_t origCh = ch;
+ ch = ToLowerCase(ch);
+
+ if (ch != origCh) {
+ // Avoid hyphenating capitalized words (bug 1550532) unless explicitly
+ // allowed by prefs for the language in use.
+ // Also never auto-hyphenate a word that has internal caps, as it may
+ // well be an all-caps acronym or a quirky name like iTunes.
+ if (!mHyphenateCapitalized || !firstLetter) {
+ return;
+ }
+ }
+ firstLetter = false;
+
+ if (ch < 0x80) { // U+0000 - U+007F
+ utf8.Append(ch);
+ } else if (ch < 0x0800) { // U+0100 - U+07FF
+ utf8.Append(0xC0 | (ch >> 6));
+ utf8.Append(0x80 | (0x003F & ch));
+ } else if (ch < 0x10000) { // U+0800 - U+D7FF,U+E000 - U+FFFF
+ utf8.Append(0xE0 | (ch >> 12));
+ utf8.Append(0x80 | (0x003F & (ch >> 6)));
+ utf8.Append(0x80 | (0x003F & ch));
+ } else {
+ utf8.Append(0xF0 | (ch >> 18));
+ utf8.Append(0x80 | (0x003F & (ch >> 12)));
+ utf8.Append(0x80 | (0x003F & (ch >> 6)));
+ utf8.Append(0x80 | (0x003F & ch));
+ }
+ }
+
+ AutoTArray<uint8_t, 200> hyphenValues;
+ hyphenValues.SetLength(utf8.Length());
+ int32_t result = mDict.match(
+ [&](const void*& ptr) {
+ return mapped_hyph_find_hyphen_values_raw(
+ static_cast<const uint8_t*>(ptr), mDictSize, utf8.BeginReading(),
+ utf8.Length(), hyphenValues.Elements(), hyphenValues.Length());
+ },
+ [&](UniquePtr<base::SharedMemory>& shm) {
+ return mapped_hyph_find_hyphen_values_raw(
+ static_cast<const uint8_t*>(shm->memory()), mDictSize,
+ utf8.BeginReading(), utf8.Length(), hyphenValues.Elements(),
+ hyphenValues.Length());
+ },
+ [&](UniquePtr<const HyphDic>& hyph) {
+ return mapped_hyph_find_hyphen_values_dic(
+ hyph.get(), utf8.BeginReading(), utf8.Length(),
+ hyphenValues.Elements(), hyphenValues.Length());
+ });
+ if (result > 0) {
+ // We need to convert UTF-8 indexing as used by the hyphenation lib into
+ // UTF-16 indexing of the aHyphens[] array for Gecko.
+ uint32_t utf16index = 0;
+ for (uint32_t utf8index = 0; utf8index < utf8.Length();) {
+ // We know utf8 is valid, so we only need to look at the first byte of
+ // each character to determine its length and the corresponding UTF-16
+ // length to add to utf16index.
+ const uint8_t leadByte = utf8[utf8index];
+ if (leadByte < 0x80) {
+ utf8index += 1;
+ } else if (leadByte < 0xE0) {
+ utf8index += 2;
+ } else if (leadByte < 0xF0) {
+ utf8index += 3;
+ } else {
+ utf8index += 4;
+ }
+ // The hyphenation value of interest is the one for the last code unit
+ // of the utf-8 character, and is recorded on the last code unit of the
+ // utf-16 character (in the case of a surrogate pair).
+ utf16index += leadByte >= 0xF0 ? 2 : 1;
+ if (utf16index > 0 && (hyphenValues[utf8index - 1] & 0x01)) {
+ aHyphens[aStart + utf16index - 1] = true;
+ }
+ }
+ }
+}
+
+void nsHyphenator::CloneHandle(base::SharedMemoryHandle* aOutHandle,
+ uint32_t* aOutSize) {
+ // If the resource is invalid, or if we fail to share it to the child
+ // process, we'll just bail out and continue without hyphenation; no need
+ // for this to be a fatal error.
+ if (!mDict.is<UniquePtr<base::SharedMemory>>()) {
+ return;
+ }
+ *aOutHandle = mDict.as<UniquePtr<base::SharedMemory>>()->CloneHandle();
+ *aOutSize = mDictSize;
+}
diff --git a/intl/hyphenation/glue/nsHyphenator.h b/intl/hyphenation/glue/nsHyphenator.h
new file mode 100644
index 0000000000..7574d57fdb
--- /dev/null
+++ b/intl/hyphenation/glue/nsHyphenator.h
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHyphenator_h__
+#define nsHyphenator_h__
+
+#include "base/shared_memory.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Variant.h"
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+class nsIURI;
+struct HyphDic;
+struct CompiledData;
+
+namespace mozilla {
+template <>
+class DefaultDelete<const HyphDic> {
+ public:
+ void operator()(const HyphDic* ptr) const;
+};
+
+template <>
+class DefaultDelete<const CompiledData> {
+ public:
+ void operator()(const CompiledData* ptr) const;
+};
+} // namespace mozilla
+
+class nsHyphenator {
+ public:
+ nsHyphenator(nsIURI* aURI, bool aHyphenateCapitalized);
+
+ NS_INLINE_DECL_REFCOUNTING(nsHyphenator)
+
+ bool IsValid();
+
+ nsresult Hyphenate(const nsAString& aText, nsTArray<bool>& aHyphens);
+
+ void CloneHandle(base::SharedMemoryHandle* aOutHandle, uint32_t* aOutSize);
+
+ private:
+ ~nsHyphenator() = default;
+
+ void HyphenateWord(const nsAString& aString, uint32_t aStart, uint32_t aLimit,
+ nsTArray<bool>& aHyphens);
+
+ mozilla::Variant<const void*, // raw pointer to uncompressed omnijar data
+ mozilla::UniquePtr<base::SharedMemory>, // shmem block
+ mozilla::UniquePtr<const HyphDic> // loaded by mapped_hyph
+ >
+ mDict;
+ uint32_t mDictSize; // size of mDict data (not used if type is HyphDic)
+ bool mHyphenateCapitalized;
+};
+
+#endif // nsHyphenator_h__