summaryrefslogtreecommitdiffstats
path: root/gfx/thebes/gfxScriptItemizer.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /gfx/thebes/gfxScriptItemizer.cpp
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'gfx/thebes/gfxScriptItemizer.cpp')
-rw-r--r--gfx/thebes/gfxScriptItemizer.cpp256
1 files changed, 256 insertions, 0 deletions
diff --git a/gfx/thebes/gfxScriptItemizer.cpp b/gfx/thebes/gfxScriptItemizer.cpp
new file mode 100644
index 0000000000..29a2b4d0ed
--- /dev/null
+++ b/gfx/thebes/gfxScriptItemizer.cpp
@@ -0,0 +1,256 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
+ * for use within Mozilla Gecko, separate from a standard ICU build.
+ *
+ * The original ICU license of the code follows:
+ *
+ * ICU License - ICU 1.8.1 and later
+ *
+ * COPYRIGHT AND PERMISSION NOTICE
+ *
+ * Copyright (c) 1995-2009 International Business Machines Corporation and
+ * others
+ *
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, provided that the above copyright notice(s) and this
+ * permission notice appear in all copies of the Software and that both the
+ * above copyright notice(s) and this permission notice appear in supporting
+ * documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+ * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization of the
+ * copyright holder.
+ *
+ * All trademarks and registered trademarks mentioned herein are the property
+ * of their respective owners.
+ */
+
+#include "gfxScriptItemizer.h"
+#include "mozilla/intl/UnicodeProperties.h"
+#include "nsCharTraits.h"
+#include "nsUnicodeProperties.h"
+#include "harfbuzz/hb.h"
+
+using namespace mozilla::intl;
+using namespace mozilla::unicode;
+
+#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
+#define LIMIT_INC(sp) \
+ (((sp) < PAREN_STACK_DEPTH) ? (sp) + 1 : PAREN_STACK_DEPTH)
+#define INC(sp, count) (MOD((sp) + (count)))
+#define INC1(sp) (INC(sp, 1))
+#define DEC(sp, count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
+#define DEC1(sp) (DEC(sp, 1))
+#define STACK_IS_EMPTY() (pushCount <= 0)
+#define STACK_IS_NOT_EMPTY() (!STACK_IS_EMPTY())
+#define TOP() (parenStack[parenSP])
+#define SYNC_FIXUP() (fixupCount = 0)
+
+void gfxScriptItemizer::push(uint32_t endPairChar, Script newScriptCode) {
+ pushCount = LIMIT_INC(pushCount);
+ fixupCount = LIMIT_INC(fixupCount);
+
+ parenSP = INC1(parenSP);
+ parenStack[parenSP].endPairChar = endPairChar;
+ parenStack[parenSP].scriptCode = newScriptCode;
+}
+
+void gfxScriptItemizer::pop() {
+ if (STACK_IS_EMPTY()) {
+ return;
+ }
+
+ if (fixupCount > 0) {
+ fixupCount -= 1;
+ }
+
+ pushCount -= 1;
+ parenSP = DEC1(parenSP);
+
+ /* If the stack is now empty, reset the stack
+ pointers to their initial values.
+ */
+ if (STACK_IS_EMPTY()) {
+ parenSP = -1;
+ }
+}
+
+void gfxScriptItemizer::fixup(Script newScriptCode) {
+ int32_t fixupSP = DEC(parenSP, fixupCount);
+
+ while (fixupCount-- > 0) {
+ fixupSP = INC1(fixupSP);
+ parenStack[fixupSP].scriptCode = newScriptCode;
+ }
+}
+
+static inline bool CanMergeWithContext(Script aScript) {
+ return aScript <= Script::INHERITED || aScript == Script::UNKNOWN;
+}
+
+// We regard the current char as having the same script as the in-progress run
+// if either script is Common/Inherited/Unknown, or if the run script appears
+// in the character's ScriptExtensions, or if the char is a cluster extender.
+static inline bool SameScript(Script runScript, Script currCharScript,
+ uint32_t aCurrCh) {
+ return CanMergeWithContext(runScript) ||
+ CanMergeWithContext(currCharScript) || currCharScript == runScript ||
+ IsClusterExtender(aCurrCh) ||
+ UnicodeProperties::HasScript(aCurrCh, runScript);
+}
+
+gfxScriptItemizer::gfxScriptItemizer(const char16_t* src, uint32_t length)
+ : textPtr(src), textLength(length) {
+ reset();
+}
+
+void gfxScriptItemizer::SetText(const char16_t* src, uint32_t length) {
+ textPtr = src;
+ textLength = length;
+
+ reset();
+}
+
+bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
+ Script& aRunScript) {
+ /* if we've fallen off the end of the text, we're done */
+ if (scriptLimit >= textLength) {
+ return false;
+ }
+
+ SYNC_FIXUP();
+ scriptCode = Script::COMMON;
+ Script fallbackScript = Script::UNKNOWN;
+
+ for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
+ uint32_t ch;
+ Script sc;
+ uint32_t startOfChar = scriptLimit;
+
+ ch = textPtr[scriptLimit];
+
+ /* decode UTF-16 (may be surrogate pair) */
+ if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
+ uint32_t low = textPtr[scriptLimit + 1];
+ if (NS_IS_LOW_SURROGATE(low)) {
+ ch = SURROGATE_TO_UCS4(ch, low);
+ scriptLimit += 1;
+ }
+ }
+
+ // Initialize gc to UNASSIGNED; we'll only set it to the true GC
+ // if the character has script=COMMON, otherwise we don't care.
+ uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
+
+ sc = UnicodeProperties::GetScriptCode(ch);
+ if (sc == Script::COMMON) {
+ /*
+ * Paired character handling:
+ *
+ * if it's an open character, push it onto the stack.
+ * if it's a close character, find the matching open on the
+ * stack, and use that script code. Any non-matching open
+ * characters above it on the stack will be popped.
+ *
+ * We only do this if the script is COMMON; for chars with
+ * specific script assignments, we just use them as-is.
+ */
+ gc = GetGeneralCategory(ch);
+ if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
+ uint32_t endPairChar = UnicodeProperties::CharMirror(ch);
+ if (endPairChar != ch) {
+ push(endPairChar, scriptCode);
+ }
+ } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
+ UnicodeProperties::IsMirrored(ch)) {
+ while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
+ pop();
+ }
+
+ if (STACK_IS_NOT_EMPTY()) {
+ sc = TOP().scriptCode;
+ }
+ }
+ }
+
+ // Both Hiragana and Katakana are shaped as OpenType 'kana'. Merge them
+ // here to avoid script-run breaks and allow kerning to apply between the
+ // two alphabets.
+ if (sc == Script::HIRAGANA) {
+ sc = Script::KATAKANA;
+ }
+
+ if (SameScript(scriptCode, sc, ch)) {
+ if (scriptCode == Script::COMMON) {
+ // If we have not yet resolved a specific scriptCode for the run,
+ // check whether this character provides it.
+ if (!CanMergeWithContext(sc)) {
+ // Use this character's script.
+ scriptCode = sc;
+ fixup(scriptCode);
+ } else if (fallbackScript == Script::UNKNOWN) {
+ // See if the character has a ScriptExtensions property we can
+ // store for use in the event the run remains unresolved.
+ UnicodeProperties::ScriptExtensionVector extensions;
+ auto extResult = UnicodeProperties::GetExtensions(ch, extensions);
+ if (extResult.isOk()) {
+ Script ext = Script(extensions[0]);
+ if (!CanMergeWithContext(ext)) {
+ fallbackScript = ext;
+ }
+ }
+ }
+ }
+
+ /*
+ * if this character is a close paired character,
+ * pop the matching open character from the stack
+ */
+ if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
+ UnicodeProperties::IsMirrored(ch)) {
+ pop();
+ }
+ } else {
+ /*
+ * reset scriptLimit in case it was advanced during reading a
+ * multiple-code-unit character
+ */
+ scriptLimit = startOfChar;
+
+ break;
+ }
+ }
+
+ aRunStart = scriptStart;
+ aRunLimit = scriptLimit;
+
+ if (scriptCode == Script::COMMON && fallbackScript != Script::UNKNOWN) {
+ aRunScript = fallbackScript;
+ } else {
+ aRunScript = scriptCode;
+ }
+
+ return true;
+}