summaryrefslogtreecommitdiffstats
path: root/vcl/source/gdi/scrptrun.cxx
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:54:39 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:54:39 +0000
commit267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree358c9467650e1d0a1d7227a21dac2e3d08b622b2 /vcl/source/gdi/scrptrun.cxx
parentInitial commit. (diff)
downloadlibreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vcl/source/gdi/scrptrun.cxx')
-rw-r--r--vcl/source/gdi/scrptrun.cxx259
1 files changed, 259 insertions, 0 deletions
diff --git a/vcl/source/gdi/scrptrun.cxx b/vcl/source/gdi/scrptrun.cxx
new file mode 100644
index 0000000000..19cb54772b
--- /dev/null
+++ b/vcl/source/gdi/scrptrun.cxx
@@ -0,0 +1,259 @@
+/*
+ *******************************************************************************
+ *
+ * Copyright (c) 1995-2013 International Business Machines Corporation and others
+ *
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, and/or sell copies of the
+ * Software, and to permit persons to whom the Software is furnished to do so,
+ * provided that the above copyright notice(s) and this permission notice appear
+ * in all copies of the Software and that both the above copyright notice(s) and
+ * this permission notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
+ * NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
+ * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall not be
+ * used in advertising or otherwise to promote the sale, use or other dealings in
+ * this Software without prior written authorization of the copyright holder.
+ *
+ *******************************************************************************
+ * file name: scrptrun.cpp
+ *
+ * created on: 10/17/2001
+ * created by: Eric R. Mader
+ */
+/**
+ * This file is largely copied from the ICU project,
+ * under folder source/extra/scrptrun/scrptrun.cpp
+ */
+
+#include <sal/config.h>
+
+#include <rtl/character.hxx>
+#include <unicode/uchar.h>
+#include <unicode/utypes.h>
+#include <unicode/uscript.h>
+
+#include <scrptrun.h>
+#include <algorithm>
+
+namespace {
+
+/**
+ * Lookup tables that map paired punctuation code points to a "pair index".
+ *
+ * Every table slot holds -1 for "not a paired character". Otherwise it holds
+ * a small non-negative index in which an opening character gets an even value
+ * and its matching closing character gets the next odd value, so the opener
+ * that belongs to a closer is found with (index & ~1).
+ *
+ * Three dense tables cover the only ranges that contain paired characters:
+ * 0x0000-0x00fe, 0x2000-0x207e and 0x3000-0x307e (all inclusive).
+ */
+struct PairIndices
+{
+    // First code point covered by the second and third table.
+    static constexpr UChar32 nBase20 = 0x2000;
+    static constexpr UChar32 nBase30 = 0x3000;
+
+    // Number of consecutive code points covered by each table.
+    static constexpr int32_t nLen00 = 0xff;
+    static constexpr int32_t nLen20 = 0x7f;
+    static constexpr int32_t nLen30 = 0x7f;
+
+    int8_t ma00[nLen00];
+    int8_t ma20[nLen20];
+    int8_t ma30[nLen30];
+
+    /** Marks every slot as "not paired", then registers the known pairs. */
+    PairIndices()
+    {
+        for (int8_t& rEntry : ma00)
+            rEntry = -1;
+        for (int8_t& rEntry : ma20)
+            rEntry = -1;
+        for (int8_t& rEntry : ma30)
+            rEntry = -1;
+
+        // characters in the range 0x0000 - 0x00fe (inclusive)
+        // ascii paired punctuation
+        ma00[0x28] = 0; // (
+        ma00[0x29] = 1; // )
+        ma00[0x3c] = 2; // <
+        ma00[0x3e] = 3; // >
+        ma00[0x5b] = 4; // [
+        ma00[0x5d] = 5; // ]
+        ma00[0x7b] = 6; // {
+        ma00[0x7d] = 7; // }
+        // guillemets (Latin-1 supplement)
+        ma00[0xab] = 8;
+        ma00[0xbb] = 9;
+
+        // characters in the range 0x2000 - 0x207e (inclusive)
+        // general punctuation: single quotes, double quotes, single guillemets
+        ma20[0x18] = 10;
+        ma20[0x19] = 11;
+        ma20[0x1c] = 12;
+        ma20[0x1d] = 13;
+        ma20[0x39] = 14;
+        ma20[0x3a] = 15;
+
+        // characters in the range 0x3000 - 0x307e (inclusive)
+        // chinese paired punctuation
+        ma30[0x08] = 16;
+        ma30[0x09] = 17;
+        ma30[0x0a] = 18;
+        ma30[0x0b] = 19;
+        ma30[0x0c] = 20;
+        ma30[0x0d] = 21;
+        ma30[0x0e] = 22;
+        ma30[0x0f] = 23;
+        ma30[0x10] = 24;
+        ma30[0x11] = 25;
+        ma30[0x14] = 26;
+        ma30[0x15] = 27;
+        ma30[0x16] = 28;
+        ma30[0x17] = 29;
+        ma30[0x18] = 30;
+        ma30[0x19] = 31;
+        ma30[0x1a] = 32;
+        ma30[0x1b] = 33;
+    }
+
+    /**
+     * Looks up the pair index of a code point.
+     *
+     * @param ch code point to classify
+     * @return the pair index (even for an opening character, odd for a
+     *         closing one), or -1 if ch is not a known paired character
+     */
+    int32_t getPairIndex(UChar32 ch) const
+    {
+        // UChar32 is a signed type; without this guard a negative value
+        // would pass the "below table size" test and index before ma00.
+        if (ch < 0)
+            return -1;
+        if (ch < nLen00)
+            return ma00[ch];
+        if (ch >= nBase20 && ch - nBase20 < nLen20)
+            return ma20[ch - nBase20];
+        if (ch >= nBase30 && ch - nBase30 < nLen30)
+            return ma30[ch - nBase30];
+        return -1;
+    }
+
+};
+
+/**
+ * Returns the script used for run segmentation of a single code point.
+ *
+ * This is uscript_getScript() with two adjustments: non-spacing marks are
+ * always reported as USCRIPT_INHERITED, and the Katakana script codes are
+ * folded into USCRIPT_HIRAGANA.
+ *
+ * @param ch     code point to classify
+ * @param status ICU error code, passed through to uscript_getScript(); when
+ *               it reports failure the ICU result is returned unmodified
+ * @return the (possibly remapped) script code
+ */
+UScriptCode getScript(UChar32 ch, UErrorCode* status)
+{
+    // tdf#154549
+    // A combining mark has to stay in the run of its base character, so its
+    // own script is ignored and it is treated as inheriting.
+    const bool bNonSpacingMark
+        = u_getIntPropertyValue(ch, UCHAR_GENERAL_CATEGORY) == U_NON_SPACING_MARK;
+    if (bNonSpacingMark)
+        return USCRIPT_INHERITED;
+
+    const UScriptCode eScript = uscript_getScript(ch, status);
+    if (U_FAILURE(*status))
+        return eScript;
+
+    switch (eScript)
+    {
+        // Unicode has three script codes for Japanese text while OpenType has
+        // a single script tag, so splitting them into separate runs would be
+        // pointless for shaping; report them all as Hiragana.
+        case USCRIPT_KATAKANA:
+        case USCRIPT_KATAKANA_OR_HIRAGANA:
+            return USCRIPT_HIRAGANA;
+        default:
+            return eScript;
+    }
+}
+
+}
+
+// Single shared, read-only instance of the paired-punctuation lookup tables;
+// its constructor fills the tables during static initialisation.
+const PairIndices gPairIndices{};
+
+
+namespace vcl {
+
+// Out-of-class definition of the static member ScriptRun::fgClassID.
+// NOTE(review): presumably the ICU-style class-ID anchor whose address, not
+// value, matters -- confirm against the declaration in scrptrun.h.
+const char ScriptRun::fgClassID = '\0';
+
+/**
+ * Tells whether two script codes may share one run.
+ *
+ * Any code at or below USCRIPT_INHERITED is compatible with every other
+ * code; two codes above it are compatible only when they are equal.
+ *
+ * @param scriptOne first script code
+ * @param scriptTwo second script code
+ * @return true if both codes can be part of the same script run
+ */
+static bool sameScript(int32_t scriptOne, int32_t scriptTwo)
+{
+    if (scriptOne <= USCRIPT_INHERITED)
+        return true;
+    if (scriptTwo <= USCRIPT_INHERITED)
+        return true;
+    return scriptOne == scriptTwo;
+}
+
+/**
+ * Advances to the next script run of the text.
+ *
+ * Starting at the previous scriptEnd, characters are consumed as long as
+ * their script is compatible with the run's script (see sameScript()).
+ * Paired punctuation is tracked on parenStack so that a closing character
+ * takes the script that was current at its matching opening character.
+ *
+ * On success scriptStart/scriptEnd delimit the run (scriptEnd is exclusive)
+ * and scriptCode holds its script.
+ *
+ * @return false if the end of the text had already been reached,
+ *         true if a run was produced
+ */
+UBool ScriptRun::next()
+{
+    // the previous run already consumed the whole text: nothing left to do
+    if (scriptEnd >= charLimit)
+        return false;
+
+    // stack depth on entry; entries above it are opening characters pushed
+    // during this run, possibly before the run's script was known
+    int32_t startSP = parenSP;
+    UErrorCode error = U_ZERO_ERROR;
+
+    scriptCode = USCRIPT_COMMON;
+    scriptStart = scriptEnd;
+
+    for (; scriptEnd < charLimit; ++scriptEnd)
+    {
+        const UChar high = charArray[scriptEnd];
+        UChar32 ch = high;
+
+        // a high surrogate that is not the last code unit may start a
+        // surrogate pair: combine it with a following low surrogate and
+        // consume both code units
+        if (rtl::isHighSurrogate(high) && scriptEnd < charLimit - 1)
+        {
+            const UChar low = charArray[scriptEnd + 1];
+            if (rtl::isLowSurrogate(low))
+            {
+                ch = rtl::combineSurrogates(high, low);
+                ++scriptEnd;
+            }
+        }
+
+        UScriptCode sc = getScript(ch, &error);
+        const int32_t pairIndex = gPairIndices.getPairIndex(ch);
+
+        // even pair indices are opening characters, odd ones closing
+        const bool bPaired = pairIndex >= 0;
+        const bool bOpener = bPaired && (pairIndex & 1) == 0;
+        const bool bCloser = bPaired && !bOpener;
+
+        if (bOpener)
+        {
+            // remember the opening character together with the script
+            // that is current right now, growing the stack in chunks
+            ++parenSP;
+            const int32_t nVecSize = parenStack.size();
+            if (parenSP == nVecSize)
+                parenStack.resize(nVecSize + 128);
+            parenStack[parenSP].pairIndex = pairIndex;
+            parenStack[parenSP].scriptCode = scriptCode;
+        }
+        else if (bCloser && parenSP >= 0)
+        {
+            // search downwards for the matching opening character; any
+            // non-matching entries above it are discarded
+            const int32_t pi = pairIndex & ~1;
+
+            while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi)
+                --parenSP;
+
+            if (parenSP < startSP)
+                startSP = parenSP;
+
+            // the closing character takes the script of its opener
+            if (parenSP >= 0)
+                sc = parenStack[parenSP].scriptCode;
+        }
+
+        if (!sameScript(scriptCode, sc))
+        {
+            // the run ends in front of this character; if it was a
+            // surrogate pair, step back onto its high surrogate
+            if (ch >= 0x10000)
+                --scriptEnd;
+            break;
+        }
+
+        if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED)
+        {
+            scriptCode = sc;
+
+            // the run's script is known now: patch the opening characters
+            // that were pushed before it was
+            while (startSP < parenSP)
+            {
+                ++startSP;
+                parenStack[startSP].scriptCode = scriptCode;
+            }
+        }
+
+        // a closing character that stays in this run pops its opener
+        if (bCloser && parenSP >= 0)
+        {
+            --parenSP;
+            /* decrement startSP only while it is >= 0; letting it drop
+               further (e.g. startSP = -4, parenSP = -1) would make the
+               patch-up loop above write in front of the stack and
+               corrupt memory
+             */
+            if (startSP >= 0)
+                --startSP;
+        }
+    }
+
+    return true;
+}
+
+}