Adding upstream version 124.0.1. (upstream/124.0.1)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /layout/generic/nsTextRunTransformations.cpp
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 940 insertions, 0 deletions
diff --git a/layout/generic/nsTextRunTransformations.cpp b/layout/generic/nsTextRunTransformations.cpp
new file mode 100644
index 0000000000..d18a7ec293
--- /dev/null
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -0,0 +1,940 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsTextRunTransformations.h"
+
+#include <utility>
+
+#include "GreekCasing.h"
+#include "IrishCasing.h"
+#include "MathMLTextRunFactory.h"
+#include "mozilla/ComputedStyleInlines.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/StaticPrefs_layout.h"
+#include "mozilla/StaticPrefs_mathml.h"
+#include "mozilla/TextEditor.h"
+#include "mozilla/gfx/2D.h"
+#include "nsGkAtoms.h"
+#include "nsSpecialCasingData.h"
+#include "nsStyleConsts.h"
+#include "nsTextFrameUtils.h"
+#include "nsUnicharUtils.h"
+#include "nsUnicodeProperties.h"
+
+using namespace mozilla;
+using namespace mozilla::gfx;
+
+// Characters needing special casing in tr/az/ba/crh/tt (see GetCasingFor)
+#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
+#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
+
+// Greek sigma needs custom handling for the lowercase transform; for details
+// see bug 740120.
+#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
+#define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
+#define GREEK_SMALL_LETTER_SIGMA 0x03C3
+
+// Builds a transformed text run inside storage from AllocateStorageForTextRun
+// (placement new); returns nullptr when that allocation fails.
+already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
+    const gfxTextRunFactory::Parameters* aParams,
+    nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
+    const char16_t* aString, uint32_t aLength,
+    const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
+    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
+  NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
+               "didn't expect text to be marked as 8-bit here");
+  void* const buffer =
+      AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
+  if (buffer == nullptr) {
+    return nullptr;
+  }
+  RefPtr<nsTransformedTextRun> textRun = new (buffer)
+      nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
+                           aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
+  return textRun.forget();
+}
+
+void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
+                                             bool* aCapitalization) {
+  if (mCapitalize.IsEmpty()) {  // first use: build zeroed per-character flags
+    // XXX(Bug 1631371) Check if this should be fallible, as it once pretended.
+    const auto runLength = GetLength();
+    mCapitalize.AppendElements(runLength);
+    memset(mCapitalize.Elements(), 0, runLength * sizeof(bool));
+  }
+  bool* const dest = mCapitalize.Elements() + aStart;
+  memcpy(dest, aCapitalization, aLength * sizeof(bool));
+  mNeedsRebuild = true;
+}
+
+bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
+                                                  const uint8_t* aBreakBefore) {
+  if (!gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore)) {
+    return false;  // base class reported no change
+  }
+  mNeedsRebuild = true;
+  return true;
+}
+
+size_t nsTransformedTextRun::SizeOfExcludingThis(
+    mozilla::MallocSizeOf aMallocSizeOf) {
+  // Base-class measurement first, then the shallow sizes of our two arrays.
+  size_t bytes = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
+  bytes += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
+  bytes += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
+  // The factory is measured only when this run owns it.
+  bytes += mOwnsFactory ? aMallocSizeOf(mFactory) : 0;
+  return bytes;
+}
+
+size_t nsTransformedTextRun::SizeOfIncludingThis(
+    mozilla::MallocSizeOf aMallocSizeOf) {  // this object + SizeOfExcludingThis
+  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+}
+
+already_AddRefed<nsTransformedTextRun>
+nsTransformingTextRunFactory::MakeTextRun(
+    const char16_t* aString, uint32_t aLength,
+    const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
+    gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
+    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
+  return nsTransformedTextRun::Create(  // we pass ourselves as aFactory
+      aParams, this, aFontGroup, aString, aLength, aFlags, aFlags2,
+      std::move(aStyles), aOwnsFactory);
+}
+
+already_AddRefed<nsTransformedTextRun>
+nsTransformingTextRunFactory::MakeTextRun(
+    const uint8_t* aString, uint32_t aLength,
+    const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
+    gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
+    nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
+  // Only a 16-bit code path exists, to keep the amount of code for these
+  // rarely used features small: widen the text and clear the 8-bit flag.
+  const auto* narrowText = reinterpret_cast<const char*>(aString);
+  NS_ConvertASCIItoUTF16 wideText(narrowText, aLength);
+  return MakeTextRun(wideText.get(), aLength, aParams, aFontGroup,
+                     aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
+                     std::move(aStyles), aOwnsFactory);
+}
+
+void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
+                              const bool* aCharsToMerge,    // by source index
+                              const bool* aDeletedChars) {  // by dest index
+  MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
+  uint32_t offset = 0;  // next character slot to be written in aDest
+  AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;  // current merge run
+  const gfxTextRun::CompressedGlyph continuationGlyph =
+      gfxTextRun::CompressedGlyph::MakeComplex(false, false);
+  const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
+  gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs();
+  for (gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
+       !iter.AtEnd(); iter.NextRun()) {
+    const gfxTextRun::GlyphRun* run = iter.GlyphRun();
+    aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
+                       run->mOrientation, run->mIsCJK);
+    // Walk this run's source characters, gathering glyphs per merge run.
+    bool anyMissing = false;
+    uint32_t mergeRunStart = iter.StringStart();
+    // Initialize to a copy of the first source glyph in the merge run.
+    gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
+    uint32_t stringEnd = iter.StringEnd();
+    for (uint32_t k = iter.StringStart(); k < stringEnd; ++k) {
+      const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
+      if (g.IsSimpleGlyph()) {
+        if (!anyMissing) {  // skipped once the merge run has a missing glyph
+          gfxTextRun::DetailedGlyph details;
+          details.mGlyphID = g.GetSimpleGlyph();
+          details.mAdvance = g.GetSimpleAdvance();
+          glyphs.AppendElement(details);
+        }
+      } else {
+        if (g.IsMissing()) {
+          anyMissing = true;
+          glyphs.Clear();  // drop what was gathered before the missing glyph
+        }
+        if (g.GetGlyphCount() > 0) {
+          glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
+        }
+      }
+
+      if (k + 1 < iter.StringEnd() && aCharsToMerge[k + 1]) {
+        // next char is supposed to merge with current, so loop without
+        // writing current merged glyph to the destination
+        continue;
+      }
+
+      // If the start of the merge run is actually a character that should
+      // have been merged with the previous character (this can happen
+      // if there's a font change in the middle of a case-mapped character,
+      // that decomposed into a sequence of base+diacritics, for example),
+      // just discard the entire merge run, since merging across a glyph run
+      // boundary is not supported here.
+      NS_WARNING_ASSERTION(
+          !aCharsToMerge[mergeRunStart],
+          "unable to merge across a glyph run boundary, glyph(s) discarded");
+      if (!aCharsToMerge[mergeRunStart]) {
+        // Determine if we can just copy the existing simple glyph record.
+        if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) {
+          destGlyphs[offset] = mergedGlyph;
+        } else {
+          // Otherwise set up complex glyph record and store detailed glyphs.
+          mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
+                                 mergedGlyph.IsLigatureGroupStart());
+          destGlyphs[offset] = mergedGlyph;
+          aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements());
+          if (anyMissing) {
+            destGlyphs[offset].SetMissing();
+          }
+        }
+        offset++;
+        // Deleted characters in aDest receive a continuation glyph record.
+        while (offset < aDest->GetLength() && aDeletedChars[offset]) {
+          destGlyphs[offset++] = continuationGlyph;
+        }
+      }
+      // Reset the accumulators and begin the next merge run at k + 1.
+      glyphs.Clear();
+      anyMissing = false;
+      mergeRunStart = k + 1;
+      if (mergeRunStart < stringEnd) {
+        mergedGlyph = srcGlyphs[mergeRunStart];
+      }
+    }
+    NS_ASSERTION(glyphs.Length() == 0,
+                 "Leftover glyphs, don't request merging of the last character "
+                 "with its next!");
+  }
+  NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
+}
+
+gfxTextRunFactory::Parameters GetParametersForInner(
+    nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
+    DrawTarget* aRefDrawTarget) {
+  // Hand aTextRun's shaping flags back through the out-parameter.
+  *aFlags = aTextRun->GetFlags();
+  // Aggregate: draw target first, app-units-per-dev-unit last, rest empty.
+  return {aRefDrawTarget, nullptr, nullptr,
+          nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
+}
+
+// Some languages have special casing conventions that differ from the
+// default Unicode mappings. Each value below is named for a well-known
+// exemplar language that exhibits the behavior in question; GetCasingFor()
+// may map several lang tags (e.g. tr, az, ba, crh, tt) to the same value
+// when the behavior is shared by other languages.
+enum LanguageSpecificCasingBehavior {
+  eLSCB_None,       // default non-lang-specific behavior
+  eLSCB_Dutch,      // treat "ij" digraph as a unit for capitalization
+  eLSCB_Greek,      // strip accent when uppercasing Greek vowels
+  eLSCB_Irish,      // keep prefix letters as lowercase when uppercasing Irish
+  eLSCB_Turkish,    // preserve dotted/dotless-i distinction in uppercase
+  eLSCB_Lithuanian  // retain dot on lowercase i/j when an accent is present
+};
+
+// Maps a language atom to the language-specific casing behavior it selects;
+// a tag with subtags (e.g. "tr-TR") is retried using only its leading part.
+static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
+  if (aLang == nullptr) {
+    return eLSCB_None;
+  }
+  if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
+      aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
+      aLang == nsGkAtoms::tt) {
+    return eLSCB_Turkish;
+  }
+  if (aLang == nsGkAtoms::nl) {
+    return eLSCB_Dutch;
+  }
+  if (aLang == nsGkAtoms::el) {
+    return eLSCB_Greek;
+  }
+  if (aLang == nsGkAtoms::ga) {
+    return eLSCB_Irish;
+  }
+  if (aLang == nsGkAtoms::lt_) {
+    return eLSCB_Lithuanian;
+  }
+  // No exact match: is there a '-' (past the first character) to cut at?
+  nsAtomString tag(const_cast<nsAtom*>(aLang));
+  const int dashPos = tag.FindChar('-');
+  if (dashPos <= 0) {
+    return eLSCB_None;
+  }
+  tag.Truncate(dashPos);
+  RefPtr<nsAtom> primaryTag = NS_Atomize(tag);
+  return GetCasingFor(primaryTag);
+}
+
+bool nsCaseTransformTextRunFactory::TransformString(
+    const nsAString& aString, nsString& aConvertedString,
+    const Maybe<StyleTextTransform>& aGlobalTransform, char16_t aMaskChar,
+    bool aCaseTransformsOnly, const nsAtom* aLanguage,
+    nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
+    const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
+    nsTArray<uint8_t>* aCanBreakBeforeArray,
+    nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
+  bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
+  MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
+             "text run must be provided to use aux output arrays");
+
+  uint32_t length = aString.Length();
+  const char16_t* str = aString.BeginReading();
+  // If an unconditional mask character was passed, we'll use it; if not, any
+  // masking called for by the textrun styles will use TextEditor's mask char.
+  const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask();
+
+  bool mergeNeeded = false;
+
+  bool capitalizeDutchIJ = false;
+  bool prevIsLetter = false;
+  bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
+                          // when doing Irish lowercasing
+  bool seenSoftDotted = false;  // true immediately after an I or J that is
+                                // converted to lowercase in Lithuanian mode
+  uint32_t sigmaIndex = uint32_t(-1);
+  nsUGenCategory cat;
+
+  StyleTextTransform style =
+      aGlobalTransform.valueOr(StyleTextTransform::None());
+  bool forceNonFullWidth = false;
+  const nsAtom* lang = aLanguage;
+
+  LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
+  mozilla::GreekCasing::State greekState;
+  mozilla::IrishCasing::State irishState;
+  uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
+                                      // in the output string
+  uint32_t irishMarkSrc = uint32_t(-1);  // corresponding location in source
+                                         // string (may differ from output due
+                                         // to expansions like eszet -> 'SS')
+  uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
+                                      // tonos added (if it is disjunctive eta)
+  const char16_t kGreekUpperEta = 0x0397;
+
+  for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
+    uint32_t ch = str[i];
+
+    RefPtr<nsTransformedCharStyle> charStyle;
+    if (aTextRun) {
+      charStyle = aTextRun->mStyles[aOffsetInTextRun];
+      style = aGlobalTransform.valueOr(charStyle->mTextTransform);
+      forceNonFullWidth = charStyle->mForceNonFullWidth;
+
+      nsAtom* newLang =
+          charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
+      if (lang != newLang) {
+        lang = newLang;
+        languageSpecificCasing = GetCasingFor(lang);
+        greekState.Reset();
+        irishState.Reset();
+        irishMark = uint32_t(-1);
+        irishMarkSrc = uint32_t(-1);
+        greekMark = uint32_t(-1);
+      }
+    }
+
+    // These should be mutually exclusive: mMaskPassword is set if we are
+    // handling <input type=password>, where the TextEditor code controls
+    // masking and we use its PasswordMask() character, in which case
+    // aMaskChar (from -webkit-text-security) is not used.
+    MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword));
+
+    bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar;
+    int extraChars = 0;
+    const mozilla::unicode::MultiCharMapping* mcm;
+    bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?
+
+    if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
+      ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
+    }
+    const uint32_t originalCh = ch;
+
+    // Skip case transform if we're masking current character.
+    if (!maskPassword) {
+      switch (style.case_) {
+        case StyleTextTransformCase::None:
+          break;
+
+        case StyleTextTransformCase::Lowercase:
+          if (languageSpecificCasing == eLSCB_Turkish) {
+            if (ch == 'I') {
+              ch = LATIN_SMALL_LETTER_DOTLESS_I;
+              prevIsLetter = true;
+              sigmaIndex = uint32_t(-1);
+              break;
+            }
+            if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+              ch = 'i';
+              prevIsLetter = true;
+              sigmaIndex = uint32_t(-1);
+              break;
+            }
+          }
+
+          if (languageSpecificCasing == eLSCB_Lithuanian) {
+            // clang-format off
+            /* From SpecialCasing.txt:
+             * # Introduce an explicit dot above when lowercasing capital I's and J's
+             * # whenever there are more accents above.
+             * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
+             *
+             * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
+             * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
+             * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
+             * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
+             * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
+             * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
+             */
+            // clang-format on
+            if (ch == 'I' || ch == 'J' || ch == 0x012E) {
+              ch = ToLowerCase(ch);
+              prevIsLetter = true;
+              seenSoftDotted = true;
+              sigmaIndex = uint32_t(-1);
+              break;
+            }
+            if (ch == 0x00CC) {
+              aConvertedString.Append('i');
+              aConvertedString.Append(0x0307);
+              extraChars += 2;
+              ch = 0x0300;
+              prevIsLetter = true;
+              seenSoftDotted = false;
+              sigmaIndex = uint32_t(-1);
+              break;
+            }
+            if (ch == 0x00CD) {
+              aConvertedString.Append('i');
+              aConvertedString.Append(0x0307);
+              extraChars += 2;
+              ch = 0x0301;
+              prevIsLetter = true;
+              seenSoftDotted = false;
+              sigmaIndex = uint32_t(-1);
+              break;
+            }
+            if (ch == 0x0128) {
+              aConvertedString.Append('i');
+              aConvertedString.Append(0x0307);
+              extraChars += 2;
+              ch = 0x0303;
+              prevIsLetter = true;
+              seenSoftDotted = false;
+              sigmaIndex = uint32_t(-1);
+              break;
+            }
+          }
+
+          cat = mozilla::unicode::GetGenCategory(ch);
+
+          if (languageSpecificCasing == eLSCB_Irish &&
+              cat == nsUGenCategory::kLetter) {
+            // See bug 1018805 for Irish lowercasing requirements
+            if (!prevIsLetter && (ch == 'n' || ch == 't')) {
+              ntPrefix = true;
+            } else {
+              if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
+                aConvertedString.Append('-');
+                ++extraChars;
+              }
+              ntPrefix = false;
+            }
+          } else {
+            ntPrefix = false;
+          }
+
+          if (seenSoftDotted && cat == nsUGenCategory::kMark) {
+            // The seenSoftDotted flag will only be set in Lithuanian mode.
+            if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
+              aConvertedString.Append(0x0307);
+              ++extraChars;
+            }
+          }
+          seenSoftDotted = false;
+
+          // Special lowercasing behavior for Greek Sigma: note that this is
+          // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
+          // *not* a language-specific mapping; it applies regardless of the
+          // language of the element.
+          //
+          // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
+          // (i.e. the non-final form) whenever there is a following letter, or
+          // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
+          // followed by a LETTER); and to FINAL SIGMA when it is preceded by
+          // another letter but not followed by one.
+          //
+          // To implement the context-sensitive nature of this mapping, we keep
+          // track of whether the previous character was a letter. If not,
+          // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
+          // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
+          // record the position in the converted string; if we then encounter
+          // another letter, that FINAL SIGMA is replaced with a standard
+          // SMALL SIGMA.
+
+          // If sigmaIndex is not -1, it marks where we have provisionally
+          // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
+          // letter, we need to change it to SMALL SIGMA.
+          if (sigmaIndex != uint32_t(-1)) {
+            if (cat == nsUGenCategory::kLetter) {
+              aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
+            }
+          }
+
+          if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
+            // If preceding char was a letter, map to FINAL instead of SMALL,
+            // and note where it occurred by setting sigmaIndex; we'll change
+            // it to standard SMALL SIGMA later if another letter follows
+            if (prevIsLetter) {
+              ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
+              sigmaIndex = aConvertedString.Length();
+            } else {
+              // CAPITAL SIGMA not preceded by a letter is unconditionally
+              // mapped to SMALL SIGMA
+              ch = GREEK_SMALL_LETTER_SIGMA;
+              sigmaIndex = uint32_t(-1);
+            }
+            prevIsLetter = true;
+            break;
+          }
+
+          // ignore diacritics for the purpose of contextual sigma mapping;
+          // otherwise, reset prevIsLetter appropriately and clear the
+          // sigmaIndex marker
+          if (cat != nsUGenCategory::kMark) {
+            prevIsLetter = (cat == nsUGenCategory::kLetter);
+            sigmaIndex = uint32_t(-1);
+          }
+
+          mcm = mozilla::unicode::SpecialLower(ch);
+          if (mcm) {
+            int j = 0;
+            while (j < 2 && mcm->mMappedChars[j + 1]) {
+              aConvertedString.Append(mcm->mMappedChars[j]);
+              ++extraChars;
+              ++j;
+            }
+            ch = mcm->mMappedChars[j];
+            break;
+          }
+
+          ch = ToLowerCase(ch);
+          break;
+
+        case StyleTextTransformCase::Uppercase:
+          if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
+            ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
+            break;
+          }
+
+          if (languageSpecificCasing == eLSCB_Greek) {
+            bool markEta;
+            bool updateEta;
+            ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
+                                                 updateEta);
+            if (markEta) {
+              greekMark = aConvertedString.Length();
+            } else if (updateEta) {
+              // Remove the TONOS from an uppercase ETA-TONOS that turned out
+              // not to be disjunctive-eta.
+              MOZ_ASSERT(aConvertedString.Length() > 0 &&
+                             greekMark < aConvertedString.Length(),
+                         "bad greekMark!");
+              aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
+              greekMark = uint32_t(-1);
+            }
+            break;
+          }
+
+          if (languageSpecificCasing == eLSCB_Lithuanian) {
+            /*
+             * # Remove DOT ABOVE after "i" with upper or titlecase
+             *
+             * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
+             */
+            if (ch == 'i' || ch == 'j' || ch == 0x012F) {
+              seenSoftDotted = true;
+              ch = ToTitleCase(ch);
+              break;
+            }
+            if (seenSoftDotted) {
+              seenSoftDotted = false;
+              if (ch == 0x0307) {
+                ch = uint32_t(-1);
+                break;
+              }
+            }
+          }
+
+          if (languageSpecificCasing == eLSCB_Irish) {
+            bool mark;
+            uint8_t action;
+            ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
+            if (mark) {
+              irishMark = aConvertedString.Length();
+              irishMarkSrc = i;
+              break;
+            } else if (action) {
+              nsString& str = aConvertedString;  // shorthand
+              switch (action) {
+                case 1:
+                  // lowercase a single prefix letter
+                  MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(),
+                             "bad irishMark!");
+                  str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
+                  irishMark = uint32_t(-1);
+                  irishMarkSrc = uint32_t(-1);
+                  break;
+                case 2:
+                  // lowercase two prefix letters (immediately before current
+                  // pos)
+                  MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
+                             "bad irishMark!");
+                  str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
+                  str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
+                  irishMark = uint32_t(-1);
+                  irishMarkSrc = uint32_t(-1);
+                  break;
+                case 3:
+                  // lowercase one prefix letter, and delete following hyphen
+                  // (which must be the immediately-preceding char)
+                  MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
+                             "bad irishMark!");
+                  MOZ_ASSERT(
+                      irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
+                      "failed to set irishMarks");
+                  str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
+                  aDeletedCharsArray[irishMarkSrc + 1] = true;
+                  // Remove the trailing entries (corresponding to the deleted
+                  // hyphen) from the auxiliary arrays.
+                  uint32_t len = aCharsToMergeArray.Length();
+                  MOZ_ASSERT(len >= 2);
+                  aCharsToMergeArray.TruncateLength(len - 1);
+                  if (auxiliaryOutputArrays) {
+                    MOZ_ASSERT(aStyleArray->Length() == len);
+                    MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
+                    aStyleArray->TruncateLength(len - 1);
+                    aCanBreakBeforeArray->TruncateLength(len - 1);
+                    inhibitBreakBefore = true;
+                  }
+                  mergeNeeded = true;
+                  irishMark = uint32_t(-1);
+                  irishMarkSrc = uint32_t(-1);
+                  break;
+              }
+              // ch has been set to the uppercase for current char;
+              // No need to check for SpecialUpper here as none of the
+              // characters that could trigger an Irish casing action have
+              // special mappings.
+              break;
+            }
+            // If we didn't have any special action to perform, fall through
+            // to check for special uppercase (ß)
+          }
+
+          // Updated mapping for German eszett, not currently reflected in the
+          // Unicode data files. This is behind a pref, as it may not work well
+          // with many (esp. older) fonts.
+          if (ch == 0x00DF &&
+              StaticPrefs::
+                  layout_css_text_transform_uppercase_eszett_enabled()) {
+            ch = 0x1E9E;
+            break;
+          }
+
+          mcm = mozilla::unicode::SpecialUpper(ch);
+          if (mcm) {
+            int j = 0;
+            while (j < 2 && mcm->mMappedChars[j + 1]) {
+              aConvertedString.Append(mcm->mMappedChars[j]);
+              ++extraChars;
+              ++j;
+            }
+            ch = mcm->mMappedChars[j];
+            break;
+          }
+
+          // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
+          // lack of widespread font support for the corresponding Mtavruli
+          // characters at this time (July 2018).
+          // This condition is to be removed once the major platforms ship with
+          // fonts that support U+1C90..1CBF.
+          if (ch < 0x10D0 || ch > 0x10FF) {
+            ch = ToUpperCase(ch);
+          }
+          break;
+
+        case StyleTextTransformCase::Capitalize:
+          if (aTextRun) {
+            if (capitalizeDutchIJ && ch == 'j') {
+              ch = 'J';
+              capitalizeDutchIJ = false;
+              break;
+            }
+            capitalizeDutchIJ = false;
+            if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
+                aTextRun->mCapitalize[aOffsetInTextRun]) {
+              if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
+                ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
+                break;
+              }
+              if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
+                ch = 'I';
+                capitalizeDutchIJ = true;
+                break;
+              }
+              if (languageSpecificCasing == eLSCB_Lithuanian) {
+                /*
+                 * # Remove DOT ABOVE after "i" with upper or titlecase
+                 *
+                 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
+                 */
+                if (ch == 'i' || ch == 'j' || ch == 0x012F) {
+                  seenSoftDotted = true;
+                  ch = ToTitleCase(ch);
+                  break;
+                }
+                if (seenSoftDotted) {
+                  seenSoftDotted = false;
+                  if (ch == 0x0307) {
+                    ch = uint32_t(-1);
+                    break;
+                  }
+                }
+              }
+
+              mcm = mozilla::unicode::SpecialTitle(ch);
+              if (mcm) {
+                int j = 0;
+                while (j < 2 && mcm->mMappedChars[j + 1]) {
+                  aConvertedString.Append(mcm->mMappedChars[j]);
+                  ++extraChars;
+                  ++j;
+                }
+                ch = mcm->mMappedChars[j];
+                break;
+              }
+
+              ch = ToTitleCase(ch);
+            }
+          }
+          break;
+
+        case StyleTextTransformCase::MathAuto:
+          // text-transform: math-auto is used for automatic italicization of
+          // single-char <mi> elements. However, some legacy cases (italic style
+          // fallback and <mi> with leading/trailing whitespace) are still
+          // handled in MathMLTextRunFactory.
+          if (length == 1) {
+            uint32_t ch2 =
+                MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic);
+            if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) {
+              ch = ch2;
+            } else if (ch2 != ch) {
+              // Bug 930504. Some platforms do not have fonts for Mathematical
+              // Alphanumeric Symbols. Hence we only perform the transform if a
+              // character is actually available.
+              FontMatchType matchType;
+              RefPtr<gfxFont> mathFont =
+                  aTextRun->GetFontGroup()->FindFontForChar(
+                      ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType);
+              if (mathFont) {
+                ch = ch2;
+              }
+            }
+          }
+          break;
+
+        default:
+          MOZ_ASSERT_UNREACHABLE("all cases should be handled");
+          break;
+      }
+
+      if (!aCaseTransformsOnly) {
+        if (!forceNonFullWidth &&
+            (style.other_ & StyleTextTransformOther::FULL_WIDTH)) {
+          ch = mozilla::unicode::GetFullWidth(ch);
+        }
+
+        if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) {
+          // clang-format off
+          static const uint32_t kSmallKanas[] = {
+              // ぁ   ぃ      ぅ      ぇ      ぉ      っ      ゃ      ゅ      ょ
+              0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
+              // ゎ   ゕ      ゖ
+              0x308E, 0x3095, 0x3096,
+              // ァ   ィ      ゥ      ェ      ォ      ッ      ャ      ュ      ョ
+              0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
+              // ヮ   ヵ      ヶ      ㇰ      ㇱ      ㇲ      ㇳ      ㇴ      ㇵ
+              0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
+              // ㇶ   ㇷ      ㇸ      ㇹ      ㇺ      ㇻ      ㇼ      ㇽ      ㇾ
+              0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
+              // ㇿ
+              0x31FF,
+              // ｧ    ｨ       ｩ       ｪ       ｫ       ｬ       ｭ       ｮ       ｯ
+              0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
+              // 𛄲    𛅐       𛅑       𛅒       𛅕       𛅤       𛅥       𛅦
+              0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166,
+              // 𛅧
+              0x1B167};
+          static const uint16_t kFullSizeKanas[] = {
+              // あ   い      う      え      お      つ      や      ゆ      よ
+              0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
+              // わ   か      け
+              0x308F, 0x304B, 0x3051,
+              // ア   イ      ウ      エ      オ      ツ      ヤ      ユ      ヨ
+              0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
+              // ワ   カ      ケ      ク      シ      ス      ト      ヌ      ハ
+              0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
+              // ヒ   フ      ヘ      ホ      ム      ラ      リ      ル      レ
+              0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
+              // ロ
+              0x30ED,
+              // ｱ    ｲ       ｳ       ｴ       ｵ       ﾔ       ﾕ       ﾖ        ﾂ
+              0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82,
+              // こ   ゐ       ゑ      を      コ       ヰ      ヱ      ヲ       ン
+              0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3};
+          // clang-format on
+
+          size_t index;
+          const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas);
+          if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
+            ch = kFullSizeKanas[index];
+          }
+        }
+      }
+
+      if (forceNonFullWidth) {
+        ch = mozilla::unicode::GetFullWidthInverse(ch);
+      }
+    }
+
+    if (ch == uint32_t(-1)) {
+      aDeletedCharsArray.AppendElement(true);
+      mergeNeeded = true;
+    } else {
+      aDeletedCharsArray.AppendElement(false);
+      aCharsToMergeArray.AppendElement(false);
+      if (auxiliaryOutputArrays) {
+        aStyleArray->AppendElement(charStyle);
+        aCanBreakBeforeArray->AppendElement(
+            inhibitBreakBefore
+                ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
+                : aTextRun->CanBreakBefore(aOffsetInTextRun));
+      }
+
+      if (IS_IN_BMP(ch)) {
+        aConvertedString.Append(maskPassword ? mask : ch);
+      } else {
+        if (maskPassword) {
+          aConvertedString.Append(mask);
+          // TODO: We should show a password mask for a surrogate pair later.
+          aConvertedString.Append(mask);
+        } else {
+          aConvertedString.Append(H_SURROGATE(ch));
+          aConvertedString.Append(L_SURROGATE(ch));
+        }
+        ++extraChars;
+      }
+      if (!IS_IN_BMP(originalCh)) {
+        // Skip the trailing surrogate.
+        ++aOffsetInTextRun;
+        ++i;
+        aDeletedCharsArray.AppendElement(true);
+      }
+
+      while (extraChars-- > 0) {
+        mergeNeeded = true;
+        aCharsToMergeArray.AppendElement(true);
+        if (auxiliaryOutputArrays) {
+          aStyleArray->AppendElement(charStyle);
+          aCanBreakBeforeArray->AppendElement(
+              gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
+        }
+      }
+    }
+  }
+
+  // These output arrays, if present, must always have matching lengths:
+  if (auxiliaryOutputArrays) {
+    DebugOnly<uint32_t> len = aCharsToMergeArray.Length();
+    MOZ_ASSERT(aStyleArray->Length() == len);
+    MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
+  }
+
+  return mergeNeeded;
+}
+
+// Rebuilds the glyph data of |aTextRun| from its transformed text.
+//
+// The original string (aTextRun->mString) is passed through
+// TransformString(), and the converted string is used to create a child text
+// run: by mInnerTransformingTextRunFactory when one is set, otherwise
+// directly by the text run's font group. The child's glyph data is then
+// either merged into |aTextRun| (when TransformString() reported that merging
+// is needed) or simply copied across.
+//
+// If no child run could be created, the function returns before
+// |aTextRun|'s glyph runs are reset.
+//
+// aRefDrawTarget is forwarded to GetParametersForInner() and, for a
+// transformed child, to FinishSettingProperties(). aMFR is forwarded to the
+// font group's MakeTextRun() or to FinishSettingProperties(), depending on
+// which kind of child is built.
+void nsCaseTransformTextRunFactory::RebuildTextRun(
+    nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
+    gfxMissingFontRecorder* aMFR) {
+  // Outputs of TransformString(). The names used inside the assertions below
+  // are kept stable because NS_ASSERTION reports the asserted expression.
+  nsAutoString convertedString;
+  AutoTArray<bool, 50> charsToMergeArray;
+  AutoTArray<bool, 50> deletedCharsArray;
+  AutoTArray<uint8_t, 50> canBreakBeforeArray;
+  AutoTArray<RefPtr<nsTransformedCharStyle>, 50> charStyles;
+
+  // With mAllUppercase set, an uppercase transform is supplied as the global
+  // transform; otherwise no global transform is passed.
+  Maybe<StyleTextTransform> forcedTransform;
+  if (mAllUppercase) {
+    forcedTransform =
+        Some(StyleTextTransform{StyleTextTransformCase::Uppercase, {}});
+  }
+
+  const bool needsMerge = TransformString(
+      aTextRun->mString, convertedString, forcedTransform, mMaskChar,
+      /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
+      deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &charStyles);
+
+  gfx::ShapedTextFlags shapedFlags;
+  gfxTextRunFactory::Parameters childParams =
+      GetParametersForInner(aTextRun, &shapedFlags, aRefDrawTarget);
+  gfxFontGroup* const fonts = aTextRun->GetFontGroup();
+
+  // At most one of these owning references gets populated; |child| is a
+  // non-owning alias for whichever one it is.
+  RefPtr<nsTransformedTextRun> innerRun;
+  RefPtr<gfxTextRun> plainRun;
+  gfxTextRun* child = nullptr;
+
+  if (!mInnerTransformingTextRunFactory) {
+    plainRun = fonts->MakeTextRun(
+        convertedString.BeginReading(), convertedString.Length(), &childParams,
+        shapedFlags, nsTextFrameUtils::Flags(), aMFR);
+    child = plainRun.get();
+  } else {
+    innerRun = mInnerTransformingTextRunFactory->MakeTextRun(
+        convertedString.BeginReading(), convertedString.Length(), &childParams,
+        fonts, shapedFlags, nsTextFrameUtils::Flags(), std::move(charStyles),
+        false);
+    child = innerRun.get();
+  }
+
+  if (!child) {
+    return;
+  }
+
+  // Hand the break-before values to the child so that potential line breaks
+  // are preserved (and the child gets shaped appropriately).
+  NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
+               "Dropped characters or break-before values somewhere!");
+  child->SetPotentialLineBreaks(
+      gfxTextRun::Range(0, uint32_t(canBreakBeforeArray.Length())),
+      canBreakBeforeArray.Elements());
+  if (innerRun) {
+    innerRun->FinishSettingProperties(aRefDrawTarget, aMFR);
+  }
+
+  aTextRun->ResetGlyphRuns();
+
+  if (!needsMerge) {
+    // Nothing to merge, so a plain copy is enough; this yields a more
+    // optimized textrun. The data is copied rather than stolen because the
+    // child may be cached, and stealing from it would break the cache.
+    aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
+    return;
+  }
+
+  // Combine multiple characters into one multi-glyph character where
+  // required, and skip over deleted accent characters.
+  NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
+               "source length mismatch");
+  NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
+               "destination length mismatch");
+  MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
+                           deletedCharsArray.Elements());
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /layout/generic/nsTextRunTransformations.cpp
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip