/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsTextRunTransformations.h"

#include <utility>

#include "GreekCasing.h"
#include "IrishCasing.h"
#include "MathMLTextRunFactory.h"
#include "mozilla/ComputedStyleInlines.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/StaticPrefs_layout.h"
#include "mozilla/StaticPrefs_mathml.h"
#include "mozilla/TextEditor.h"
#include "mozilla/gfx/2D.h"
#include "nsGkAtoms.h"
#include "nsSpecialCasingData.h"
#include "nsStyleConsts.h"
#include "nsTextFrameUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"

using namespace mozilla;
using namespace mozilla::gfx;

// Unicode characters needing special casing treatment in tr/az languages
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see bug 740120.
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 #define GREEK_SMALL_LETTER_SIGMA 0x03C3 already_AddRefed nsTransformedTextRun::Create( const gfxTextRunFactory::Parameters* aParams, nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup, const char16_t* aString, uint32_t aLength, const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2, nsTArray>&& aStyles, bool aOwnsFactory) { NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT), "didn't expect text to be marked as 8-bit here"); void* storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength); if (!storage) { return nullptr; } RefPtr result = new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength, aFlags, aFlags2, std::move(aStyles), aOwnsFactory); return result.forget(); } void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength, bool* aCapitalization) { if (mCapitalize.IsEmpty()) { // XXX(Bug 1631371) Check if this should use a fallible operation as it // pretended earlier. 
mCapitalize.AppendElements(GetLength()); memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool)); } memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength * sizeof(bool)); mNeedsRebuild = true; } bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange, const uint8_t* aBreakBefore) { bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore); if (changed) { mNeedsRebuild = true; } return changed; } size_t nsTransformedTextRun::SizeOfExcludingThis( mozilla::MallocSizeOf aMallocSizeOf) { size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf); total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf); total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf); if (mOwnsFactory) { total += aMallocSizeOf(mFactory); } return total; } size_t nsTransformedTextRun::SizeOfIncludingThis( mozilla::MallocSizeOf aMallocSizeOf) { return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); } already_AddRefed nsTransformingTextRunFactory::MakeTextRun( const char16_t* aString, uint32_t aLength, const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup, gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2, nsTArray>&& aStyles, bool aOwnsFactory) { return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString, aLength, aFlags, aFlags2, std::move(aStyles), aOwnsFactory); } already_AddRefed nsTransformingTextRunFactory::MakeTextRun( const uint8_t* aString, uint32_t aLength, const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup, gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2, nsTArray>&& aStyles, bool aOwnsFactory) { // We'll only have a Unicode code path to minimize the amount of code needed // for these rarely used features NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast(aString), aLength); return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup, aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2, std::move(aStyles), aOwnsFactory); } void 
MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc, const bool* aCharsToMerge, const bool* aDeletedChars) { MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!"); uint32_t offset = 0; AutoTArray glyphs; const gfxTextRun::CompressedGlyph continuationGlyph = gfxTextRun::CompressedGlyph::MakeComplex(false, false); const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs(); gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs(); for (gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc)); !iter.AtEnd(); iter.NextRun()) { const gfxTextRun::GlyphRun* run = iter.GlyphRun(); aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false, run->mOrientation, run->mIsCJK); bool anyMissing = false; uint32_t mergeRunStart = iter.StringStart(); // Initialize to a copy of the first source glyph in the merge run. gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart]; uint32_t stringEnd = iter.StringEnd(); for (uint32_t k = iter.StringStart(); k < stringEnd; ++k) { const gfxTextRun::CompressedGlyph g = srcGlyphs[k]; if (g.IsSimpleGlyph()) { if (!anyMissing) { gfxTextRun::DetailedGlyph details; details.mGlyphID = g.GetSimpleGlyph(); details.mAdvance = g.GetSimpleAdvance(); glyphs.AppendElement(details); } } else { if (g.IsMissing()) { anyMissing = true; glyphs.Clear(); } if (g.GetGlyphCount() > 0) { glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount()); } } if (k + 1 < iter.StringEnd() && aCharsToMerge[k + 1]) { // next char is supposed to merge with current, so loop without // writing current merged glyph to the destination continue; } // If the start of the merge run is actually a character that should // have been merged with the previous character (this can happen // if there's a font change in the middle of a case-mapped character, // that decomposed into a sequence of base+diacritics, for example), // just discard the entire merge run. See comment at start of this // function. 
NS_WARNING_ASSERTION( !aCharsToMerge[mergeRunStart], "unable to merge across a glyph run boundary, glyph(s) discarded"); if (!aCharsToMerge[mergeRunStart]) { // Determine if we can just copy the existing simple glyph record. if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) { destGlyphs[offset] = mergedGlyph; } else { // Otherwise set up complex glyph record and store detailed glyphs. mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(), mergedGlyph.IsLigatureGroupStart()); destGlyphs[offset] = mergedGlyph; aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements()); if (anyMissing) { destGlyphs[offset].SetMissing(); } } offset++; while (offset < aDest->GetLength() && aDeletedChars[offset]) { destGlyphs[offset++] = continuationGlyph; } } glyphs.Clear(); anyMissing = false; mergeRunStart = k + 1; if (mergeRunStart < stringEnd) { mergedGlyph = srcGlyphs[mergeRunStart]; } } NS_ASSERTION(glyphs.Length() == 0, "Leftover glyphs, don't request merging of the last character " "with its next!"); } NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations"); } gfxTextRunFactory::Parameters GetParametersForInner( nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags, DrawTarget* aRefDrawTarget) { gfxTextRunFactory::Parameters params = { aRefDrawTarget, nullptr, nullptr, nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()}; *aFlags = aTextRun->GetFlags(); return params; } // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. 
enum LanguageSpecificCasingBehavior { eLSCB_None, // default non-lang-specific behavior eLSCB_Dutch, // treat "ij" digraph as a unit for capitalization eLSCB_Greek, // strip accent when uppercasing Greek vowels eLSCB_Irish, // keep prefix letters as lowercase when uppercasing Irish eLSCB_Turkish, // preserve dotted/dotless-i distinction in uppercase eLSCB_Lithuanian // retain dot on lowercase i/j when an accent is present }; static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) { if (!aLang) { return eLSCB_None; } if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az || aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh || aLang == nsGkAtoms::tt) { return eLSCB_Turkish; } if (aLang == nsGkAtoms::nl) { return eLSCB_Dutch; } if (aLang == nsGkAtoms::el) { return eLSCB_Greek; } if (aLang == nsGkAtoms::ga) { return eLSCB_Irish; } if (aLang == nsGkAtoms::lt_) { return eLSCB_Lithuanian; } // Is there a region subtag we should ignore? nsAtomString langStr(const_cast(aLang)); int index = langStr.FindChar('-'); if (index > 0) { langStr.Truncate(index); RefPtr truncatedLang = NS_Atomize(langStr); return GetCasingFor(truncatedLang); } return eLSCB_None; } bool nsCaseTransformTextRunFactory::TransformString( const nsAString& aString, nsString& aConvertedString, const Maybe& aGlobalTransform, char16_t aMaskChar, bool aCaseTransformsOnly, const nsAtom* aLanguage, nsTArray& aCharsToMergeArray, nsTArray& aDeletedCharsArray, const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun, nsTArray* aCanBreakBeforeArray, nsTArray>* aStyleArray) { bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray; MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun, "text run must be provided to use aux output arrays"); uint32_t length = aString.Length(); const char16_t* str = aString.BeginReading(); // If an unconditional mask character was passed, we'll use it; if not, any // masking called for by the textrun styles will use TextEditor's mask char. 
const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask(); bool mergeNeeded = false; bool capitalizeDutchIJ = false; bool prevIsLetter = false; bool ntPrefix = false; // true immediately after a word-initial 'n' or 't' // when doing Irish lowercasing bool seenSoftDotted = false; // true immediately after an I or J that is // converted to lowercase in Lithuanian mode uint32_t sigmaIndex = uint32_t(-1); nsUGenCategory cat; StyleTextTransform style = aGlobalTransform.valueOr(StyleTextTransform::None()); bool forceNonFullWidth = false; const nsAtom* lang = aLanguage; LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang); mozilla::GreekCasing::State greekState; mozilla::IrishCasing::State irishState; uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s) // in the output string uint32_t irishMarkSrc = uint32_t(-1); // corresponding location in source // string (may differ from output due // to expansions like eszet -> 'SS') uint32_t greekMark = uint32_t(-1); // location of uppercase ETA that may need // tonos added (if it is disjunctive eta) const char16_t kGreekUpperEta = 0x0397; for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) { uint32_t ch = str[i]; RefPtr charStyle; if (aTextRun) { charStyle = aTextRun->mStyles[aOffsetInTextRun]; style = aGlobalTransform.valueOr(charStyle->mTextTransform); forceNonFullWidth = charStyle->mForceNonFullWidth; nsAtom* newLang = charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr; if (lang != newLang) { lang = newLang; languageSpecificCasing = GetCasingFor(lang); greekState.Reset(); irishState.Reset(); irishMark = uint32_t(-1); irishMarkSrc = uint32_t(-1); greekMark = uint32_t(-1); } } // These should be mutually exclusive: mMaskPassword is set if we are // handling , where the TextEditor code controls // masking and we use its PasswordMask() character, in which case // aMaskChar (from -webkit-text-security) is not used. 
MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword)); bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar; int extraChars = 0; const mozilla::unicode::MultiCharMapping* mcm; bool inhibitBreakBefore = false; // have we just deleted preceding hyphen? if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } const uint32_t originalCh = ch; // Skip case transform if we're masking current character. if (!maskPassword) { switch (style.case_) { case StyleTextTransformCase::None: break; case StyleTextTransformCase::Lowercase: if (languageSpecificCasing == eLSCB_Turkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = uint32_t(-1); break; } } if (languageSpecificCasing == eLSCB_Lithuanian) { // clang-format off /* From SpecialCasing.txt: * # Introduce an explicit dot above when lowercasing capital I's and J's * # whenever there are more accents above. 
* # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) * * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE */ // clang-format on if (ch == 'I' || ch == 'J' || ch == 0x012E) { ch = ToLowerCase(ch); prevIsLetter = true; seenSoftDotted = true; sigmaIndex = uint32_t(-1); break; } if (ch == 0x00CC) { aConvertedString.Append('i'); aConvertedString.Append(0x0307); extraChars += 2; ch = 0x0300; prevIsLetter = true; seenSoftDotted = false; sigmaIndex = uint32_t(-1); break; } if (ch == 0x00CD) { aConvertedString.Append('i'); aConvertedString.Append(0x0307); extraChars += 2; ch = 0x0301; prevIsLetter = true; seenSoftDotted = false; sigmaIndex = uint32_t(-1); break; } if (ch == 0x0128) { aConvertedString.Append('i'); aConvertedString.Append(0x0307); extraChars += 2; ch = 0x0303; prevIsLetter = true; seenSoftDotted = false; sigmaIndex = uint32_t(-1); break; } } cat = mozilla::unicode::GetGenCategory(ch); if (languageSpecificCasing == eLSCB_Irish && cat == nsUGenCategory::kLetter) { // See bug 1018805 for Irish lowercasing requirements if (!prevIsLetter && (ch == 'n' || ch == 't')) { ntPrefix = true; } else { if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) { aConvertedString.Append('-'); ++extraChars; } ntPrefix = false; } } else { ntPrefix = false; } if (seenSoftDotted && cat == nsUGenCategory::kMark) { // The seenSoftDotted flag will only be set in Lithuanian mode. 
if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) { aConvertedString.Append(0x0307); ++extraChars; } } seenSoftDotted = false; // Special lowercasing behavior for Greek Sigma: note that this is // listed as context-sensitive in Unicode's SpecialCasing.txt, but is // *not* a language-specific mapping; it applies regardless of the // language of the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA // (i.e. the non-final form) whenever there is a following letter, or // when the CAPITAL SIGMA occurs in isolation (neither preceded nor // followed by a LETTER); and to FINAL SIGMA when it is preceded by // another letter but not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we // record the position in the converted string; if we then encounter // another letter, that FINAL SIGMA is replaced with a standard // SMALL SIGMA. // If sigmaIndex is not -1, it marks where we have provisionally // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another // letter, we need to change it to SMALL SIGMA. 
if (sigmaIndex != uint32_t(-1)) { if (cat == nsUGenCategory::kLetter) { aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change // it to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = aConvertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally // mapped to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = uint32_t(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsUGenCategory::kMark) { prevIsLetter = (cat == nsUGenCategory::kLetter); sigmaIndex = uint32_t(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case StyleTextTransformCase::Uppercase: if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Greek) { bool markEta; bool updateEta; ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta, updateEta); if (markEta) { greekMark = aConvertedString.Length(); } else if (updateEta) { // Remove the TONOS from an uppercase ETA-TONOS that turned out // not to be disjunctive-eta. 
MOZ_ASSERT(aConvertedString.Length() > 0 && greekMark < aConvertedString.Length(), "bad greekMark!"); aConvertedString.SetCharAt(kGreekUpperEta, greekMark); greekMark = uint32_t(-1); } break; } if (languageSpecificCasing == eLSCB_Lithuanian) { /* * # Remove DOT ABOVE after "i" with upper or titlecase * * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE */ if (ch == 'i' || ch == 'j' || ch == 0x012F) { seenSoftDotted = true; ch = ToTitleCase(ch); break; } if (seenSoftDotted) { seenSoftDotted = false; if (ch == 0x0307) { ch = uint32_t(-1); break; } } } if (languageSpecificCasing == eLSCB_Irish) { bool mark; uint8_t action; ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action); if (mark) { irishMark = aConvertedString.Length(); irishMarkSrc = i; break; } else if (action) { nsString& str = aConvertedString; // shorthand switch (action) { case 1: // lowercase a single prefix letter MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(), "bad irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); irishMark = uint32_t(-1); irishMarkSrc = uint32_t(-1); break; case 2: // lowercase two prefix letters (immediately before current // pos) MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); str.SetCharAt(ToLowerCase(str[irishMark]), irishMark); str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1); irishMark = uint32_t(-1); irishMarkSrc = uint32_t(-1); break; case 3: // lowercase one prefix letter, and delete following hyphen // (which must be the immediately-preceding char) MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2, "bad irishMark!"); MOZ_ASSERT( irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1), "failed to set irishMarks"); str.Replace(irishMark, 2, ToLowerCase(str[irishMark])); aDeletedCharsArray[irishMarkSrc + 1] = true; // Remove the trailing entries (corresponding to the deleted // hyphen) from the auxiliary arrays. 
uint32_t len = aCharsToMergeArray.Length(); MOZ_ASSERT(len >= 2); aCharsToMergeArray.TruncateLength(len - 1); if (auxiliaryOutputArrays) { MOZ_ASSERT(aStyleArray->Length() == len); MOZ_ASSERT(aCanBreakBeforeArray->Length() == len); aStyleArray->TruncateLength(len - 1); aCanBreakBeforeArray->TruncateLength(len - 1); inhibitBreakBefore = true; } mergeNeeded = true; irishMark = uint32_t(-1); irishMarkSrc = uint32_t(-1); break; } // ch has been set to the uppercase for current char; // No need to check for SpecialUpper here as none of the // characters that could trigger an Irish casing action have // special mappings. break; } // If we didn't have any special action to perform, fall through // to check for special uppercase (ß) } // Updated mapping for German eszett, not currently reflected in the // Unicode data files. This is behind a pref, as it may not work well // with many (esp. older) fonts. if (ch == 0x00DF && StaticPrefs:: layout_css_text_transform_uppercase_eszett_enabled()) { ch = 0x1E9E; break; } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of // lack of widespread font support for the corresponding Mtavruli // characters at this time (July 2018). // This condition is to be removed once the major platforms ship with // fonts that support U+1C90..1CBF. 
if (ch < 0x10D0 || ch > 0x10FF) { ch = ToUpperCase(ch); } break; case StyleTextTransformCase::Capitalize: if (aTextRun) { if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (aOffsetInTextRun < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[aOffsetInTextRun]) { if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } if (languageSpecificCasing == eLSCB_Lithuanian) { /* * # Remove DOT ABOVE after "i" with upper or titlecase * * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE */ if (ch == 'i' || ch == 'j' || ch == 0x012F) { seenSoftDotted = true; ch = ToTitleCase(ch); break; } if (seenSoftDotted) { seenSoftDotted = false; if (ch == 0x0307) { ch = uint32_t(-1); break; } } } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { aConvertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } } break; case StyleTextTransformCase::MathAuto: // text-transform: math-auto is used for automatic italicization of // single-char elements. However, some legacy cases (italic style // fallback and with leading/trailing whitespace) are still // handled in MathMLTextRunFactory. if (length == 1) { uint32_t ch2 = MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic); if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) { ch = ch2; } else if (ch2 != ch) { // Bug 930504. Some platforms do not have fonts for Mathematical // Alphanumeric Symbols. Hence we only perform the transform if a // character is actually available. 
FontMatchType matchType; RefPtr mathFont = aTextRun->GetFontGroup()->FindFontForChar( ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType); if (mathFont) { ch = ch2; } } } break; default: MOZ_ASSERT_UNREACHABLE("all cases should be handled"); break; } if (!aCaseTransformsOnly) { if (!forceNonFullWidth && (style.other_ & StyleTextTransformOther::FULL_WIDTH)) { ch = mozilla::unicode::GetFullWidth(ch); } if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) { // clang-format off static const uint32_t kSmallKanas[] = { // ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ 0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087, // ゎ ゕ ゖ 0x308E, 0x3095, 0x3096, // ァ ィ ゥ ェ ォ ッ ャ ュ ョ 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7, // ヮ ヵ ヶ ㇰ ㇱ ㇲ ㇳ ㇴ ㇵ 0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5, // ㇶ ㇷ ㇸ ㇹ ㇺ ㇻ ㇼ ㇽ ㇾ 0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE, // ㇿ 0x31FF, // ァ ィ ゥ ェ ォ ャ ュ ョ ッ 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F, // 𛄲 𛅐 𛅑 𛅒 𛅕 𛅤 𛅥 𛅦 0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166, // 𛅧 0x1B167}; static const uint16_t kFullSizeKanas[] = { // あ い う え お つ や ゆ よ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088, // わ か け 0x308F, 0x304B, 0x3051, // ア イ ウ エ オ ツ ヤ ユ ヨ 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8, // ワ カ ケ ク シ ス ト ヌ ハ 0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF, // ヒ フ ヘ ホ ム ラ リ ル レ 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC, // ロ 0x30ED, // ア イ ウ エ オ ヤ ユ ヨ ツ 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82, // こ ゐ ゑ を コ ヰ ヱ ヲ ン 0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3}; // clang-format on size_t index; const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas); if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) { ch = kFullSizeKanas[index]; } } } if (forceNonFullWidth) { 
ch = mozilla::unicode::GetFullWidthInverse(ch); } } if (ch == uint32_t(-1)) { aDeletedCharsArray.AppendElement(true); mergeNeeded = true; } else { aDeletedCharsArray.AppendElement(false); aCharsToMergeArray.AppendElement(false); if (auxiliaryOutputArrays) { aStyleArray->AppendElement(charStyle); aCanBreakBeforeArray->AppendElement( inhibitBreakBefore ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE : aTextRun->CanBreakBefore(aOffsetInTextRun)); } if (IS_IN_BMP(ch)) { aConvertedString.Append(maskPassword ? mask : ch); } else { if (maskPassword) { aConvertedString.Append(mask); // TODO: We should show a password mask for a surrogate pair later. aConvertedString.Append(mask); } else { aConvertedString.Append(H_SURROGATE(ch)); aConvertedString.Append(L_SURROGATE(ch)); } ++extraChars; } if (!IS_IN_BMP(originalCh)) { // Skip the trailing surrogate. ++aOffsetInTextRun; ++i; aDeletedCharsArray.AppendElement(true); } while (extraChars-- > 0) { mergeNeeded = true; aCharsToMergeArray.AppendElement(true); if (auxiliaryOutputArrays) { aStyleArray->AppendElement(charStyle); aCanBreakBeforeArray->AppendElement( gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE); } } } } // These output arrays, if present, must always have matching lengths: if (auxiliaryOutputArrays) { DebugOnly len = aCharsToMergeArray.Length(); MOZ_ASSERT(aStyleArray->Length() == len); MOZ_ASSERT(aCanBreakBeforeArray->Length() == len); } return mergeNeeded; } void nsCaseTransformTextRunFactory::RebuildTextRun( nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget, gfxMissingFontRecorder* aMFR) { nsAutoString convertedString; AutoTArray charsToMergeArray; AutoTArray deletedCharsArray; AutoTArray canBreakBeforeArray; AutoTArray, 50> styleArray; auto globalTransform = mAllUppercase ? 
Some(StyleTextTransform{StyleTextTransformCase::Uppercase, {}}) : Nothing(); bool mergeNeeded = TransformString( aTextRun->mString, convertedString, globalTransform, mMaskChar, /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray, deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray); gfx::ShapedTextFlags flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefDrawTarget); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); RefPtr transformedChild; RefPtr cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags, nsTextFrameUtils::Flags(), aMFR); child = cachedChild.get(); } if (!child) { return; } // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length())); child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements()); if (transformedChild) { transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR); } aTextRun->ResetGlyphRuns(); if (mergeNeeded) { // Now merge multiple characters into one multi-glyph character as required // and deal with skipping deleted accent chars NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(), "source length mismatch"); NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(), "destination length mismatch"); MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(), deletedCharsArray.Elements()); } else { // No 
merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0); } }