From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- intl/components/gtest/TestBidi.cpp | 332 +++++++++++++++++++++++++++++++++++++ 1 file changed, 332 insertions(+) create mode 100644 intl/components/gtest/TestBidi.cpp (limited to 'intl/components/gtest/TestBidi.cpp') diff --git a/intl/components/gtest/TestBidi.cpp b/intl/components/gtest/TestBidi.cpp new file mode 100644 index 0000000000..a53a9c1b9a --- /dev/null +++ b/intl/components/gtest/TestBidi.cpp @@ -0,0 +1,332 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "gtest/gtest.h" + +#include "mozilla/intl/Bidi.h" +#include "mozilla/Span.h" +namespace mozilla::intl { + +struct VisualRun { + Span string; + BidiDirection direction; +}; + +/** + * An iterator for visual runs in a paragraph. See Bug 1736597 for integrating + * this into the public API. + */ +class MOZ_STACK_CLASS VisualRunIter { + public: + VisualRunIter(Bidi& aBidi, Span aParagraph, + BidiEmbeddingLevel aLevel) + : mBidi(aBidi), mParagraph(aParagraph) { + // Crash in case of errors by calling unwrap. If this were a real API, this + // would be a TryCreate call. + mBidi.SetParagraph(aParagraph, aLevel).unwrap(); + mRunCount = mBidi.CountRuns().unwrap(); + } + + Maybe Next() { + if (mRunIndex >= mRunCount) { + return Nothing(); + } + + int32_t stringIndex = -1; + int32_t stringLength = -1; + + BidiDirection direction = + mBidi.GetVisualRun(mRunIndex, &stringIndex, &stringLength); + + Span string(mParagraph.Elements() + stringIndex, + stringLength); + mRunIndex++; + return Some(VisualRun{string, direction}); + } + + private: + Bidi& mBidi; + Span mParagraph = Span(); + int32_t mRunIndex = 0; + int32_t mRunCount = 0; +}; + +struct LogicalRun { + Span string; + BidiEmbeddingLevel embeddingLevel; +}; + +/** + * An iterator for logical runs in a paragraph. See Bug 1736597 for integrating + * this into the public API. + */ +class MOZ_STACK_CLASS LogicalRunIter { + public: + LogicalRunIter(Bidi& aBidi, Span aParagraph, + BidiEmbeddingLevel aLevel) + : mBidi(aBidi), mParagraph(aParagraph) { + // Crash in case of errors by calling unwrap. If this were a real API, this + // would be a TryCreate call. + mBidi.SetParagraph(aParagraph, aLevel).unwrap(); + mBidi.CountRuns().unwrap(); + } + + Maybe Next() { + if (mRunIndex >= static_cast(mParagraph.Length())) { + return Nothing(); + } + + int32_t logicalLimit; + + BidiEmbeddingLevel embeddingLevel; + mBidi.GetLogicalRun(mRunIndex, &logicalLimit, &embeddingLevel); + + Span string(mParagraph.Elements() + mRunIndex, + logicalLimit - mRunIndex); + + mRunIndex = logicalLimit; + return Some(LogicalRun{string, embeddingLevel}); + } + + private: + Bidi& mBidi; + Span mParagraph = Span(); + int32_t mRunIndex = 0; +}; + +TEST(IntlBidi, SimpleLTR) +{ + Bidi bidi{}; + LogicalRunIter logicalRunIter(bidi, MakeStringSpan(u"this is a paragraph"), + BidiEmbeddingLevel::DefaultLTR()); + ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0); + ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::LTR); + + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_EQ(logicalRun->string, MakeStringSpan(u"this is a paragraph")); + ASSERT_EQ(logicalRun->embeddingLevel, 0); + ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR); + } + + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isNothing()); + } +} + +TEST(IntlBidi, SimpleRTL) +{ + Bidi bidi{}; + LogicalRunIter logicalRunIter(bidi, MakeStringSpan(u"فايرفوكس رائع"), + BidiEmbeddingLevel::DefaultLTR()); + ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 1); + ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::RTL); + + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_EQ(logicalRun->string, MakeStringSpan(u"فايرفوكس رائع")); + ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::RTL); + ASSERT_EQ(logicalRun->embeddingLevel, 1); + } + + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isNothing()); + } +} + +TEST(IntlBidi, MultiLevel) +{ + Bidi bidi{}; + LogicalRunIter logicalRunIter( + bidi, MakeStringSpan(u"Firefox is awesome: رائع Firefox"), + BidiEmbeddingLevel::DefaultLTR()); + ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0); + ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::Mixed); + + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_EQ(logicalRun->string, MakeStringSpan(u"Firefox is awesome: ")); + ASSERT_EQ(logicalRun->embeddingLevel, 0); + } + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_EQ(logicalRun->string, MakeStringSpan(u"رائع")); + ASSERT_EQ(logicalRun->embeddingLevel, 1); + } + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_EQ(logicalRun->string, MakeStringSpan(u" Firefox")); + ASSERT_EQ(logicalRun->embeddingLevel, 0); + } + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isNothing()); + } +} + +TEST(IntlBidi, RtlOverride) +{ + Bidi bidi{}; + // Set the paragraph using the RTL embedding mark U+202B, and the LTR + // embedding mark U+202A to increase the embedding level. This mark switches + // the weakly directional character "_". This demonstrates that embedding + // levels can be computed. + LogicalRunIter logicalRunIter( + bidi, MakeStringSpan(u"ltr\u202b___رائع___\u202a___ltr__"), + BidiEmbeddingLevel::DefaultLTR()); + ASSERT_EQ(bidi.GetParagraphEmbeddingLevel(), 0); + ASSERT_EQ(bidi.GetParagraphDirection(), Bidi::ParagraphDirection::Mixed); + + // Note that the Unicode Bidi Algorithm explicitly does NOT require any + // specific placement or levels for the embedding controls (see + // rule https://www.unicode.org/reports/tr9/#X9). + // Further, the implementation notes at + // https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters + // advise to "Resolve any LRE, RLE, LRO, RLO, PDF, or BN to the level of the + // preceding character if there is one...", which means the embedding marks + // here will each become part of the *preceding* run. This is how the Rust + // unicode-bidi implementation behaves. + // However, ICU4C behavior is such that they take on the level of the *next* + // character, and become part of the following run. + // For now, we accept either result here. + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"ltr") || + logicalRun->string == MakeStringSpan(u"ltr\u202b")); + ASSERT_EQ(logicalRun->embeddingLevel, 0); + ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR); + } + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"\u202b___رائع___") || + logicalRun->string == MakeStringSpan(u"___رائع___\u202a")); + ASSERT_EQ(logicalRun->embeddingLevel, 1); + ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::RTL); + } + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isSome()); + ASSERT_TRUE(logicalRun->string == MakeStringSpan(u"\u202a___ltr__") || + logicalRun->string == MakeStringSpan(u"___ltr__")); + ASSERT_EQ(logicalRun->embeddingLevel, 2); + ASSERT_EQ(logicalRun->embeddingLevel.Direction(), BidiDirection::LTR); + } + { + auto logicalRun = logicalRunIter.Next(); + ASSERT_TRUE(logicalRun.isNothing()); + } +} + +TEST(IntlBidi, VisualRuns) +{ + Bidi bidi{}; + + VisualRunIter visualRunIter( + bidi, + MakeStringSpan( + u"first visual run التشغيل البصري الثاني third visual run"), + BidiEmbeddingLevel::DefaultLTR()); + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isSome()); + ASSERT_EQ(run->string, MakeStringSpan(u"first visual run ")); + ASSERT_EQ(run->direction, BidiDirection::LTR); + } + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isSome()); + ASSERT_EQ(run->string, MakeStringSpan(u"التشغيل البصري الثاني")); + ASSERT_EQ(run->direction, BidiDirection::RTL); + } + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isSome()); + ASSERT_EQ(run->string, MakeStringSpan(u" third visual run")); + ASSERT_EQ(run->direction, BidiDirection::LTR); + } + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isNothing()); + } +} + +TEST(IntlBidi, VisualRunsWithEmbeds) +{ + // Compare this test to the logical order test. + Bidi bidi{}; + VisualRunIter visualRunIter( + bidi, MakeStringSpan(u"ltr\u202b___رائع___\u202a___ltr___"), + BidiEmbeddingLevel::DefaultLTR()); + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isSome()); + ASSERT_TRUE(run->string == MakeStringSpan(u"ltr") || + run->string == MakeStringSpan(u"ltr\u202b")); + ASSERT_EQ(run->direction, BidiDirection::LTR); + } + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isSome()); + ASSERT_TRUE(run->string == MakeStringSpan(u"\u202a___ltr___") || + run->string == MakeStringSpan(u"___ltr___")); + ASSERT_EQ(run->direction, BidiDirection::LTR); + } + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isSome()); + ASSERT_TRUE(run->string == MakeStringSpan(u"\u202b___رائع___") || + run->string == MakeStringSpan(u"___رائع___\u202a")); + ASSERT_EQ(run->direction, BidiDirection::RTL); + } + { + Maybe run = visualRunIter.Next(); + ASSERT_TRUE(run.isNothing()); + } +} + +// The full Bidi class can be found in [1]. +// +// [1]: https://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt +TEST(IntlBidi, GetBaseDirection) +{ + // Return Neutral as default if empty string is provided. + ASSERT_EQ(Bidi::GetBaseDirection(nullptr), Bidi::BaseDirection::Neutral); + + // White space(WS) is classified as Neutral. + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" ")), + Bidi::BaseDirection::Neutral); + + // 000A and 000D are paragraph separators(BS), which are also classified as + // Neutral. + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u000A")), + Bidi::BaseDirection::Neutral); + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u000D")), + Bidi::BaseDirection::Neutral); + + // 0620..063f are Arabic letters, which is of type AL. + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u0620\u0621\u0622")), + Bidi::BaseDirection::RTL); + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" \u0620\u0621\u0622")), + Bidi::BaseDirection::RTL); + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"\u0620\u0621\u0622ABC")), + Bidi::BaseDirection::RTL); + + // First strong character is of English letters. + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"ABC")), + Bidi::BaseDirection::LTR); + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u" ABC")), + Bidi::BaseDirection::LTR); + ASSERT_EQ(Bidi::GetBaseDirection(MakeStringSpan(u"ABC\u0620")), + Bidi::BaseDirection::LTR); +} + +} // namespace mozilla::intl -- cgit v1.2.3