diff options
Diffstat (limited to 'intl/components/gtest/TestString.cpp')
-rw-r--r-- | intl/components/gtest/TestString.cpp | 260 |
1 files changed, 260 insertions, 0 deletions
diff --git a/intl/components/gtest/TestString.cpp b/intl/components/gtest/TestString.cpp new file mode 100644 index 0000000000..5b4d24b3b7 --- /dev/null +++ b/intl/components/gtest/TestString.cpp @@ -0,0 +1,260 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "gtest/gtest.h" + +#include "mozilla/intl/String.h" +#include "mozilla/Span.h" +#include "mozilla/TextUtils.h" + +#include <algorithm> + +#include "TestBuffer.h" + +namespace mozilla::intl { + +static Result<std::u16string_view, ICUError> ToLocaleLowerCase( + const char* aLocale, const char16_t* aString, + TestBuffer<char16_t>& aBuffer) { + aBuffer.clear(); + + MOZ_TRY(String::ToLocaleLowerCase(aLocale, MakeStringSpan(aString), aBuffer)); + + return aBuffer.get_string_view(); +} + +static Result<std::u16string_view, ICUError> ToLocaleUpperCase( + const char* aLocale, const char16_t* aString, + TestBuffer<char16_t>& aBuffer) { + aBuffer.clear(); + + MOZ_TRY(String::ToLocaleUpperCase(aLocale, MakeStringSpan(aString), aBuffer)); + + return aBuffer.get_string_view(); +} + +TEST(IntlString, ToLocaleLowerCase) +{ + TestBuffer<char16_t> buf; + + ASSERT_EQ(ToLocaleLowerCase("en", u"test", buf).unwrap(), u"test"); + ASSERT_EQ(ToLocaleLowerCase("en", u"TEST", buf).unwrap(), u"test"); + + // Turkish dotless i. + ASSERT_EQ(ToLocaleLowerCase("tr", u"I", buf).unwrap(), u"ı"); + ASSERT_EQ(ToLocaleLowerCase("tr", u"İ", buf).unwrap(), u"i"); + ASSERT_EQ(ToLocaleLowerCase("tr", u"I\u0307", buf).unwrap(), u"i"); +} + +TEST(IntlString, ToLocaleUpperCase) +{ + TestBuffer<char16_t> buf; + + ASSERT_EQ(ToLocaleUpperCase("en", u"test", buf).unwrap(), u"TEST"); + ASSERT_EQ(ToLocaleUpperCase("en", u"TEST", buf).unwrap(), u"TEST"); + + // Turkish dotless i. + ASSERT_EQ(ToLocaleUpperCase("tr", u"i", buf).unwrap(), u"İ"); + ASSERT_EQ(ToLocaleUpperCase("tr", u"ı", buf).unwrap(), u"I"); + + // Output can be longer than the input string. + ASSERT_EQ(ToLocaleUpperCase("en", u"Größenmaßstäbe", buf).unwrap(), + u"GRÖSSENMASSSTÄBE"); +} + +TEST(IntlString, NormalizeNFC) +{ + using namespace std::literals; + + using NormalizationForm = String::NormalizationForm; + using AlreadyNormalized = String::AlreadyNormalized; + + TestBuffer<char16_t> buf; + + auto alreadyNormalized = + String::Normalize(NormalizationForm::NFC, u""sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = + String::Normalize(NormalizationForm::NFC, u"abcdef"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = + String::Normalize(NormalizationForm::NFC, u"a\u0308"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf.get_string_view(), u"ä"); + + buf.clear(); + + alreadyNormalized = String::Normalize(NormalizationForm::NFC, u"½"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); +} + +TEST(IntlString, NormalizeNFD) +{ + using namespace std::literals; + + using NormalizationForm = String::NormalizationForm; + using AlreadyNormalized = String::AlreadyNormalized; + + TestBuffer<char16_t> buf; + + auto alreadyNormalized = + String::Normalize(NormalizationForm::NFD, u""sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = + String::Normalize(NormalizationForm::NFD, u"abcdef"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = String::Normalize(NormalizationForm::NFD, u"ä"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf.get_string_view(), u"a\u0308"); + + buf.clear(); + + alreadyNormalized = String::Normalize(NormalizationForm::NFD, u"½"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + // Test with inline capacity. + TestBuffer<char16_t, 2> buf2; + + alreadyNormalized = String::Normalize(NormalizationForm::NFD, u" ç"sv, buf2); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf2.get_string_view(), u" c\u0327"); +} + +TEST(IntlString, NormalizeNFKC) +{ + using namespace std::literals; + + using NormalizationForm = String::NormalizationForm; + using AlreadyNormalized = String::AlreadyNormalized; + + TestBuffer<char16_t> buf; + + auto alreadyNormalized = + String::Normalize(NormalizationForm::NFKC, u""sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = + String::Normalize(NormalizationForm::NFKC, u"abcdef"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = + String::Normalize(NormalizationForm::NFKC, u"a\u0308"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf.get_string_view(), u"ä"); + + buf.clear(); + + alreadyNormalized = String::Normalize(NormalizationForm::NFKC, u"½"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf.get_string_view(), u"1⁄2"); +} + +TEST(IntlString, NormalizeNFKD) +{ + using namespace std::literals; + + using NormalizationForm = String::NormalizationForm; + using AlreadyNormalized = String::AlreadyNormalized; + + TestBuffer<char16_t> buf; + + auto alreadyNormalized = + String::Normalize(NormalizationForm::NFKD, u""sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = + String::Normalize(NormalizationForm::NFKD, u"abcdef"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes); + ASSERT_EQ(buf.get_string_view(), u""); + + alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"ä"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf.get_string_view(), u"a\u0308"); + + buf.clear(); + + alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"½"sv, buf); + ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No); + ASSERT_EQ(buf.get_string_view(), u"1⁄2"); +} + +TEST(IntlString, ComposePairNFC) +{ + // Pair of base characters do not compose + ASSERT_EQ(String::ComposePairNFC(U'a', U'b'), U'\0'); + // Base letter + accent + ASSERT_EQ(String::ComposePairNFC(U'a', U'\u0308'), U'ä'); + // Accented letter + a further accent + ASSERT_EQ(String::ComposePairNFC(U'ä', U'\u0304'), U'ǟ'); + // Accented letter + a further accent, but doubly-accented form is not + // available + ASSERT_EQ(String::ComposePairNFC(U'ä', U'\u0301'), U'\0'); + // These do not compose because although U+0344 has the decomposition <0308, + // 0301> (see below), it also has the Full_Composition_Exclusion property. + ASSERT_EQ(String::ComposePairNFC(U'\u0308', U'\u0301'), U'\0'); + // Supplementary-plane letter + accent + ASSERT_EQ(String::ComposePairNFC(U'\U00011099', U'\U000110BA'), + U'\U0001109A'); +} + +TEST(IntlString, DecomposeRawNFD) +{ + char32_t buf[2]; + // Non-decomposable character + ASSERT_EQ(String::DecomposeRawNFD(U'a', buf), 0); + // Singleton decomposition + ASSERT_EQ(String::DecomposeRawNFD(U'\u212A', buf), 1); + ASSERT_EQ(buf[0], U'K'); + // Simple accented letter + ASSERT_EQ(String::DecomposeRawNFD(U'ä', buf), 2); + ASSERT_EQ(buf[0], U'a'); + ASSERT_EQ(buf[1], U'\u0308'); + // Double-accented letter decomposes by only one level + ASSERT_EQ(String::DecomposeRawNFD(U'ǟ', buf), 2); + ASSERT_EQ(buf[0], U'ä'); + ASSERT_EQ(buf[1], U'\u0304'); + // Non-starter can decompose, but will not recompose (see above) + ASSERT_EQ(String::DecomposeRawNFD(U'\u0344', buf), 2); + ASSERT_EQ(buf[0], U'\u0308'); + ASSERT_EQ(buf[1], U'\u0301'); + // Supplementary-plane letter with decomposition + ASSERT_EQ(String::DecomposeRawNFD(U'\U0001109A', buf), 2); + ASSERT_EQ(buf[0], U'\U00011099'); + ASSERT_EQ(buf[1], U'\U000110BA'); +} + +TEST(IntlString, IsCased) +{ + ASSERT_TRUE(String::IsCased(U'a')); + ASSERT_FALSE(String::IsCased(U'0')); +} + +TEST(IntlString, IsCaseIgnorable) +{ + ASSERT_FALSE(String::IsCaseIgnorable(U'a')); + ASSERT_TRUE(String::IsCaseIgnorable(U'.')); +} + +TEST(IntlString, GetUnicodeVersion) +{ + auto version = String::GetUnicodeVersion(); + + ASSERT_TRUE(std::all_of(version.begin(), version.end(), [](char ch) { + return IsAsciiDigit(ch) || ch == '.'; + })); +} + +} // namespace mozilla::intl |