1
0
Fork 0
firefox/intl/components/gtest/TestString.cpp
Daniel Baumann 5e9a113729
Adding upstream version 140.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-25 09:37:52 +02:00

261 lines
8.3 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "mozilla/intl/String.h"
#include "mozilla/Span.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Try.h"
#include <algorithm>
#include "TestBuffer.h"
namespace mozilla::intl {
/**
 * Test helper around String::ToLocaleLowerCase.
 *
 * Clears `aBuffer`, lower-cases `aString` (wrapped with MakeStringSpan) for
 * the locale `aLocale` into it, and returns the buffer's string view. Any
 * error reported by String::ToLocaleLowerCase is propagated to the caller.
 *
 * NOTE(review): the returned view presumably aliases `aBuffer`'s storage, so
 * it should be consumed before the buffer is reused -- confirm in TestBuffer.h.
 */
static Result<std::u16string_view, ICUError> ToLocaleLowerCase(
    const char* aLocale, const char16_t* aString,
    TestBuffer<char16_t>& aBuffer) {
  // Start from an empty buffer so one buffer can serve several calls.
  aBuffer.clear();

  const auto input = MakeStringSpan(aString);
  MOZ_TRY(String::ToLocaleLowerCase(aLocale, input, aBuffer));

  return aBuffer.get_string_view();
}
/**
 * Test helper around String::ToLocaleUpperCase.
 *
 * Clears `aBuffer`, upper-cases `aString` (wrapped with MakeStringSpan) for
 * the locale `aLocale` into it, and returns the buffer's string view. Any
 * error reported by String::ToLocaleUpperCase is propagated to the caller.
 *
 * NOTE(review): the returned view presumably aliases `aBuffer`'s storage, so
 * it should be consumed before the buffer is reused -- confirm in TestBuffer.h.
 */
static Result<std::u16string_view, ICUError> ToLocaleUpperCase(
    const char* aLocale, const char16_t* aString,
    TestBuffer<char16_t>& aBuffer) {
  // Start from an empty buffer so one buffer can serve several calls.
  aBuffer.clear();

  const auto input = MakeStringSpan(aString);
  MOZ_TRY(String::ToLocaleUpperCase(aLocale, input, aBuffer));

  return aBuffer.get_string_view();
}
// Checks locale-sensitive lower-casing for English and Turkish inputs.
TEST(IntlString, ToLocaleLowerCase)
{
  TestBuffer<char16_t> scratch;

  // Lower-cases `aInput` for `aLocale`, reusing `scratch` on every call.
  const auto lower = [&scratch](const char* aLocale, const char16_t* aInput) {
    return ToLocaleLowerCase(aLocale, aInput, scratch).unwrap();
  };

  // ASCII: already lower-case input is unchanged, upper-case input is folded.
  ASSERT_EQ(lower("en", u"test"), u"test");
  ASSERT_EQ(lower("en", u"TEST"), u"test");

  // Turkish: "I" maps to dotless "ı", while dotted "İ" (precomposed or spelled
  // as I + U+0307 COMBINING DOT ABOVE) maps to plain "i".
  ASSERT_EQ(lower("tr", u"I"), u"ı");
  ASSERT_EQ(lower("tr", u"İ"), u"i");
  ASSERT_EQ(lower("tr", u"I\u0307"), u"i");
}
// Checks locale-sensitive upper-casing for English and Turkish inputs.
TEST(IntlString, ToLocaleUpperCase)
{
  TestBuffer<char16_t> scratch;

  // Upper-cases `aInput` for `aLocale`, reusing `scratch` on every call.
  const auto upper = [&scratch](const char* aLocale, const char16_t* aInput) {
    return ToLocaleUpperCase(aLocale, aInput, scratch).unwrap();
  };

  // ASCII: lower-case input is folded, already upper-case input is unchanged.
  ASSERT_EQ(upper("en", u"test"), u"TEST");
  ASSERT_EQ(upper("en", u"TEST"), u"TEST");

  // Turkish: "i" maps to dotted "İ", and dotless "ı" maps to plain "I".
  ASSERT_EQ(upper("tr", u"i"), u"İ");
  ASSERT_EQ(upper("tr", u"ı"), u"I");

  // The result may be longer than the input: each "ß" becomes "SS".
  ASSERT_EQ(upper("en", u"Größenmaßstäbe"), u"GRÖSSENMASSSTÄBE");
}
// Checks String::Normalize with the NFC form. The assertions expect the
// output buffer to stay empty whenever the input is reported as already
// normalized.
TEST(IntlString, NormalizeNFC)
{
  using namespace std::literals;
  using Form = String::NormalizationForm;
  using AlreadyNormalized = String::AlreadyNormalized;

  TestBuffer<char16_t> out;

  // Empty input.
  ASSERT_EQ(String::Normalize(Form::NFC, u""sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Plain ASCII input.
  ASSERT_EQ(String::Normalize(Form::NFC, u"abcdef"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // "a" + U+0308 COMBINING DIAERESIS composes to the single character "ä".
  ASSERT_EQ(String::Normalize(Form::NFC, u"a\u0308"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"ä");

  // Drop the output of the previous case before checking the next one.
  out.clear();

  // "½" is left untouched by NFC.
  ASSERT_EQ(String::Normalize(Form::NFC, u"½"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");
}
// Checks String::Normalize with the NFD form. The assertions expect the
// output buffer to stay empty whenever the input is reported as already
// normalized.
TEST(IntlString, NormalizeNFD)
{
  using namespace std::literals;
  using Form = String::NormalizationForm;
  using AlreadyNormalized = String::AlreadyNormalized;

  TestBuffer<char16_t> out;

  // Empty input.
  ASSERT_EQ(String::Normalize(Form::NFD, u""sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Plain ASCII input.
  ASSERT_EQ(String::Normalize(Form::NFD, u"abcdef"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // "ä" decomposes to "a" + U+0308 COMBINING DIAERESIS.
  ASSERT_EQ(String::Normalize(Form::NFD, u"ä"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"a\u0308");

  // Drop the output of the previous case before checking the next one.
  out.clear();

  // "½" is left untouched by NFD.
  ASSERT_EQ(String::Normalize(Form::NFD, u"½"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Same check with a buffer declared with an inline capacity of 2; the
  // expected three-unit result is longer than that capacity.
  TestBuffer<char16_t, 2> smallOut;
  ASSERT_EQ(String::Normalize(Form::NFD, u" ç"sv, smallOut).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(smallOut.get_string_view(), u" c\u0327");
}
// Checks String::Normalize with the NFKC (compatibility composition) form.
// The assertions expect the output buffer to stay empty whenever the input is
// reported as already normalized.
TEST(IntlString, NormalizeNFKC)
{
  using namespace std::literals;
  using NormalizationForm = String::NormalizationForm;
  using AlreadyNormalized = String::AlreadyNormalized;

  TestBuffer<char16_t> buf;

  // Empty input.
  auto alreadyNormalized =
      String::Normalize(NormalizationForm::NFKC, u""sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
  ASSERT_EQ(buf.get_string_view(), u"");

  // Plain ASCII input.
  alreadyNormalized =
      String::Normalize(NormalizationForm::NFKC, u"abcdef"sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
  ASSERT_EQ(buf.get_string_view(), u"");

  // "a" + U+0308 COMBINING DIAERESIS composes to the single character "ä".
  alreadyNormalized =
      String::Normalize(NormalizationForm::NFKC, u"a\u0308"sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
  ASSERT_EQ(buf.get_string_view(), u"ä");

  // Drop the output of the previous case before checking the next one.
  buf.clear();

  // U+00BD VULGAR FRACTION ONE HALF has the compatibility decomposition
  // <U+0031, U+2044 FRACTION SLASH, U+0032>. The fraction slash is written as
  // an escape (in its own literal so the following "2" is clearly separate)
  // because it is easily confused with, or lost next to, ASCII characters.
  alreadyNormalized = String::Normalize(NormalizationForm::NFKC, u"½"sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
  ASSERT_EQ(buf.get_string_view(), u"1\u2044" u"2");
}
// Checks String::Normalize with the NFKD (compatibility decomposition) form.
// The assertions expect the output buffer to stay empty whenever the input is
// reported as already normalized.
TEST(IntlString, NormalizeNFKD)
{
  using namespace std::literals;
  using NormalizationForm = String::NormalizationForm;
  using AlreadyNormalized = String::AlreadyNormalized;

  TestBuffer<char16_t> buf;

  // Empty input.
  auto alreadyNormalized =
      String::Normalize(NormalizationForm::NFKD, u""sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
  ASSERT_EQ(buf.get_string_view(), u"");

  // Plain ASCII input.
  alreadyNormalized =
      String::Normalize(NormalizationForm::NFKD, u"abcdef"sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::Yes);
  ASSERT_EQ(buf.get_string_view(), u"");

  // "ä" decomposes to "a" + U+0308 COMBINING DIAERESIS.
  alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"ä"sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
  ASSERT_EQ(buf.get_string_view(), u"a\u0308");

  // Drop the output of the previous case before checking the next one.
  buf.clear();

  // U+00BD VULGAR FRACTION ONE HALF has the compatibility decomposition
  // <U+0031, U+2044 FRACTION SLASH, U+0032>. The fraction slash is written as
  // an escape (in its own literal so the following "2" is clearly separate)
  // because it is easily confused with, or lost next to, ASCII characters.
  alreadyNormalized = String::Normalize(NormalizationForm::NFKD, u"½"sv, buf);
  ASSERT_EQ(alreadyNormalized.unwrap(), AlreadyNormalized::No);
  ASSERT_EQ(buf.get_string_view(), u"1\u2044" u"2");
}
// Checks String::ComposePairNFC on pairs that do and do not compose. The
// assertions expect U+0000 for every pair without a composition.
TEST(IntlString, ComposePairNFC)
{
  constexpr char32_t kNoComposition = U'\0';
  constexpr char32_t kCombiningDiaeresis = U'\u0308';
  constexpr char32_t kCombiningMacron = U'\u0304';
  constexpr char32_t kCombiningAcute = U'\u0301';

  // Two base characters never compose.
  ASSERT_EQ(String::ComposePairNFC(U'a', U'b'), kNoComposition);

  // A base letter followed by an accent.
  ASSERT_EQ(String::ComposePairNFC(U'a', kCombiningDiaeresis), U'ä');

  // An already accented letter followed by a second accent.
  ASSERT_EQ(String::ComposePairNFC(U'ä', kCombiningMacron), U'ǟ');

  // An already accented letter followed by a second accent for which no
  // doubly-accented precomposed form exists.
  ASSERT_EQ(String::ComposePairNFC(U'ä', kCombiningAcute), kNoComposition);

  // U+0344 decomposes to <0308, 0301> (covered by the DecomposeRawNFD test),
  // but it also has the Full_Composition_Exclusion property, so the pair must
  // not compose back.
  ASSERT_EQ(String::ComposePairNFC(kCombiningDiaeresis, kCombiningAcute),
            kNoComposition);

  // A supplementary-plane letter followed by an accent.
  ASSERT_EQ(String::ComposePairNFC(U'\U00011099', U'\U000110BA'),
            U'\U0001109A');
}
// Checks String::DecomposeRawNFD: the return value is compared against the
// expected number of code points, and the leading entries of the output array
// against the expected decomposition.
TEST(IntlString, DecomposeRawNFD)
{
  // Output array handed to every call; only entries below the returned
  // length are inspected.
  char32_t parts[2];

  // A character without any decomposition.
  ASSERT_EQ(String::DecomposeRawNFD(U'a', parts), 0);

  // Singleton decomposition: U+212A KELVIN SIGN -> "K".
  ASSERT_EQ(String::DecomposeRawNFD(U'\u212A', parts), 1);
  ASSERT_EQ(parts[0], U'K');

  // A letter carrying a single accent.
  ASSERT_EQ(String::DecomposeRawNFD(U'ä', parts), 2);
  ASSERT_EQ(parts[0], U'a');
  ASSERT_EQ(parts[1], U'\u0308');

  // A letter carrying two accents is decomposed by one level only, so the
  // first part is still an accented letter.
  ASSERT_EQ(String::DecomposeRawNFD(U'ǟ', parts), 2);
  ASSERT_EQ(parts[0], U'ä');
  ASSERT_EQ(parts[1], U'\u0304');

  // A non-starter can decompose even though the parts will not recompose
  // (covered by the ComposePairNFC test).
  ASSERT_EQ(String::DecomposeRawNFD(U'\u0344', parts), 2);
  ASSERT_EQ(parts[0], U'\u0308');
  ASSERT_EQ(parts[1], U'\u0301');

  // A supplementary-plane letter that has a decomposition.
  ASSERT_EQ(String::DecomposeRawNFD(U'\U0001109A', parts), 2);
  ASSERT_EQ(parts[0], U'\U00011099');
  ASSERT_EQ(parts[1], U'\U000110BA');
}
// Checks String::IsCased on one letter and one digit.
TEST(IntlString, IsCased)
{
  // A Latin letter is expected to be cased.
  const bool letterIsCased = String::IsCased(U'a');
  ASSERT_TRUE(letterIsCased);

  // A digit is expected not to be cased.
  const bool digitIsCased = String::IsCased(U'0');
  ASSERT_FALSE(digitIsCased);
}
// Checks String::IsCaseIgnorable on one letter and one punctuation mark.
TEST(IntlString, IsCaseIgnorable)
{
  // A Latin letter is expected not to be case-ignorable.
  const bool letterIsIgnorable = String::IsCaseIgnorable(U'a');
  ASSERT_FALSE(letterIsIgnorable);

  // A full stop is expected to be case-ignorable.
  const bool fullStopIsIgnorable = String::IsCaseIgnorable(U'.');
  ASSERT_TRUE(fullStopIsIgnorable);
}
// Checks that String::GetUnicodeVersion returns only ASCII digits and dots.
// NOTE(review): an empty version would also satisfy this check.
TEST(IntlString, GetUnicodeVersion)
{
  // Accepts the characters allowed in a dotted version number.
  const auto isVersionChar = [](char aCh) {
    return aCh == '.' || IsAsciiDigit(aCh);
  };

  auto version = String::GetUnicodeVersion();
  const bool onlyVersionChars =
      std::all_of(version.begin(), version.end(), isVersionChar);
  ASSERT_TRUE(onlyVersionChars);
}
} // namespace mozilla::intl