261 lines
8.3 KiB
C++
261 lines
8.3 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||
#include "gtest/gtest.h"
|
||
|
||
#include "mozilla/intl/String.h"
|
||
#include "mozilla/Span.h"
|
||
#include "mozilla/TextUtils.h"
|
||
#include "mozilla/Try.h"
|
||
|
||
#include <algorithm>
|
||
|
||
#include "TestBuffer.h"
|
||
|
||
namespace mozilla::intl {
|
||
|
||
// Test helper: lowercases `aString` for `aLocale` through
// String::ToLocaleLowerCase, writing the result into `aBuffer`.
//
// The buffer is cleared first so that it can be reused between calls. On
// success a view obtained from the buffer is returned; on failure MOZ_TRY
// hands the error back to the caller.
static Result<std::u16string_view, ICUError> ToLocaleLowerCase(
    const char* aLocale, const char16_t* aString,
    TestBuffer<char16_t>& aBuffer) {
  aBuffer.clear();

  const auto input = MakeStringSpan(aString);
  MOZ_TRY(String::ToLocaleLowerCase(aLocale, input, aBuffer));

  return aBuffer.get_string_view();
}
|
||
|
||
// Test helper: uppercases `aString` for `aLocale` through
// String::ToLocaleUpperCase, writing the result into `aBuffer`.
//
// The buffer is cleared first so that it can be reused between calls. On
// success a view obtained from the buffer is returned; on failure MOZ_TRY
// hands the error back to the caller.
static Result<std::u16string_view, ICUError> ToLocaleUpperCase(
    const char* aLocale, const char16_t* aString,
    TestBuffer<char16_t>& aBuffer) {
  aBuffer.clear();

  const auto input = MakeStringSpan(aString);
  MOZ_TRY(String::ToLocaleUpperCase(aLocale, input, aBuffer));

  return aBuffer.get_string_view();
}
|
||
|
||
TEST(IntlString, ToLocaleLowerCase)
{
  // Each row is one expectation: lowercasing `input` under `locale` must
  // produce `expected`.
  struct Case {
    const char* locale;
    const char16_t* input;
    std::u16string_view expected;
  };

  const Case cases[] = {
      {"en", u"test", u"test"},
      {"en", u"TEST", u"test"},

      // Turkish dotless i.
      {"tr", u"I", u"ı"},
      {"tr", u"İ", u"i"},
      {"tr", u"I\u0307", u"i"},
  };

  // A single buffer is shared by all rows; the helper clears it on each call.
  TestBuffer<char16_t> buf;

  for (const Case& c : cases) {
    ASSERT_EQ(ToLocaleLowerCase(c.locale, c.input, buf).unwrap(), c.expected);
  }
}
|
||
|
||
TEST(IntlString, ToLocaleUpperCase)
{
  // Each row is one expectation: uppercasing `input` under `locale` must
  // produce `expected`.
  struct Case {
    const char* locale;
    const char16_t* input;
    std::u16string_view expected;
  };

  const Case cases[] = {
      {"en", u"test", u"TEST"},
      {"en", u"TEST", u"TEST"},

      // Turkish dotless i.
      {"tr", u"i", u"İ"},
      {"tr", u"ı", u"I"},

      // Output can be longer than the input string.
      {"en", u"Größenmaßstäbe", u"GRÖSSENMASSSTÄBE"},
  };

  // A single buffer is shared by all rows; the helper clears it on each call.
  TestBuffer<char16_t> buf;

  for (const Case& c : cases) {
    ASSERT_EQ(ToLocaleUpperCase(c.locale, c.input, buf).unwrap(), c.expected);
  }
}
|
||
|
||
TEST(IntlString, NormalizeNFC)
{
  using namespace std::literals;

  using AlreadyNormalized = String::AlreadyNormalized;
  const auto form = String::NormalizationForm::NFC;

  TestBuffer<char16_t> out;

  // Empty input: expected to be reported as already normalized, with nothing
  // written to the buffer.
  ASSERT_EQ(String::Normalize(form, u""sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Plain ASCII: same expectation.
  ASSERT_EQ(String::Normalize(form, u"abcdef"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Base letter followed by a combining diaeresis: expected to compose into
  // the single precomposed letter.
  ASSERT_EQ(String::Normalize(form, u"a\u0308"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"ä");

  // Drop the previous output so the next emptiness check is meaningful.
  out.clear();

  // Vulgar fraction one half: expected to be left alone by NFC.
  ASSERT_EQ(String::Normalize(form, u"½"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");
}
|
||
|
||
TEST(IntlString, NormalizeNFD)
{
  using namespace std::literals;

  using AlreadyNormalized = String::AlreadyNormalized;
  const auto form = String::NormalizationForm::NFD;

  TestBuffer<char16_t> out;

  // Empty input: expected to be reported as already normalized, with nothing
  // written to the buffer.
  ASSERT_EQ(String::Normalize(form, u""sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Plain ASCII: same expectation.
  ASSERT_EQ(String::Normalize(form, u"abcdef"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Precomposed letter: expected to decompose into base letter + combining
  // diaeresis.
  ASSERT_EQ(String::Normalize(form, u"ä"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"a\u0308");

  // Drop the previous output so the next emptiness check is meaningful.
  out.clear();

  // Vulgar fraction one half: expected to be left alone by NFD.
  ASSERT_EQ(String::Normalize(form, u"½"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Test with inline capacity.
  TestBuffer<char16_t, 2> smallOut;

  ASSERT_EQ(String::Normalize(form, u" ç"sv, smallOut).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(smallOut.get_string_view(), u" c\u0327");
}
|
||
|
||
TEST(IntlString, NormalizeNFKC)
{
  using namespace std::literals;

  using AlreadyNormalized = String::AlreadyNormalized;
  const auto form = String::NormalizationForm::NFKC;

  TestBuffer<char16_t> out;

  // Empty input: expected to be reported as already normalized, with nothing
  // written to the buffer.
  ASSERT_EQ(String::Normalize(form, u""sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Plain ASCII: same expectation.
  ASSERT_EQ(String::Normalize(form, u"abcdef"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Base letter followed by a combining diaeresis: expected to compose into
  // the single precomposed letter.
  ASSERT_EQ(String::Normalize(form, u"a\u0308"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"ä");

  // Start the next case from an empty buffer.
  out.clear();

  // Vulgar fraction one half: expected to be rewritten by the compatibility
  // form.
  ASSERT_EQ(String::Normalize(form, u"½"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"1⁄2");
}
|
||
|
||
TEST(IntlString, NormalizeNFKD)
{
  using namespace std::literals;

  using AlreadyNormalized = String::AlreadyNormalized;
  const auto form = String::NormalizationForm::NFKD;

  TestBuffer<char16_t> out;

  // Empty input: expected to be reported as already normalized, with nothing
  // written to the buffer.
  ASSERT_EQ(String::Normalize(form, u""sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Plain ASCII: same expectation.
  ASSERT_EQ(String::Normalize(form, u"abcdef"sv, out).unwrap(),
            AlreadyNormalized::Yes);
  ASSERT_EQ(out.get_string_view(), u"");

  // Precomposed letter: expected to decompose into base letter + combining
  // diaeresis.
  ASSERT_EQ(String::Normalize(form, u"ä"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"a\u0308");

  // Start the next case from an empty buffer.
  out.clear();

  // Vulgar fraction one half: expected to be rewritten by the compatibility
  // form.
  ASSERT_EQ(String::Normalize(form, u"½"sv, out).unwrap(),
            AlreadyNormalized::No);
  ASSERT_EQ(out.get_string_view(), u"1⁄2");
}
|
||
|
||
TEST(IntlString, ComposePairNFC)
{
  // Each row is one expectation: composing `first` with `second` must return
  // `expected`. The rows that do not compose expect U'\0'.
  struct Case {
    char32_t first;
    char32_t second;
    char32_t expected;
  };

  const Case cases[] = {
      // Pair of base characters do not compose
      {U'a', U'b', U'\0'},
      // Base letter + accent
      {U'a', U'\u0308', U'ä'},
      // Accented letter + a further accent
      {U'ä', U'\u0304', U'ǟ'},
      // Accented letter + a further accent, but doubly-accented form is not
      // available
      {U'ä', U'\u0301', U'\0'},
      // These do not compose because although U+0344 has the decomposition
      // <0308, 0301> (see the DecomposeRawNFD test), it also has the
      // Full_Composition_Exclusion property.
      {U'\u0308', U'\u0301', U'\0'},
      // Supplementary-plane letter + accent
      {U'\U00011099', U'\U000110BA', U'\U0001109A'},
  };

  for (const Case& c : cases) {
    ASSERT_EQ(String::ComposePairNFC(c.first, c.second), c.expected);
  }
}
|
||
|
||
TEST(IntlString, DecomposeRawNFD)
{
  // Each row is one expectation: decomposing `input` must return `length`,
  // and the first `length` output slots must match `expected`. Slots beyond
  // `length` are not inspected.
  struct Case {
    char32_t input;
    int length;
    char32_t expected[2];
  };

  const Case cases[] = {
      // Non-decomposable character
      {U'a', 0, {}},
      // Singleton decomposition
      {U'\u212A', 1, {U'K'}},
      // Simple accented letter
      {U'ä', 2, {U'a', U'\u0308'}},
      // Double-accented letter decomposes by only one level
      {U'ǟ', 2, {U'ä', U'\u0304'}},
      // Non-starter can decompose, but will not recompose (see the
      // ComposePairNFC test)
      {U'\u0344', 2, {U'\u0308', U'\u0301'}},
      // Supplementary-plane letter with decomposition
      {U'\U0001109A', 2, {U'\U00011099', U'\U000110BA'}},
  };

  char32_t decomp[2];

  for (const Case& c : cases) {
    ASSERT_EQ(String::DecomposeRawNFD(c.input, decomp), c.length);
    for (int i = 0; i < c.length; ++i) {
      ASSERT_EQ(decomp[i], c.expected[i]);
    }
  }
}
|
||
|
||
TEST(IntlString, IsCased)
{
  // A lowercase Latin letter is expected to be cased; a digit is not.
  const bool letterIsCased = String::IsCased(U'a');
  ASSERT_TRUE(letterIsCased);

  const bool digitIsCased = String::IsCased(U'0');
  ASSERT_FALSE(digitIsCased);
}
|
||
|
||
TEST(IntlString, IsCaseIgnorable)
{
  // A lowercase Latin letter is expected not to be case-ignorable; a full
  // stop is.
  const bool letterIsIgnorable = String::IsCaseIgnorable(U'a');
  ASSERT_FALSE(letterIsIgnorable);

  const bool fullStopIsIgnorable = String::IsCaseIgnorable(U'.');
  ASSERT_TRUE(fullStopIsIgnorable);
}
|
||
|
||
TEST(IntlString, GetUnicodeVersion)
{
  auto version = String::GetUnicodeVersion();

  // The version string must consist solely of ASCII digits and dots, i.e. no
  // character may fall outside that set.
  const bool hasUnexpectedChar =
      std::any_of(version.begin(), version.end(), [](char ch) {
        return !IsAsciiDigit(ch) && ch != '.';
      });

  ASSERT_FALSE(hasUnexpectedChar);
}
|
||
|
||
} // namespace mozilla::intl
|