summaryrefslogtreecommitdiffstats
path: root/intl/components/gtest/TestScript.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/components/gtest/TestScript.cpp')
-rw-r--r--intl/components/gtest/TestScript.cpp62
1 files changed, 62 insertions, 0 deletions
diff --git a/intl/components/gtest/TestScript.cpp b/intl/components/gtest/TestScript.cpp
new file mode 100644
index 0000000000..72d8cd1087
--- /dev/null
+++ b/intl/components/gtest/TestScript.cpp
@@ -0,0 +1,62 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "gtest/gtest.h"
+
+#include "mozilla/intl/UnicodeProperties.h"
+#include "mozilla/intl/UnicodeScriptCodes.h"
+
+namespace mozilla::intl {
+TEST(IntlScript, GetExtensions)
+{
+ UnicodeProperties::ScriptExtensionVector extensions;
+
+ // 0x0000..0x0040 are Common.
+ for (char32_t ch = 0; ch < 0x0041; ch++) {
+ ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk());
+ ASSERT_EQ(extensions.length(), 1u);
+ ASSERT_EQ(Script(extensions[0]), Script::COMMON);
+ }
+
+ // 0x0300..0x0341 are Inherited.
+ for (char32_t ch = 0x300; ch < 0x0341; ch++) {
+ ASSERT_TRUE(UnicodeProperties::GetExtensions(ch, extensions).isOk());
+ ASSERT_EQ(extensions.length(), 1u);
+ ASSERT_EQ(Script(extensions[0]), Script::INHERITED);
+ }
+
+ // 0x1cf7's script code is Common, but its script extension is Beng.
+ ASSERT_TRUE(UnicodeProperties::GetExtensions(0x1cf7, extensions).isOk());
+ ASSERT_EQ(extensions.length(), 1u);
+ ASSERT_EQ(Script(extensions[0]), Script::BENGALI);
+
+ // ؿ
+ // https://unicode-table.com/en/063F/
+ // This character doesn't have any script extension, so the script code is
+ // returned.
+ ASSERT_TRUE(UnicodeProperties::GetExtensions(0x063f, extensions).isOk());
+ ASSERT_EQ(extensions.length(), 1u);
+ ASSERT_EQ(Script(extensions[0]), Script::ARABIC);
+
+ // 0xff65 is the unicode character '・', see https://unicode-table.com/en/FF65/
+ // Halfwidth Katakana Middle Dot.
+ ASSERT_TRUE(UnicodeProperties::GetExtensions(0xff65, extensions).isOk());
+
+ // 0xff65 should have the following script extensions:
+ // Bopo Hang Hani Hira Kana Yiii.
+ ASSERT_EQ(extensions.length(), 6u);
+
+ ASSERT_EQ(Script(extensions[0]), Script::BOPOMOFO);
+ ASSERT_EQ(Script(extensions[1]), Script::HAN);
+ ASSERT_EQ(Script(extensions[2]), Script::HANGUL);
+ ASSERT_EQ(Script(extensions[3]), Script::HIRAGANA);
+ ASSERT_EQ(Script(extensions[4]), Script::KATAKANA);
+ ASSERT_EQ(Script(extensions[5]), Script::YI);
+
+ // The max code point is 0x10ffff, so 0x110000 should be invalid.
+ // Script::UNKNOWN should be returned for an invalid code point.
+ ASSERT_TRUE(UnicodeProperties::GetExtensions(0x110000, extensions).isOk());
+ ASSERT_EQ(extensions.length(), 1u);
+ ASSERT_EQ(Script(extensions[0]), Script::UNKNOWN);
+}
+} // namespace mozilla::intl