summary refs log tree commit diff stats
path: root/intl/icu/source/python/icutools/databuilder/test
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/python/icutools/databuilder/test')
-rw-r--r-- intl/icu/source/python/icutools/databuilder/test/__init__.py 2
-rw-r--r-- intl/icu/source/python/icutools/databuilder/test/__main__.py 14
-rw-r--r-- intl/icu/source/python/icutools/databuilder/test/filtration_test.py 421
-rw-r--r-- intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json 10
-rw-r--r-- intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json 197
-rw-r--r-- intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json 36
6 files changed, 680 insertions, 0 deletions
diff --git a/intl/icu/source/python/icutools/databuilder/test/__init__.py b/intl/icu/source/python/icutools/databuilder/test/__init__.py
new file mode 100644
index 0000000000..dd12bfa16e
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
diff --git a/intl/icu/source/python/icutools/databuilder/test/__main__.py b/intl/icu/source/python/icutools/databuilder/test/__main__.py
new file mode 100644
index 0000000000..6ae2c0f7c9
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/__main__.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import unittest
+
+from . import filtration_test
+
+def load_tests(loader, tests, pattern):
+    """Implement the unittest ``load_tests`` hook for this module.
+
+    The ``loader``, ``tests`` and ``pattern`` arguments are accepted because
+    the hook signature requires them, but they are not used: the returned
+    suite always wraps the suite exported by ``filtration_test``.
+    """
+    return unittest.TestSuite([filtration_test.suite])
+
+# Script entry point: hand control to unittest's command-line runner.
+# NOTE(review): the tests are expected to be picked up through the
+# load_tests hook defined in this module - confirm when changing it.
+if __name__ == '__main__':
+ unittest.main()
diff --git a/intl/icu/source/python/icutools/databuilder/test/filtration_test.py b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py
new file mode 100644
index 0000000000..416223bd7e
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py
@@ -0,0 +1,421 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io as pyio
+import json
+import os
+import unittest
+
+from .. import InFile
+from ..comment_stripper import CommentStripper
+from ..filtration import Filter
+
+# File stems that every filter under test is matched against. The entries
+# are grouped by language below; their order is significant only for the
+# order in which _check_filter visits them.
+EXAMPLE_FILE_STEMS = [
+    # af
+    "af_NA", "af_VARIANT", "af_ZA_VARIANT", "af_ZA", "af",
+    # ar / ars
+    "ar", "ar_SA", "ars",
+    # bs
+    "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
+    # en
+    "en_001", "en_150", "en_DE", "en_GB", "en_US",
+    # root
+    "root",
+    # sr
+    "sr_BA", "sr_CS",
+    "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME", "sr_Cyrl",
+    "sr_Latn_BA", "sr_Latn_CS", "sr_Latn_ME_VARIANT", "sr_Latn_ME",
+    "sr_Latn",
+    "sr_ME", "sr",
+    # vai
+    "vai_Latn_LR", "vai_Latn", "vai_LR", "vai_Vaii_LR", "vai_Vaii", "vai",
+    # yue
+    "yue",
+    # zh
+    "zh_CN",
+    "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG", "zh_Hans",
+    "zh_Hant_HK", "zh_Hant_MO", "zh_Hant_TW", "zh_Hant",
+    "zh_HK", "zh_MO", "zh_SG", "zh_TW", "zh",
+]
+
+
+class TestIO(object):
+    """I/O object handed to Filter.create_from_json by the tests.
+
+    It serves the LOCALE_DEPS.json files stored under ``sample_data`` next
+    to this module.
+    """
+
+    def __init__(self):
+        pass
+
+    def read_locale_deps(self, tree):
+        """Return the parsed sample LOCALE_DEPS.json for ``tree``.
+
+        Returns None when ``tree`` is not "brkitr", "locales" or "rbnf".
+        """
+        if tree in ("brkitr", "locales", "rbnf"):
+            deps_path = os.path.join(
+                os.path.dirname(__file__),
+                "sample_data",
+                tree,
+                "LOCALE_DEPS.json"
+            )
+            # The sample files contain // comment lines, which plain JSON
+            # does not allow, so the stream is wrapped in CommentStripper
+            # before json.load reads it.
+            with pyio.open(deps_path, "r", encoding="utf-8-sig") as deps_file:
+                return json.load(CommentStripper(deps_file))
+        return None
+
+
+class FiltrationTest(unittest.TestCase):
+    """Checks which of EXAMPLE_FILE_STEMS each kind of filter matches."""
+
+    # Whitelist shared by the "locale" filter tests. Default scripts assumed
+    # by the expectations below: sr => Cyrl, vai => Vaii, zh => Hans.
+    _LOCALE_WHITELIST = (
+        "bs_BA",     # is an alias to bs_Latn_BA
+        "en_DE",
+        "sr",        # language with no script
+        "vai_Latn",  # language with non-default script
+        "zh_Hans",   # language with default script
+    )
+
+    @staticmethod
+    def _make_filter(config):
+        """Create a Filter from a JSON-style dict, backed by a fresh TestIO."""
+        return Filter.create_from_json(config, TestIO())
+
+    def test_exclude(self):
+        config = {"filterType": "exclude"}
+        self._check_filter(self._make_filter(config), [])
+
+    def test_default_whitelist(self):
+        config = {"whitelist": ["ars", "zh_Hans"]}
+        expected = ["ars", "zh_Hans"]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_default_blacklist(self):
+        expected = set(EXAMPLE_FILE_STEMS) - {"ars", "zh_Hans"}
+        config = {"blacklist": ["ars", "zh_Hans"]}
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_language_whitelist(self):
+        config = {
+            "filterType": "language",
+            "whitelist": ["af", "bs"],
+        }
+        expected = [
+            "root",
+            "af_NA", "af_VARIANT", "af_ZA_VARIANT", "af_ZA", "af",
+            "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
+        ]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_language_blacklist(self):
+        expected = set(EXAMPLE_FILE_STEMS) - {
+            "af_NA", "af_VARIANT", "af_ZA_VARIANT", "af_ZA", "af",
+        }
+        config = {
+            "filterType": "language",
+            "blacklist": ["af"],
+        }
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_regex_whitelist(self):
+        config = {
+            "filterType": "regex",
+            "whitelist": [r"^ar.*$", r"^zh$"],
+        }
+        expected = ["ar", "ar_SA", "ars", "zh"]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_regex_blacklist(self):
+        expected = set(EXAMPLE_FILE_STEMS) - {"ar", "ar_SA", "ars", "zh"}
+        config = {
+            "filterType": "regex",
+            "blacklist": [r"^ar.*$", r"^zh$"],
+        }
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_locale_basic(self):
+        config = {
+            "filterType": "locale",
+            "whitelist": list(self._LOCALE_WHITELIST),
+        }
+        expected = [
+            "root",
+            # bs: the full dependency tree of bs_BA.
+            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
+            # en: the full dependency tree of en_DE.
+            "en", "en_DE", "en_150", "en_001",
+            # sr: Cyrl (the default script) is included, Latn is not.
+            "sr", "sr_BA", "sr_CS",
+            "sr_Cyrl", "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME",
+            # vai: Latn is included, Vaii is NOT.
+            "vai_Latn", "vai_Latn_LR",
+            # zh: Hans is included, Hant is NOT.
+            "zh", "zh_CN", "zh_SG",
+            "zh_Hans", "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG",
+        ]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_locale_no_children(self):
+        config = {
+            "filterType": "locale",
+            "includeChildren": False,
+            "whitelist": list(self._LOCALE_WHITELIST),
+        }
+        expected = [
+            "root",
+            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
+            "en", "en_DE", "en_150", "en_001",
+            "sr",
+            "vai_Latn",
+            "zh", "zh_Hans",
+        ]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_locale_include_scripts(self):
+        config = {
+            "filterType": "locale",
+            "includeScripts": True,
+            "whitelist": list(self._LOCALE_WHITELIST),
+        }
+        expected = [
+            "root",
+            # bs: includeScripts only works for language-only entries
+            # (without region).
+            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
+            # en: the full dependency tree of en_DE.
+            "en", "en_DE", "en_150", "en_001",
+            # sr: Latn is included too, since no particular script was
+            # requested.
+            "sr_BA", "sr_CS",
+            "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME", "sr_Cyrl",
+            "sr_Latn_BA", "sr_Latn_CS", "sr_Latn_ME_VARIANT", "sr_Latn_ME",
+            "sr_Latn",
+            "sr_ME", "sr",
+            # vai: Vaii is NOT included; the script was explicitly requested.
+            "vai_Latn_LR", "vai_Latn",
+            # zh: Hant is NOT included; the script was explicitly requested.
+            "zh_CN", "zh_SG",
+            "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG", "zh_Hans",
+            "zh",
+        ]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_locale_no_children_include_scripts(self):
+        config = {
+            "filterType": "locale",
+            "includeChildren": False,
+            "includeScripts": True,
+            "whitelist": list(self._LOCALE_WHITELIST),
+        }
+        expected = [
+            "root",
+            # bs: includeScripts only works for language-only entries
+            # (without region).
+            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
+            # en: the full dependency tree of en_DE.
+            "en", "en_DE", "en_150", "en_001",
+            # sr: Cyrl and Latn are included, but no other children.
+            "sr", "sr_Cyrl", "sr_Latn",
+            # vai: only the requested script.
+            "vai_Latn",
+            # zh: only the requested script.
+            "zh", "zh_Hans",
+        ]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_union(self):
+        plain_part = {
+            "whitelist": ["ars", "zh_Hans"],
+        }
+        regex_part = {
+            "filterType": "regex",
+            "whitelist": [r"^bs.*$", r"^zh$"],
+        }
+        config = {
+            "filterType": "union",
+            "unionOf": [plain_part, regex_part],
+        }
+        expected = [
+            "ars", "zh_Hans",
+            "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
+            "zh",
+        ]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_hk_deps_normal(self):
+        config = {
+            "filterType": "locale",
+            "whitelist": ["zh_HK"],
+        }
+        expected = ["root", "zh_Hant", "zh_Hant_HK", "zh_HK"]
+        self._check_filter(self._make_filter(config), expected)
+
+    def test_hk_deps_rbnf(self):
+        config = {
+            "filterType": "locale",
+            "whitelist": ["zh_HK"],
+        }
+        expected = ["root", "yue", "zh_Hant_HK", "zh_HK"]
+        self._check_filter(self._make_filter(config), expected, "rbnf")
+
+    def test_no_alias_parent_structure(self):
+        config = {
+            "filterType": "locale",
+            "whitelist": ["zh_HK"],
+        }
+        expected = ["root", "zh_HK", "zh"]
+        self._check_filter(self._make_filter(config), expected, "brkitr")
+
+    def _check_filter(self, filter, expected_matches, tree="locales"):
+        """Assert that ``filter`` matches exactly the expected stems.
+
+        Each stem in EXAMPLE_FILE_STEMS is turned into the path
+        "<tree>/<stem>.txt" and the filter result is compared with the
+        stem's membership in ``expected_matches``. Only stems listed in
+        EXAMPLE_FILE_STEMS are visited, so an expected entry that is not in
+        that list is never checked.
+        """
+        for stem in EXAMPLE_FILE_STEMS:
+            candidate = InFile("%s/%s.txt" % (tree, stem))
+            self.assertEqual(
+                filter.match(candidate), stem in expected_matches, stem)
+
+# Export the test for the runner.
+# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
+# the default loader builds the same suite (methods prefixed "test", in
+# sorted order) on every supported Python version.
+suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)
diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json
new file mode 100644
index 0000000000..674db09278
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json
@@ -0,0 +1,10 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+//////////////////////////////////////////////////////////////
+// This is a sample LOCALE_DEPS.json file for testing only. //
+//////////////////////////////////////////////////////////////
+
+{
+ "cldrVersion": "36.1"
+}
diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json
new file mode 100644
index 0000000000..1456ea0d9a
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json
@@ -0,0 +1,197 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+//////////////////////////////////////////////////////////////
+// This is a sample LOCALE_DEPS.json file for testing only. //
+//////////////////////////////////////////////////////////////
+
+{
+ "cldrVersion": "36.1",
+ "aliases": {
+ "ars": "ar_SA",
+ "az_AZ": "az_Latn_AZ",
+ "bs_BA": "bs_Latn_BA",
+ "en_NH": "en_VU",
+ "en_RH": "en_ZW",
+ "ff_CM": "ff_Latn_CM",
+ "ff_GN": "ff_Latn_GN",
+ "ff_MR": "ff_Latn_MR",
+ "ff_SN": "ff_Latn_SN",
+ "in": "id",
+ "in_ID": "id_ID",
+ "iw": "he",
+ "iw_IL": "he_IL",
+ "mo": "ro",
+ "no": "nb",
+ "no_NO": "nb_NO",
+ "no_NO_NY": "nn_NO",
+ "pa_IN": "pa_Guru_IN",
+ "pa_PK": "pa_Arab_PK",
+ "sh": "sr_Latn",
+ "sh_BA": "sr_Latn_BA",
+ "sh_CS": "sr_Latn_RS",
+ "sh_YU": "sr_Latn_RS",
+ "shi_MA": "shi_Tfng_MA",
+ "sr_BA": "sr_Cyrl_BA",
+ "sr_CS": "sr_Cyrl_RS",
+ "sr_Cyrl_CS": "sr_Cyrl_RS",
+ "sr_Cyrl_YU": "sr_Cyrl_RS",
+ "sr_Latn_CS": "sr_Latn_RS",
+ "sr_Latn_YU": "sr_Latn_RS",
+ "sr_ME": "sr_Latn_ME",
+ "sr_RS": "sr_Cyrl_RS",
+ "sr_XK": "sr_Cyrl_XK",
+ "sr_YU": "sr_Cyrl_RS",
+ "tl": "fil",
+ "tl_PH": "fil_PH",
+ "uz_AF": "uz_Arab_AF",
+ "uz_UZ": "uz_Latn_UZ",
+ "vai_LR": "vai_Vaii_LR",
+ "yue_CN": "yue_Hans_CN",
+ "yue_HK": "yue_Hant_HK",
+ "zh_CN": "zh_Hans_CN",
+ "zh_HK": "zh_Hant_HK",
+ "zh_MO": "zh_Hant_MO",
+ "zh_SG": "zh_Hans_SG",
+ "zh_TW": "zh_Hant_TW"
+ },
+ "parents": {
+ "az_Cyrl": "root",
+ "bs_Cyrl": "root",
+ "en_150": "en_001",
+ "en_AG": "en_001",
+ "en_AI": "en_001",
+ "en_AT": "en_150",
+ "en_AU": "en_001",
+ "en_BB": "en_001",
+ "en_BE": "en_150",
+ "en_BM": "en_001",
+ "en_BS": "en_001",
+ "en_BW": "en_001",
+ "en_BZ": "en_001",
+ "en_CA": "en_001",
+ "en_CC": "en_001",
+ "en_CH": "en_150",
+ "en_CK": "en_001",
+ "en_CM": "en_001",
+ "en_CX": "en_001",
+ "en_CY": "en_001",
+ "en_DE": "en_150",
+ "en_DG": "en_001",
+ "en_DK": "en_150",
+ "en_DM": "en_001",
+ "en_ER": "en_001",
+ "en_FI": "en_150",
+ "en_FJ": "en_001",
+ "en_FK": "en_001",
+ "en_FM": "en_001",
+ "en_GB": "en_001",
+ "en_GD": "en_001",
+ "en_GG": "en_001",
+ "en_GH": "en_001",
+ "en_GI": "en_001",
+ "en_GM": "en_001",
+ "en_GY": "en_001",
+ "en_HK": "en_001",
+ "en_IE": "en_001",
+ "en_IL": "en_001",
+ "en_IM": "en_001",
+ "en_IN": "en_001",
+ "en_IO": "en_001",
+ "en_JE": "en_001",
+ "en_JM": "en_001",
+ "en_KE": "en_001",
+ "en_KI": "en_001",
+ "en_KN": "en_001",
+ "en_KY": "en_001",
+ "en_LC": "en_001",
+ "en_LR": "en_001",
+ "en_LS": "en_001",
+ "en_MG": "en_001",
+ "en_MO": "en_001",
+ "en_MS": "en_001",
+ "en_MT": "en_001",
+ "en_MU": "en_001",
+ "en_MW": "en_001",
+ "en_MY": "en_001",
+ "en_NA": "en_001",
+ "en_NF": "en_001",
+ "en_NG": "en_001",
+ "en_NL": "en_150",
+ "en_NR": "en_001",
+ "en_NU": "en_001",
+ "en_NZ": "en_001",
+ "en_PG": "en_001",
+ "en_PH": "en_001",
+ "en_PK": "en_001",
+ "en_PN": "en_001",
+ "en_PW": "en_001",
+ "en_RW": "en_001",
+ "en_SB": "en_001",
+ "en_SC": "en_001",
+ "en_SD": "en_001",
+ "en_SE": "en_150",
+ "en_SG": "en_001",
+ "en_SH": "en_001",
+ "en_SI": "en_150",
+ "en_SL": "en_001",
+ "en_SS": "en_001",
+ "en_SX": "en_001",
+ "en_SZ": "en_001",
+ "en_TC": "en_001",
+ "en_TK": "en_001",
+ "en_TO": "en_001",
+ "en_TT": "en_001",
+ "en_TV": "en_001",
+ "en_TZ": "en_001",
+ "en_UG": "en_001",
+ "en_VC": "en_001",
+ "en_VG": "en_001",
+ "en_VU": "en_001",
+ "en_WS": "en_001",
+ "en_ZA": "en_001",
+ "en_ZM": "en_001",
+ "en_ZW": "en_001",
+ "es_AR": "es_419",
+ "es_BO": "es_419",
+ "es_BR": "es_419",
+ "es_BZ": "es_419",
+ "es_CL": "es_419",
+ "es_CO": "es_419",
+ "es_CR": "es_419",
+ "es_CU": "es_419",
+ "es_DO": "es_419",
+ "es_EC": "es_419",
+ "es_GT": "es_419",
+ "es_HN": "es_419",
+ "es_MX": "es_419",
+ "es_NI": "es_419",
+ "es_PA": "es_419",
+ "es_PE": "es_419",
+ "es_PR": "es_419",
+ "es_PY": "es_419",
+ "es_SV": "es_419",
+ "es_US": "es_419",
+ "es_UY": "es_419",
+ "es_VE": "es_419",
+ "pa_Arab": "root",
+ "pt_AO": "pt_PT",
+ "pt_CH": "pt_PT",
+ "pt_CV": "pt_PT",
+ "pt_GQ": "pt_PT",
+ "pt_GW": "pt_PT",
+ "pt_LU": "pt_PT",
+ "pt_MO": "pt_PT",
+ "pt_MZ": "pt_PT",
+ "pt_ST": "pt_PT",
+ "pt_TL": "pt_PT",
+ "shi_Latn": "root",
+ "sr_Latn": "root",
+ "uz_Arab": "root",
+ "uz_Cyrl": "root",
+ "vai_Latn": "root",
+ "yue_Hans": "root",
+ "zh_Hant": "root",
+ "zh_Hant_MO": "zh_Hant_HK"
+ }
+}
diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json
new file mode 100644
index 0000000000..c6ec208add
--- /dev/null
+++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json
@@ -0,0 +1,36 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+//////////////////////////////////////////////////////////////
+// This is a sample LOCALE_DEPS.json file for testing only. //
+//////////////////////////////////////////////////////////////
+
+{
+ "cldrVersion": "36.1",
+ "aliases": {
+ "ars": "ar_SA",
+ "in": "id",
+ "iw": "he",
+ "no": "nb",
+ "sh": "sr_Latn",
+ "zh_HK": "zh_Hant_HK",
+ "zh_Hant_HK": "yue",
+ "zh_MO": "zh_Hant_MO",
+ "zh_TW": "zh_Hant_TW"
+ },
+ "parents": {
+ "en_IN": "en_001",
+ "es_DO": "es_419",
+ "es_GT": "es_419",
+ "es_HN": "es_419",
+ "es_MX": "es_419",
+ "es_NI": "es_419",
+ "es_PA": "es_419",
+ "es_PR": "es_419",
+ "es_SV": "es_419",
+ "es_US": "es_419",
+ "sr_Latn": "root",
+ "yue_Hans": "root",
+ "zh_Hant": "root"
+ }
+}