diff options
Diffstat (limited to 'intl/icu/source/python/icutools/databuilder/test')
6 files changed, 680 insertions, 0 deletions
diff --git a/intl/icu/source/python/icutools/databuilder/test/__init__.py b/intl/icu/source/python/icutools/databuilder/test/__init__.py new file mode 100644 index 0000000000..dd12bfa16e --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html diff --git a/intl/icu/source/python/icutools/databuilder/test/__main__.py b/intl/icu/source/python/icutools/databuilder/test/__main__.py new file mode 100644 index 0000000000..6ae2c0f7c9 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/__main__.py @@ -0,0 +1,14 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import unittest + +from . import filtration_test + +def load_tests(loader, tests, pattern): + suite = unittest.TestSuite() + suite.addTest(filtration_test.suite) + return suite + +if __name__ == '__main__': + unittest.main() diff --git a/intl/icu/source/python/icutools/databuilder/test/filtration_test.py b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py new file mode 100644 index 0000000000..416223bd7e --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/filtration_test.py @@ -0,0 +1,421 @@ +# Copyright (C) 2018 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html + +import io as pyio +import json +import os +import unittest + +from .. import InFile +from ..comment_stripper import CommentStripper +from ..filtration import Filter + +EXAMPLE_FILE_STEMS = [ + "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", + "af_ZA", + "af", + "ar", + "ar_SA", + "ars", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs", + "en_001", + "en_150", + "en_DE", + "en_GB", + "en_US", + "root", + "sr_BA", + "sr_CS", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + "sr_Cyrl", + "sr_Latn_BA", + "sr_Latn_CS", + "sr_Latn_ME_VARIANT", + "sr_Latn_ME", + "sr_Latn", + "sr_ME", + "sr", + "vai_Latn_LR", + "vai_Latn", + "vai_LR", + "vai_Vaii_LR", + "vai_Vaii", + "vai", + "yue", + "zh_CN", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG", + "zh_Hans", + "zh_Hant_HK", + "zh_Hant_MO", + "zh_Hant_TW", + "zh_Hant", + "zh_HK", + "zh_MO", + "zh_SG", + "zh_TW", + "zh" +] + + +class TestIO(object): + def __init__(self): + pass + + def read_locale_deps(self, tree): + if tree not in ("brkitr", "locales", "rbnf"): + return None + with pyio.open(os.path.join( + os.path.dirname(__file__), + "sample_data", + tree, + "LOCALE_DEPS.json" + ), "r", encoding="utf-8-sig") as f: + return json.load(CommentStripper(f)) + + +class FiltrationTest(unittest.TestCase): + + def test_exclude(self): + self._check_filter(Filter.create_from_json({ + "filterType": "exclude" + }, TestIO()), [ + ]) + + def test_default_whitelist(self): + self._check_filter(Filter.create_from_json({ + "whitelist": [ + "ars", + "zh_Hans" + ] + }, TestIO()), [ + "ars", + "zh_Hans" + ]) + + def test_default_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("ars") + expected_matches.remove("zh_Hans") + self._check_filter(Filter.create_from_json({ + "blacklist": [ + "ars", + "zh_Hans" + ] + }, TestIO()), expected_matches) + + def test_language_whitelist(self): + self._check_filter(Filter.create_from_json({ + "filterType": "language", + "whitelist": [ + "af", + "bs" + ] + }, TestIO()), [ + "root", + "af_NA", + "af_VARIANT", + "af_ZA_VARIANT", + "af_ZA", + "af", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs" + ]) + + def test_language_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("af_NA") + expected_matches.remove("af_VARIANT") + expected_matches.remove("af_ZA_VARIANT") + expected_matches.remove("af_ZA") + expected_matches.remove("af") + self._check_filter(Filter.create_from_json({ + "filterType": "language", + "blacklist": [ + "af" + ] + }, TestIO()), expected_matches) + + def test_regex_whitelist(self): + self._check_filter(Filter.create_from_json({ + "filterType": "regex", + "whitelist": [ + r"^ar.*$", + r"^zh$" + ] + }, TestIO()), [ + "ar", + "ar_SA", + "ars", + "zh" + ]) + + def test_regex_blacklist(self): + expected_matches = set(EXAMPLE_FILE_STEMS) + expected_matches.remove("ar") + expected_matches.remove("ar_SA") + expected_matches.remove("ars") + expected_matches.remove("zh") + self._check_filter(Filter.create_from_json({ + "filterType": "regex", + "blacklist": [ + r"^ar.*$", + r"^zh$" + ] + }, TestIO()), expected_matches) + + def test_locale_basic(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + # Default scripts: + # sr => Cyrl + # vai => Vaii + # zh => Hans + "bs_BA", # is an alias to bs_Latn_BA + "en_DE", + "sr", # Language with no script + "vai_Latn", # Language with non-default script + "zh_Hans" # Language with default script + ] + }, TestIO()), [ + "root", + # bs: should include the full dependency tree of bs_BA + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Cyrl, the default, but not Latn. + "sr", + "sr_BA", + "sr_CS", + "sr_Cyrl", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + # vai: include Latn but NOT Vaii. + "vai_Latn", + "vai_Latn_LR", + # zh: include Hans but NOT Hant. + "zh", + "zh_CN", + "zh_SG", + "zh_Hans", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG" + ]) + + def test_locale_no_children(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeChildren": False, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + "en", + "en_DE", + "en_150", + "en_001", + "sr", + "vai_Latn", + "zh", + "zh_Hans", + ]) + + def test_locale_include_scripts(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeScripts": True, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + # bs: includeScripts only works for language-only (without region) + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Latn, since no particular script was requested. + "sr_BA", + "sr_CS", + "sr_Cyrl_BA", + "sr_Cyrl_CS", + "sr_Cyrl_ME", + "sr_Cyrl", + "sr_Latn_BA", + "sr_Latn_CS", + "sr_Latn_ME_VARIANT", + "sr_Latn_ME", + "sr_Latn", + "sr_ME", + "sr", + # vai: do NOT include Vaii; the script was explicitly requested. + "vai_Latn_LR", + "vai_Latn", + # zh: do NOT include Hant; the script was explicitly requested. + "zh_CN", + "zh_SG", + "zh_Hans_CN", + "zh_Hans_HK", + "zh_Hans_MO", + "zh_Hans_SG", + "zh_Hans", + "zh" + ]) + + def test_locale_no_children_include_scripts(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "includeChildren": False, + "includeScripts": True, + "whitelist": [ + # See comments in test_locale_basic. + "bs_BA", + "en_DE", + "sr", + "vai_Latn", + "zh_Hans" + ] + }, TestIO()), [ + "root", + # bs: includeScripts only works for language-only (without region) + "bs_BA", + "bs_Latn_BA", + "bs_Latn", + "bs", + # en: should include the full dependency tree of en_DE + "en", + "en_DE", + "en_150", + "en_001", + # sr: include Cyrl and Latn but no other children + "sr", + "sr_Cyrl", + "sr_Latn", + # vai: include only the requested script + "vai_Latn", + # zh: include only the requested script + "zh", + "zh_Hans", + ]) + + def test_union(self): + self._check_filter(Filter.create_from_json({ + "filterType": "union", + "unionOf": [ + { + "whitelist": [ + "ars", + "zh_Hans" + ] + }, + { + "filterType": "regex", + "whitelist": [ + r"^bs.*$", + r"^zh$" + ] + } + ] + }, TestIO()), [ + "ars", + "zh_Hans", + "bs_BA", + "bs_Cyrl_BA", + "bs_Cyrl", + "bs_Latn_BA", + "bs_Latn", + "bs", + "zh" + ]) + + def test_hk_deps_normal(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_Hant", + "zh_Hant_HK", + "zh_HK", + ]) + + def test_hk_deps_rbnf(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "yue", + "zh_Hant_HK", + "zh_HK", + ], "rbnf") + + def test_no_alias_parent_structure(self): + self._check_filter(Filter.create_from_json({ + "filterType": "locale", + "whitelist": [ + "zh_HK" + ] + }, TestIO()), [ + "root", + "zh_HK", + "zh", + ], "brkitr") + + def _check_filter(self, filter, expected_matches, tree="locales"): + for file_stem in EXAMPLE_FILE_STEMS: + is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem))) + expected_match = file_stem in expected_matches + self.assertEqual(is_match, expected_match, file_stem) + +# Export the test for the runner +suite = unittest.makeSuite(FiltrationTest) diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json new file mode 100644 index 0000000000..674db09278 --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/brkitr/LOCALE_DEPS.json @@ -0,0 +1,10 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1" +} diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json new file mode 100644 index 0000000000..1456ea0d9a --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/locales/LOCALE_DEPS.json @@ -0,0 +1,197 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "az_AZ": "az_Latn_AZ", + "bs_BA": "bs_Latn_BA", + "en_NH": "en_VU", + "en_RH": "en_ZW", + "ff_CM": "ff_Latn_CM", + "ff_GN": "ff_Latn_GN", + "ff_MR": "ff_Latn_MR", + "ff_SN": "ff_Latn_SN", + "in": "id", + "in_ID": "id_ID", + "iw": "he", + "iw_IL": "he_IL", + "mo": "ro", + "no": "nb", + "no_NO": "nb_NO", + "no_NO_NY": "nn_NO", + "pa_IN": "pa_Guru_IN", + "pa_PK": "pa_Arab_PK", + "sh": "sr_Latn", + "sh_BA": "sr_Latn_BA", + "sh_CS": "sr_Latn_RS", + "sh_YU": "sr_Latn_RS", + "shi_MA": "shi_Tfng_MA", + "sr_BA": "sr_Cyrl_BA", + "sr_CS": "sr_Cyrl_RS", + "sr_Cyrl_CS": "sr_Cyrl_RS", + "sr_Cyrl_YU": "sr_Cyrl_RS", + "sr_Latn_CS": "sr_Latn_RS", + "sr_Latn_YU": "sr_Latn_RS", + "sr_ME": "sr_Latn_ME", + "sr_RS": "sr_Cyrl_RS", + "sr_XK": "sr_Cyrl_XK", + "sr_YU": "sr_Cyrl_RS", + "tl": "fil", + "tl_PH": "fil_PH", + "uz_AF": "uz_Arab_AF", + "uz_UZ": "uz_Latn_UZ", + "vai_LR": "vai_Vaii_LR", + "yue_CN": "yue_Hans_CN", + "yue_HK": "yue_Hant_HK", + "zh_CN": "zh_Hans_CN", + "zh_HK": "zh_Hant_HK", + "zh_MO": "zh_Hant_MO", + "zh_SG": "zh_Hans_SG", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "az_Cyrl": "root", + "bs_Cyrl": "root", + "en_150": "en_001", + "en_AG": "en_001", + "en_AI": "en_001", + "en_AT": "en_150", + "en_AU": "en_001", + "en_BB": "en_001", + "en_BE": "en_150", + "en_BM": "en_001", + "en_BS": "en_001", + "en_BW": "en_001", + "en_BZ": "en_001", + "en_CA": "en_001", + "en_CC": "en_001", + "en_CH": "en_150", + "en_CK": "en_001", + "en_CM": "en_001", + "en_CX": "en_001", + "en_CY": "en_001", + "en_DE": "en_150", + "en_DG": "en_001", + "en_DK": "en_150", + "en_DM": "en_001", + "en_ER": "en_001", + "en_FI": "en_150", + "en_FJ": "en_001", + "en_FK": "en_001", + "en_FM": "en_001", + "en_GB": "en_001", + "en_GD": "en_001", + "en_GG": "en_001", + "en_GH": "en_001", + "en_GI": "en_001", + "en_GM": "en_001", + "en_GY": "en_001", + "en_HK": "en_001", + "en_IE": "en_001", + "en_IL": "en_001", + "en_IM": "en_001", + "en_IN": "en_001", + "en_IO": "en_001", + "en_JE": "en_001", + "en_JM": "en_001", + "en_KE": "en_001", + "en_KI": "en_001", + "en_KN": "en_001", + "en_KY": "en_001", + "en_LC": "en_001", + "en_LR": "en_001", + "en_LS": "en_001", + "en_MG": "en_001", + "en_MO": "en_001", + "en_MS": "en_001", + "en_MT": "en_001", + "en_MU": "en_001", + "en_MW": "en_001", + "en_MY": "en_001", + "en_NA": "en_001", + "en_NF": "en_001", + "en_NG": "en_001", + "en_NL": "en_150", + "en_NR": "en_001", + "en_NU": "en_001", + "en_NZ": "en_001", + "en_PG": "en_001", + "en_PH": "en_001", + "en_PK": "en_001", + "en_PN": "en_001", + "en_PW": "en_001", + "en_RW": "en_001", + "en_SB": "en_001", + "en_SC": "en_001", + "en_SD": "en_001", + "en_SE": "en_150", + "en_SG": "en_001", + "en_SH": "en_001", + "en_SI": "en_150", + "en_SL": "en_001", + "en_SS": "en_001", + "en_SX": "en_001", + "en_SZ": "en_001", + "en_TC": "en_001", + "en_TK": "en_001", + "en_TO": "en_001", + "en_TT": "en_001", + "en_TV": "en_001", + "en_TZ": "en_001", + "en_UG": "en_001", + "en_VC": "en_001", + "en_VG": "en_001", + "en_VU": "en_001", + "en_WS": "en_001", + "en_ZA": "en_001", + "en_ZM": "en_001", + "en_ZW": "en_001", + "es_AR": "es_419", + "es_BO": "es_419", + "es_BR": "es_419", + "es_BZ": "es_419", + "es_CL": "es_419", + "es_CO": "es_419", + "es_CR": "es_419", + "es_CU": "es_419", + "es_DO": "es_419", + "es_EC": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PE": "es_419", + "es_PR": "es_419", + "es_PY": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "es_UY": "es_419", + "es_VE": "es_419", + "pa_Arab": "root", + "pt_AO": "pt_PT", + "pt_CH": "pt_PT", + "pt_CV": "pt_PT", + "pt_GQ": "pt_PT", + "pt_GW": "pt_PT", + "pt_LU": "pt_PT", + "pt_MO": "pt_PT", + "pt_MZ": "pt_PT", + "pt_ST": "pt_PT", + "pt_TL": "pt_PT", + "shi_Latn": "root", + "sr_Latn": "root", + "uz_Arab": "root", + "uz_Cyrl": "root", + "vai_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root", + "zh_Hant_MO": "zh_Hant_HK" + } +} diff --git a/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json new file mode 100644 index 0000000000..c6ec208add --- /dev/null +++ b/intl/icu/source/python/icutools/databuilder/test/sample_data/rbnf/LOCALE_DEPS.json @@ -0,0 +1,36 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +////////////////////////////////////////////////////////////// +// This is a sample LOCALE_DEPS.json file for testing only. // +////////////////////////////////////////////////////////////// + +{ + "cldrVersion": "36.1", + "aliases": { + "ars": "ar_SA", + "in": "id", + "iw": "he", + "no": "nb", + "sh": "sr_Latn", + "zh_HK": "zh_Hant_HK", + "zh_Hant_HK": "yue", + "zh_MO": "zh_Hant_MO", + "zh_TW": "zh_Hant_TW" + }, + "parents": { + "en_IN": "en_001", + "es_DO": "es_419", + "es_GT": "es_419", + "es_HN": "es_419", + "es_MX": "es_419", + "es_NI": "es_419", + "es_PA": "es_419", + "es_PR": "es_419", + "es_SV": "es_419", + "es_US": "es_419", + "sr_Latn": "root", + "yue_Hans": "root", + "zh_Hant": "root" + } +} |