summaryrefslogtreecommitdiffstats
path: root/third_party/rust/icu_locid
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/rust/icu_locid/.cargo-checksum.json1
-rw-r--r--third_party/rust/icu_locid/Cargo.lock710
-rw-r--r--third_party/rust/icu_locid/Cargo.toml134
-rw-r--r--third_party/rust/icu_locid/LICENSE44
-rw-r--r--third_party/rust/icu_locid/README.md53
-rw-r--r--third_party/rust/icu_locid/benches/fixtures/langid.json48
-rw-r--r--third_party/rust/icu_locid/benches/fixtures/locale.json26
-rw-r--r--third_party/rust/icu_locid/benches/fixtures/mod.rs25
-rw-r--r--third_party/rust/icu_locid/benches/fixtures/subtags.json18
-rw-r--r--third_party/rust/icu_locid/benches/helpers/macros.rs110
-rw-r--r--third_party/rust/icu_locid/benches/helpers/mod.rs17
-rw-r--r--third_party/rust/icu_locid/benches/iai_langid.rs124
-rw-r--r--third_party/rust/icu_locid/benches/langid.rs93
-rw-r--r--third_party/rust/icu_locid/benches/locale.rs87
-rw-r--r--third_party/rust/icu_locid/benches/subtags.rs39
-rw-r--r--third_party/rust/icu_locid/examples/filter_langids.rs69
-rw-r--r--third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs54
-rw-r--r--third_party/rust/icu_locid/src/databake.rs23
-rw-r--r--third_party/rust/icu_locid/src/extensions/mod.rs324
-rw-r--r--third_party/rust/icu_locid/src/extensions/other/mod.rs186
-rw-r--r--third_party/rust/icu_locid/src/extensions/other/subtag.rs36
-rw-r--r--third_party/rust/icu_locid/src/extensions/private/mod.rs187
-rw-r--r--third_party/rust/icu_locid/src/extensions/private/other.rs32
-rw-r--r--third_party/rust/icu_locid/src/extensions/transform/fields.rs221
-rw-r--r--third_party/rust/icu_locid/src/extensions/transform/key.rs32
-rw-r--r--third_party/rust/icu_locid/src/extensions/transform/mod.rs237
-rw-r--r--third_party/rust/icu_locid/src/extensions/transform/value.rs134
-rw-r--r--third_party/rust/icu_locid/src/extensions/unicode/attribute.rs34
-rw-r--r--third_party/rust/icu_locid/src/extensions/unicode/attributes.rs120
-rw-r--r--third_party/rust/icu_locid/src/extensions/unicode/key.rs32
-rw-r--r--third_party/rust/icu_locid/src/extensions/unicode/keywords.rs393
-rw-r--r--third_party/rust/icu_locid/src/extensions/unicode/mod.rs237
-rw-r--r--third_party/rust/icu_locid/src/extensions/unicode/value.rs196
-rw-r--r--third_party/rust/icu_locid/src/helpers.rs698
-rw-r--r--third_party/rust/icu_locid/src/langid.rs574
-rw-r--r--third_party/rust/icu_locid/src/lib.rs93
-rw-r--r--third_party/rust/icu_locid/src/locale.rs511
-rw-r--r--third_party/rust/icu_locid/src/macros.rs191
-rw-r--r--third_party/rust/icu_locid/src/ordering.rs62
-rw-r--r--third_party/rust/icu_locid/src/parser/errors.rs72
-rw-r--r--third_party/rust/icu_locid/src/parser/langid.rs278
-rw-r--r--third_party/rust/icu_locid/src/parser/locale.rs42
-rw-r--r--third_party/rust/icu_locid/src/parser/mod.rs231
-rw-r--r--third_party/rust/icu_locid/src/serde.rs135
-rw-r--r--third_party/rust/icu_locid/src/subtags/language.rs107
-rw-r--r--third_party/rust/icu_locid/src/subtags/mod.rs62
-rw-r--r--third_party/rust/icu_locid/src/subtags/region.rs62
-rw-r--r--third_party/rust/icu_locid/src/subtags/script.rs33
-rw-r--r--third_party/rust/icu_locid/src/subtags/variant.rs35
-rw-r--r--third_party/rust/icu_locid/src/subtags/variants.rs128
-rw-r--r--third_party/rust/icu_locid/src/zerovec.rs132
-rw-r--r--third_party/rust/icu_locid/tests/fixtures/canonicalize.json68
-rw-r--r--third_party/rust/icu_locid/tests/fixtures/invalid-extensions.json152
-rw-r--r--third_party/rust/icu_locid/tests/fixtures/invalid.json134
-rw-r--r--third_party/rust/icu_locid/tests/fixtures/langid.json167
-rw-r--r--third_party/rust/icu_locid/tests/fixtures/locale.json298
-rw-r--r--third_party/rust/icu_locid/tests/fixtures/mod.rs261
-rw-r--r--third_party/rust/icu_locid/tests/helpers/mod.rs15
-rw-r--r--third_party/rust/icu_locid/tests/langid.rs158
-rw-r--r--third_party/rust/icu_locid/tests/locale.rs120
-rw-r--r--third_party/rust/icu_locid_transform/.cargo-checksum.json1
-rw-r--r--third_party/rust/icu_locid_transform/Cargo.toml128
-rw-r--r--third_party/rust/icu_locid_transform/LICENSE44
-rw-r--r--third_party/rust/icu_locid_transform/README.md77
-rw-r--r--third_party/rust/icu_locid_transform/benches/fixtures/locales.json41
-rw-r--r--third_party/rust/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json88
-rw-r--r--third_party/rust/icu_locid_transform/benches/helpers/mod.rs15
-rw-r--r--third_party/rust/icu_locid_transform/benches/locale_canonicalizer.rs99
-rw-r--r--third_party/rust/icu_locid_transform/src/canonicalizer.rs618
-rw-r--r--third_party/rust/icu_locid_transform/src/directionality.rs231
-rw-r--r--third_party/rust/icu_locid_transform/src/error.rs27
-rw-r--r--third_party/rust/icu_locid_transform/src/expander.rs722
-rw-r--r--third_party/rust/icu_locid_transform/src/fallback/algorithms.rs487
-rw-r--r--third_party/rust/icu_locid_transform/src/fallback/mod.rs304
-rw-r--r--third_party/rust/icu_locid_transform/src/lib.rs116
-rw-r--r--third_party/rust/icu_locid_transform/src/provider/canonicalizer.rs81
-rw-r--r--third_party/rust/icu_locid_transform/src/provider/directionality.rs36
-rw-r--r--third_party/rust/icu_locid_transform/src/provider/expander.rs243
-rw-r--r--third_party/rust/icu_locid_transform/src/provider/fallback.rs102
-rw-r--r--third_party/rust/icu_locid_transform/src/provider/mod.rs114
-rw-r--r--third_party/rust/icu_locid_transform/tests/fixtures/canonicalize.json444
-rw-r--r--third_party/rust/icu_locid_transform/tests/fixtures/maximize.json182
-rw-r--r--third_party/rust/icu_locid_transform/tests/fixtures/minimize.json26
-rw-r--r--third_party/rust/icu_locid_transform/tests/fixtures/mod.rs12
-rw-r--r--third_party/rust/icu_locid_transform/tests/helpers/mod.rs15
-rw-r--r--third_party/rust/icu_locid_transform/tests/locale_canonicalizer.rs82
-rw-r--r--third_party/rust/icu_locid_transform_data/.cargo-checksum.json1
-rw-r--r--third_party/rust/icu_locid_transform_data/Cargo.toml33
-rw-r--r--third_party/rust/icu_locid_transform_data/LICENSE44
-rw-r--r--third_party/rust/icu_locid_transform_data/README.md11
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros.rs66
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/fallback_likelysubtags_v1.rs.data44
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/fallback_parents_v1.rs.data32
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/fallback_supplement_co_v1.rs.data36
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/locid_transform_aliases_v1.rs.data66
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_ext_v1.rs.data52
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_l_v1.rs.data41
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_sr_v1.rs.data40
-rw-r--r--third_party/rust/icu_locid_transform_data/data/macros/locid_transform_script_dir_v1.rs.data27
-rw-r--r--third_party/rust/icu_locid_transform_data/src/lib.rs12
100 files changed, 13735 insertions, 0 deletions
diff --git a/third_party/rust/icu_locid/.cargo-checksum.json b/third_party/rust/icu_locid/.cargo-checksum.json
new file mode 100644
index 0000000000..f281e7af64
--- /dev/null
+++ b/third_party/rust/icu_locid/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.lock":"fa3d0827a7b2f8e928c86434cfa2f6b0f9e4aef207610f70fb500106afd58973","Cargo.toml":"def3e85ebc790882e4d0b39634ca78c0464e5a16f5a444881227c4c7b08ade1c","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"27c4ba4df8933825ab38b7da3de3b678bee7f1b4a011c6268861da643ac52de2","benches/fixtures/langid.json":"373c11527653c63c685c9e229a8de5ae2b557c25b686a9d891c59e1f603232d8","benches/fixtures/locale.json":"669b19db933094290a45bf856559920f4e92401072e364ac82c482119dc9233a","benches/fixtures/mod.rs":"9a9671eddcf38a6faa10cb814949f8abc15d89f5e70f3ad6f684f1bc3ffe72ea","benches/fixtures/subtags.json":"28be3a639e452d713e807d5779b6819e06277e2dbbf67801ef34964fb9b074b6","benches/helpers/macros.rs":"bba0945a826bc083156bc302507c48c0c99c4d965e2a84352644d768591b0339","benches/helpers/mod.rs":"c98167d866fdb7f66c8cab41e8d57b5aab9e9707dfc66c37ef136e088dac6fef","benches/iai_langid.rs":"8e8f93e4b4e2e70771f86eccfaec8c38f2f8a79f569d72eef29a64bb730f3e0d","benches/langid.rs":"4e3d307d48fd9071308a567a0ef927b229814978abd2ba29f57c65edd51f38e4","benches/locale.rs":"b8d5b1e3f8b5578c549a5149229656fb60de26b76a1bf66b6c1abce75042d674","benches/subtags.rs":"e7e80dabaf31bf031779456614f139cafcdadb805986e71b49133ac964928432","examples/filter_langids.rs":"e000b860432b1646c74709177e4e4df1cfdc9620f41a677d15a5016bd7eb9b29","examples/syntatically_canonicalize_locales.rs":"49184e6e52e2264c61a479c0df88c01e7f7079f3da991445faecca3844594079","src/databake.rs":"894d0f73836d99915c5726d8984e7833498e841e2985cedfd5893aeb2bdcc958","src/extensions/mod.rs":"679a7df9291d4a86872a835288cb91b511ac9e5117af355a54ff2e44cbf242fd","src/extensions/other/mod.rs":"b126de24198275f847737f8d18715dc7276417f1028b14c84f970275d231c014","src/extensions/other/subtag.rs":"41e9e943d67d5940c1fa01d027e8713c5e95819e4542c4a7f8fbadc9fc47b085","src/extensions/private/mod.rs":"febf66bf7415d9789bf423aaf7ec45b4a228fca1dc4a1d4f955a68ad77773ff6","src/extensions/private/other.rs":"604341d3888b946102bbc1f9d6d
1cc146a1aed26c55158385641f7a80a434b72","src/extensions/transform/fields.rs":"208f7783a49f53e0e120c51b8cdf8df43387e69d0f8cca0621802097b17c9094","src/extensions/transform/key.rs":"05ef426db886862257b4d8e11d0d7762694e858ed8e6e683e40a765be1d7f05b","src/extensions/transform/mod.rs":"5d753717c10f66929c4a1698606370fdeca8074adf4eac98243d665d72ccd838","src/extensions/transform/value.rs":"31f596b2f70fe19e42992e08dd0ca1130a4b89a41719983609ebf486fe8e0985","src/extensions/unicode/attribute.rs":"021115b6b1880048411dc6a983039dbf4cfce8eabf6895afc008356f13ced960","src/extensions/unicode/attributes.rs":"3b6c10548f78c5a1735d2c6b6c6b5cd9a11c7195843b2f3d71241e2931286412","src/extensions/unicode/key.rs":"3822a2710eeb6d8569666a0f4097cc0a85e5e490d8b7ff0b75a983e686cb26d3","src/extensions/unicode/keywords.rs":"bc33ab96f39d5c0d0b94ed076aec778ebb296a6ac14d141aee0ee3785c442c6d","src/extensions/unicode/mod.rs":"9aaa6e8a3b243d8398bc30d95be7eb003a82d64937979544e52287663196452b","src/extensions/unicode/value.rs":"b25db7ee38e42aa8473bdb7ee7b6ae48339f8f6de2a7f6bddc7d93804df91f39","src/helpers.rs":"8860167ebd2de94a977241efb0a3b60699db537fc64633318fba71c659adcce8","src/langid.rs":"ec8d94542a20b8efba78318aae0a2ec8bcb7d33791ed32aec845b9bc7dc6a146","src/lib.rs":"b0086f71477baa14abe327aece797f3a305ebc0b7cfc0fb7105f1a1dd64108ca","src/locale.rs":"51a28b67ac5df4261fee82501d5bd42e2621b328b84cf85cdddd7c5f1dadc707","src/macros.rs":"f7154fc103ea1120a55bb5898540b20df80de6eec42e70ce15f339d997f2bf52","src/ordering.rs":"d76c6f26ffb5eb1e24646b70ce532985af326c46c488abda52c89074387f1dcc","src/parser/errors.rs":"8af937e67d197272c0f4806cc40cb191c878085b8687f987e358ee01ac2b5004","src/parser/langid.rs":"282678684bf1530a92d5070cd02caef0e5a2797eeebb2a793febe5c74cb15d23","src/parser/locale.rs":"075c74803891894ad50bbedc69366931b8e76c0992b3caa1a5632f0a6816ccfd","src/parser/mod.rs":"5182392624876a419b1469d135d175aba680bb13d14e4f6ea0cfc4e071fbc743","src/serde.rs":"06e940e4f2d15f02d313b4e2b233aea3e74c93c6c43076f5ffe52d49c133608f","
src/subtags/language.rs":"9d256e02908b57afdec69a453862af1a1267b04323b9522080e5dafc891a7a63","src/subtags/mod.rs":"a31350b679598b7452849ee6f8f56aefb2f2e370575ffe34dd643b62a0ca3fec","src/subtags/region.rs":"22a6dbe130899ebaab5482ca4f512de931eda1c5194639302995f696082344a2","src/subtags/script.rs":"1d57233bd238af03d4c347adb06b238bc108649da2bd395c03d6c1e879725f8a","src/subtags/variant.rs":"a0a14e1f26f1438b47e9419e696247f197c4a1042070a6e4a41620be720b5ac5","src/subtags/variants.rs":"33c9a8f979078461ae27b1812358a1c984594f4b5e66d2e5ba3ff649f89da7ba","src/zerovec.rs":"a34cfbce609b9ca711d22098a73f3cf17eec6ea73fd00f3106dca698b7dee29e","tests/fixtures/canonicalize.json":"c55a1cfa487f18d0019424802a9913fdb2349b286e6ca60e47d9d230c079f75a","tests/fixtures/invalid-extensions.json":"4b7888006360b216030597257de8c301e22877e75216818967bbd8c83b6dbb0b","tests/fixtures/invalid.json":"5247849a6eb805619b8e70254c855227f7bdaf71431b071c91c6cc378ae9766e","tests/fixtures/langid.json":"960fd01722217ef1ea9077e2e0821d7089fe318a241bd7fb7918f50bf8f3f5c3","tests/fixtures/locale.json":"df1b195b18780758a6b1c0264206b9cd9ac8c4741c5d6b0cc2b92f8e17991c17","tests/fixtures/mod.rs":"aea619960540b92199345cbd20ff03d2cb451aa2ce9aa6cf7915223ee9f812a3","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/langid.rs":"82da0497c4b7d7c5d416ddb96bad0d13d4e51b735b5ed3164a25861de28e2118","tests/locale.rs":"25744873d84fdad4508af91acc51e9ca5971926afae8aaf9334f1618987987c1"},"package":"5c0aa2536adc14c07e2a521e95512b75ed8ef832f0fdf9299d4a0a45d2be2a9d"} \ No newline at end of file
diff --git a/third_party/rust/icu_locid/Cargo.lock b/third_party/rust/icu_locid/Cargo.lock
new file mode 100644
index 0000000000..f1e1741ef6
--- /dev/null
+++ b/third_party/rust/icu_locid/Cargo.lock
@@ -0,0 +1,710 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bumpalo"
+version = "3.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "ciborium"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "3.2.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
+dependencies = [
+ "bitflags",
+ "clap_lex",
+ "indexmap",
+ "textwrap",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
+dependencies = [
+ "os_str_bytes",
+]
+
+[[package]]
+name = "cobs"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+
+[[package]]
+name = "criterion"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
+dependencies = [
+ "anes",
+ "atty",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "itertools",
+ "lazy_static",
+ "num-traits",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
+dependencies = [
+ "cfg-if",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "crossbeam-utils",
+ "memoffset",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "databake"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82175d72e69414ceafbe2b49686794d3a8bed846e0d50267355f83ea8fdd953a"
+dependencies = [
+ "databake-derive",
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "databake-derive"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "377af281d8f23663862a7c84623bc5dcf7f8c44b13c7496a590bdc157f941a43"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "displaydoc"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "either"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
+name = "half"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "iai"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678"
+
+[[package]]
+name = "icu_locid"
+version = "1.4.0"
+dependencies = [
+ "criterion",
+ "databake",
+ "displaydoc",
+ "iai",
+ "litemap",
+ "postcard",
+ "serde",
+ "serde_json",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
+
+[[package]]
+name = "js-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.148"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
+
+[[package]]
+name = "litemap"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
+
+[[package]]
+name = "log"
+version = "0.4.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+
+[[package]]
+name = "memchr"
+version = "2.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
+
+[[package]]
+name = "memoffset"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+
+[[package]]
+name = "oorandom"
+version = "11.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
+
+[[package]]
+name = "os_str_bytes"
+version = "6.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
+
+[[package]]
+name = "plotters"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "postcard"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d534c6e61df1c7166e636ca612d9820d486fe96ddad37f7abc671517b297488e"
+dependencies = [
+ "cobs",
+ "serde",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.67"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rayon"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
+
+[[package]]
+name = "ryu"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "serde"
+version = "1.0.188"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.188"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.107"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "unicode-xid",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
+
+[[package]]
+name = "tinystr"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83c02bf3c538ab32ba913408224323915f4ef9a6d61c0e85d493f355921c0ece"
+dependencies = [
+ "displaydoc",
+ "serde",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
+
+[[package]]
+name = "walkdir"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
+
+[[package]]
+name = "web-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "writeable"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dad7bb64b8ef9c0aa27b6da38b452b0ee9fd82beaf276a87dd796fb55cbae14e"
+
+[[package]]
+name = "zerofrom"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7"
+
+[[package]]
+name = "zerovec"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eff4439ae91fb5c72b8abc12f3f2dbf51bd27e6eadb9f8a5bc8898dddb0e27ea"
+dependencies = [
+ "zerofrom",
+]
diff --git a/third_party/rust/icu_locid/Cargo.toml b/third_party/rust/icu_locid/Cargo.toml
new file mode 100644
index 0000000000..8f8effbbd0
--- /dev/null
+++ b/third_party/rust/icu_locid/Cargo.toml
@@ -0,0 +1,134 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.67"
+name = "icu_locid"
+version = "1.4.0"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "API for managing Unicode Language and Locale Identifiers"
+homepage = "https://icu4x.unicode.org"
+readme = "README.md"
+categories = ["internationalization"]
+license-file = "LICENSE"
+repository = "https://github.com/unicode-org/icu4x"
+
+[package.metadata.cargo-all-features]
+denylist = ["bench"]
+
+[package.metadata.docs.rs]
+all-features = true
+
+[lib]
+bench = false
+
+[[example]]
+name = "filter_langids"
+test = true
+
+[[example]]
+name = "syntatically_canonicalize_locales"
+test = true
+
+[[bench]]
+name = "subtags"
+harness = false
+required-features = ["bench"]
+
+[[bench]]
+name = "langid"
+harness = false
+
+[[bench]]
+name = "locale"
+harness = false
+
+[[bench]]
+name = "iai_langid"
+harness = false
+required-features = ["bench"]
+
+[dependencies.databake]
+version = "0.1.7"
+features = ["derive"]
+optional = true
+default-features = false
+
+[dependencies.displaydoc]
+version = "0.2.3"
+default-features = false
+
+[dependencies.litemap]
+version = "0.7.2"
+features = ["alloc"]
+default-features = false
+
+[dependencies.serde]
+version = "1.0"
+features = [
+ "alloc",
+ "derive",
+]
+optional = true
+default-features = false
+
+[dependencies.tinystr]
+version = "0.7.4"
+features = ["alloc"]
+default-features = false
+
+[dependencies.writeable]
+version = "0.5.4"
+default-features = false
+
+[dependencies.zerovec]
+version = "0.10.1"
+optional = true
+default-features = false
+
+[dev-dependencies.iai]
+version = "0.1.1"
+
+[dev-dependencies.postcard]
+version = "1.0.0"
+features = ["use-std"]
+default-features = false
+
+[dev-dependencies.serde]
+version = "1.0"
+features = ["derive"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+
+[features]
+bench = ["serde"]
+databake = ["dep:databake"]
+serde = [
+ "dep:serde",
+ "tinystr/serde",
+]
+std = []
+zerovec = ["dep:zerovec"]
+
+[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion]
+version = "0.4"
diff --git a/third_party/rust/icu_locid/LICENSE b/third_party/rust/icu_locid/LICENSE
new file mode 100644
index 0000000000..9845aa5f48
--- /dev/null
+++ b/third_party/rust/icu_locid/LICENSE
@@ -0,0 +1,44 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2023 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/third_party/rust/icu_locid/README.md b/third_party/rust/icu_locid/README.md
new file mode 100644
index 0000000000..9469e9b3cf
--- /dev/null
+++ b/third_party/rust/icu_locid/README.md
@@ -0,0 +1,53 @@
+# icu_locid [![crates.io](https://img.shields.io/crates/v/icu_locid)](https://crates.io/crates/icu_locid)
+
+<!-- cargo-rdme start -->
+
+Parsing, manipulating, and serializing Unicode Language and Locale Identifiers.
+
+This module is published as its own crate ([`icu_locid`](https://docs.rs/icu_locid/latest/icu_locid/))
+and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+
+The module provides algorithms for parsing a string into a well-formed language or locale identifier
+as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`].
+
+[`Locale`] is the most common structure to use for storing information about a language,
+script, region, variants and extensions. In almost all cases, this struct should be used as the
+base unit for all locale management operations.
+
+[`LanguageIdentifier`] is a strict subset of [`Locale`] which can be useful in a narrow range of
+cases where [`Unicode Extensions`] are not relevant.
+
+If in doubt, use [`Locale`].
+
+## Examples
+
+```rust
+use icu::locid::Locale;
+use icu::locid::{
+ locale,
+ subtags::{language, region},
+};
+
+let mut loc: Locale = locale!("en-US");
+
+assert_eq!(loc.id.language, language!("en"));
+assert_eq!(loc.id.script, None);
+assert_eq!(loc.id.region, Some(region!("US")));
+assert_eq!(loc.id.variants.len(), 0);
+
+loc.id.region = Some(region!("GB"));
+
+assert_eq!(loc, locale!("en-GB"));
+```
+
+For more details, see [`Locale`] and [`LanguageIdentifier`].
+
+[`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]: https://unicode.org/reports/tr35/tr35.html#Unicode_Language_and_Locale_Identifiers
+[`ICU4X`]: ../icu/index.html
+[`Unicode Extensions`]: extensions
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/third_party/rust/icu_locid/benches/fixtures/langid.json b/third_party/rust/icu_locid/benches/fixtures/langid.json
new file mode 100644
index 0000000000..43c56d5a20
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/fixtures/langid.json
@@ -0,0 +1,48 @@
+{
+ "canonicalized": [
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "it",
+ "zh-Hans-CN",
+ "de-AT",
+ "pl",
+ "fr-FR",
+ "de-AT",
+ "sr-Cyrl-SR",
+ "nb-NO",
+ "fr-FR",
+ "mk",
+ "uk",
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "th",
+ "de",
+ "zh-Cyrl-HN",
+ "en-Latn-US"
+ ],
+ "casing": [
+ "En_uS",
+ "EN-GB",
+ "ES-aR",
+ "iT",
+ "zH_HaNs_cN",
+ "dE-aT",
+ "Pl",
+ "FR-FR",
+ "de_AT",
+ "sR-CyrL_sr",
+ "NB-NO",
+ "fr_fr",
+ "Mk",
+ "uK",
+ "en-us",
+ "en_gb",
+ "ES-AR",
+ "tH",
+ "DE",
+ "ZH_cyrl_hN",
+ "eN-lAtN-uS"
+ ]
+}
diff --git a/third_party/rust/icu_locid/benches/fixtures/locale.json b/third_party/rust/icu_locid/benches/fixtures/locale.json
new file mode 100644
index 0000000000..f974a166ff
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/fixtures/locale.json
@@ -0,0 +1,26 @@
+{
+ "canonicalized": [
+ "en-US-u-hc-h12",
+ "en-GB-u-ca-gregory-hc-h12",
+ "es-AR-x-private",
+ "th-u-ca-buddhist",
+ "de-u-co-phonebk-ka-shifted",
+ "ar-u-nu-native",
+ "ar-u-nu-latn",
+ "ja-t-it",
+ "ja-Kana-t-it",
+ "und-Latn-t-und-cyrl"
+ ],
+ "casing": [
+ "en-US-U-hc-h12",
+ "en-GB-u-CA-gregory-hc-h12",
+ "es-AR-x-Private",
+ "th-u-ca-buDDhist",
+ "de-u-co-phonebk-KA-shifted",
+ "AR_U-NU-native",
+ "ar-u-nu-LaTN",
+ "jA-T-it",
+ "ja-kanA-T-IT",
+ "unD-Latn-T-und-cyrl"
+ ]
+}
diff --git a/third_party/rust/icu_locid/benches/fixtures/mod.rs b/third_party/rust/icu_locid/benches/fixtures/mod.rs
new file mode 100644
index 0000000000..006b223120
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/fixtures/mod.rs
@@ -0,0 +1,25 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use serde::Deserialize;
+
+/// Sample inputs for one kind of subtag, deserialized from a JSON fixture.
+#[derive(Deserialize)]
+pub struct SubtagData {
+ /// Strings listed under the fixture's `valid` key.
+ pub valid: Vec<String>,
+ /// Strings listed under the fixture's `invalid` key.
+ pub invalid: Vec<String>,
+}
+
+/// Top-level shape of a subtag fixture: one [`SubtagData`] per subtag kind
+/// (`language`, `script`, `region`, `variant`).
+#[derive(Deserialize)]
+pub struct Subtags {
+ pub language: SubtagData,
+ pub script: SubtagData,
+ pub region: SubtagData,
+ pub variant: SubtagData,
+}
+
+/// Top-level shape of a fixture holding two lists of identifier strings.
+#[derive(Deserialize)]
+pub struct LocaleList {
+ /// Strings listed under the fixture's `canonicalized` key.
+ pub canonicalized: Vec<String>,
+ /// Strings listed under the fixture's `casing` key.
+ pub casing: Vec<String>,
+}
diff --git a/third_party/rust/icu_locid/benches/fixtures/subtags.json b/third_party/rust/icu_locid/benches/fixtures/subtags.json
new file mode 100644
index 0000000000..cf8419cc96
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/fixtures/subtags.json
@@ -0,0 +1,18 @@
+{
+ "language": {
+ "valid": ["en", "it", "pl", "de", "fr", "cs", "csb", "und", "ru", "nb", "NB", "UK", "pL", "Zh", "ES"],
+ "invalid": ["", "1", "$", "a1", "1211", "as_sa^a", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ },
+ "script": {
+ "valid": ["Latn", "latn", "Arab", "xxxx", "Flan", "fAlA", "oOoO", "pPlQ", "esta", "RUSS"],
+ "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ },
+ "region": {
+ "valid": ["DE", "321", "zh", "IA", "fN", "rU", "ru", "RU", "Ru", "CN", "AR"],
+ "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ },
+ "variant": {
+ "valid": ["macos", "MaCoS", "windows", "posix", "POSIX", "Posix", "linux", "lINUX", "mAcOs", "testing", "WWWWWW"],
+ "invalid": ["", "1", "$", "a1", "a211", "ass__aa", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ }
+}
diff --git a/third_party/rust/icu_locid/benches/helpers/macros.rs b/third_party/rust/icu_locid/benches/helpers/macros.rs
new file mode 100644
index 0000000000..848a360c41
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/helpers/macros.rs
@@ -0,0 +1,110 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// Registers a benchmark named `"overview"` on `$c`.
+///
+/// Each iteration parses every string in `$data_str` into `$struct`
+/// (panicking with "Parsing failed" on error), counts the parsed values for
+/// which `normalizing_eq($compare)` holds and discards that count, and then
+/// evaluates to the `Vec<String>` of each value's `to_string()`.
+///
+/// `black_box` is referenced unqualified, so it must be in scope where the
+/// macro is invoked.
+#[macro_export]
+macro_rules! overview {
+ ($c:expr, $struct:ident, $data_str:expr, $compare:expr) => {
+ $c.bench_function("overview", |b| {
+ b.iter(|| {
+ let mut values = vec![];
+ for s in $data_str {
+ let value: Result<$struct, _> = black_box(s).parse();
+ values.push(value.expect("Parsing failed"));
+ }
+ let _ = values
+ .iter()
+ .filter(|&v| v.normalizing_eq($compare))
+ .count();
+
+ values
+ .iter()
+ .map(|v| v.to_string())
+ .collect::<Vec<String>>()
+ })
+ });
+ };
+}
+
+/// Registers a benchmark named `$struct_name` on `$c` that parses every
+/// string in `$data_str` into `$struct` and discards the `Result`
+/// (parse failures are not reported).
+///
+/// `black_box` must be in scope where the macro is invoked.
+#[macro_export]
+macro_rules! construct {
+ ($c:expr, $struct:ident, $struct_name:expr, $data_str:expr) => {
+ $c.bench_function($struct_name, |b| {
+ b.iter(|| {
+ for s in $data_str {
+ let _: Result<$struct, _> = black_box(s).parse();
+ }
+ })
+ });
+ };
+}
+
+/// Registers two stringification benchmarks on `$c` over the items of `$data`:
+///
+/// * `$struct_name` calls `to_string()` on each item;
+/// * `$struct_name` with the suffix `"/writeable"` calls
+///   `writeable::Writeable::write_to_string()` on each item.
+///
+/// `$struct` is accepted but not used in the expansion. `black_box` must be
+/// in scope where the macro is invoked.
+#[macro_export]
+macro_rules! to_string {
+ ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => {
+ $c.bench_function($struct_name, |b| {
+ b.iter(|| {
+ for s in $data {
+ let _ = black_box(s).to_string();
+ }
+ })
+ });
+ $c.bench_function(std::concat!($struct_name, "/writeable"), |b| {
+ use writeable::Writeable;
+ b.iter(|| {
+ for s in $data {
+ let _ = black_box(s).write_to_string();
+ }
+ })
+ });
+ };
+}
+
+/// Registers a benchmark with id `BenchmarkId::new("struct", $struct_name)`
+/// that walks `$data1` and `$data2` in lockstep (`zip`, so it stops at the
+/// shorter one) and compares each pair with `==`, discarding the result.
+///
+/// `$struct` is accepted but not used in the expansion. `BenchmarkId` and
+/// `black_box` must be in scope where the macro is invoked.
+#[macro_export]
+macro_rules! compare_struct {
+ ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => {
+ $c.bench_function(BenchmarkId::new("struct", $struct_name), |b| {
+ b.iter(|| {
+ for (lid1, lid2) in $data1.iter().zip($data2.iter()) {
+ let _ = black_box(lid1) == black_box(lid2);
+ }
+ })
+ });
+ };
+}
+
+/// Registers two benchmarks that compare each item of `$data1` with the item
+/// of `$data2` at the same position (`zip`, so iteration stops at the shorter
+/// input):
+///
+/// * id `("str", $struct_name)` calls `normalizing_eq` with the string;
+/// * id `("strict_cmp", $struct_name)` calls `strict_cmp` with the string's
+///   bytes.
+///
+/// Both results are discarded. `$struct` is accepted but not used in the
+/// expansion. `BenchmarkId` and `black_box` must be in scope where the macro
+/// is invoked.
+#[macro_export]
+macro_rules! compare_str {
+ ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => {
+ $c.bench_function(BenchmarkId::new("str", $struct_name), |b| {
+ b.iter(|| {
+ for (lid, s) in $data1.iter().zip($data2.iter()) {
+ let _ = black_box(lid).normalizing_eq(&black_box(s));
+ }
+ })
+ });
+ $c.bench_function(BenchmarkId::new("strict_cmp", $struct_name), |b| {
+ b.iter(|| {
+ for (lid, s) in $data1.iter().zip($data2.iter()) {
+ let _ = black_box(lid).strict_cmp(&black_box(s).as_str().as_bytes());
+ }
+ })
+ });
+ };
+}
+
+/// Registers a benchmark named `$struct_name` on `$c`. Each iteration first
+/// calls `to_string()` on every item of `$data`, then calls
+/// `$struct::canonicalize` on every item; all results are discarded.
+///
+/// NOTE(review): the `to_string()` loop runs inside the same timed closure as
+/// the canonicalization loop, so its cost is part of the reported number --
+/// confirm this is intended (e.g. as a built-in baseline).
+///
+/// `black_box` must be in scope where the macro is invoked.
+#[macro_export]
+macro_rules! canonicalize {
+ ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => {
+ $c.bench_function($struct_name, |b| {
+ b.iter(|| {
+ for s in $data {
+ let _ = black_box(s).to_string();
+ }
+ for s in $data {
+ let _ = $struct::canonicalize(black_box(s));
+ }
+ })
+ });
+ };
+}
diff --git a/third_party/rust/icu_locid/benches/helpers/mod.rs b/third_party/rust/icu_locid/benches/helpers/mod.rs
new file mode 100644
index 0000000000..27e455f7be
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/helpers/mod.rs
@@ -0,0 +1,17 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod macros;
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+/// Opens the file at `path` and deserializes its JSON contents into `T`.
+///
+/// The file is read through a `BufReader`. Both a failure to open the file
+/// and a `serde_json` deserialization failure are returned as
+/// `std::io::Error` (the latter converted by `?`).
+pub fn read_fixture<T>(path: &str) -> Result<T, Error>
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?;
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?)
+}
diff --git a/third_party/rust/icu_locid/benches/iai_langid.rs b/third_party/rust/icu_locid/benches/iai_langid.rs
new file mode 100644
index 0000000000..979da2f0f0
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/iai_langid.rs
@@ -0,0 +1,124 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use icu_locid::{langid, subtags::language, subtags::region, LanguageIdentifier};
+use writeable::Writeable;
+
+/// Language identifiers built with `langid!`, used as benchmark input.
+/// The entries mirror `LIDS_STR` one-to-one, in the same order.
+const LIDS: &[LanguageIdentifier] = &[
+ langid!("en"),
+ langid!("pl"),
+ langid!("fr-CA"),
+ langid!("zh-Hans"),
+ langid!("en-US"),
+ langid!("en-Latn-US"),
+ langid!("sr-Cyrl-BA"),
+];
+
+/// String forms of the identifiers in `LIDS`, in the same order, used by the
+/// benchmarks that start from `&str` input.
+const LIDS_STR: &[&str] = &[
+ "en",
+ "pl",
+ "fr-CA",
+ "zh-Hans",
+ "en-US",
+ "en-Latn-US",
+ "sr-Cyrl-BA",
+];
+
+/// Parses every entry of `LIDS_STR` into a `LanguageIdentifier`, panicking
+/// with "Failed to parse" if any entry is rejected. The resulting vector is
+/// discarded.
+fn bench_langid_constr() {
+ // Tests the instructions required to construct a LID from an str.
+
+ let _: Vec<LanguageIdentifier> = LIDS_STR
+ .iter()
+ .map(|l| l.parse().expect("Failed to parse"))
+ .collect();
+}
+
+/// Counts the entries of `LIDS` whose language is `en` and whose region is
+/// `Some(US)`, comparing the fields directly, and asserts that two match
+/// (`en-US` and `en-Latn-US`).
+fn bench_langid_compare_components() {
+ // Tests the cost of comparing LID components.
+
+ let result = LIDS
+ .iter()
+ .filter(|l| l.language == language!("en") && l.region == Some(region!("US")))
+ .count();
+
+ assert_eq!(result, 2);
+}
+
+/// Same count as `bench_langid_compare_components`, but the region is
+/// compared inside `Option::map(..).unwrap_or(false)` instead of against
+/// `Some(..)`. Asserts that two entries match.
+///
+/// NOTE(review): despite the name and the comment below, the comparison
+/// operands here are `language!`/`region!` values, not `str` -- confirm
+/// whether a string comparison was intended.
+fn bench_langid_compare_components_str() {
+ // Tests the cost of comparing LID components to str.
+
+ let result = LIDS
+ .iter()
+ .filter(|l| {
+ l.language == language!("en") && l.region.map(|r| r == region!("US")).unwrap_or(false)
+ })
+ .count();
+
+ assert_eq!(result, 2);
+}
+
+/// Counts the entries of `LIDS_STR` whose bytes compare `Ordering::Equal` to
+/// `langid!("en_us")` under `strict_cmp`, and asserts that exactly one does.
+fn bench_langid_strict_cmp() {
+ // Tests the cost of comparing a langid against byte strings.
+ use core::cmp::Ordering;
+
+ let lid = langid!("en_us");
+
+ let result = LIDS_STR
+ .iter()
+ .filter(|s| lid.strict_cmp(s.as_bytes()) == Ordering::Equal)
+ .count();
+
+ assert_eq!(result, 1);
+}
+
+/// Counts the entries of `LIDS` equal (`==`) to `langid!("en_us")` and
+/// asserts that exactly one is.
+fn bench_langid_matching() {
+ // Tests matching a LID against other LIDs.
+
+ let lid = langid!("en_us");
+
+ let count = LIDS.iter().filter(|l| lid == **l).count();
+ assert_eq!(count, 1);
+}
+
+/// Counts the entries of `LIDS_STR` for which
+/// `langid!("en_us").normalizing_eq(..)` holds and asserts that exactly one
+/// does.
+fn bench_langid_matching_str() {
+ // Tests matching a LID against list of str.
+
+ let lid = langid!("en_us");
+
+ let count = LIDS_STR.iter().filter(|&l| lid.normalizing_eq(l)).count();
+ assert_eq!(count, 1);
+}
+
+/// Converts every entry of `LIDS` to a `String` with `to_string()` and
+/// discards the resulting vector.
+fn bench_langid_serialize() {
+ // Tests serialization of LIDs.
+
+ let _: Vec<String> = LIDS.iter().map(|l| l.to_string()).collect();
+}
+
+/// Converts every entry of `LIDS` with `Writeable::write_to_string()` and
+/// discards the resulting vector.
+fn bench_langid_serialize_writeable() {
+ // Tests serialization of LIDs via `Writeable::write_to_string`.
+
+ let _: Vec<_> = LIDS.iter().map(|l| l.write_to_string()).collect();
+}
+
+/// Runs `LanguageIdentifier::canonicalize` on every entry of `LIDS_STR`,
+/// panicking with "Canonicalization failed" on error. The resulting strings
+/// are discarded.
+fn bench_langid_canonicalize() {
+ // Tests canonicalization of strings.
+
+ let _: Vec<String> = LIDS_STR
+ .iter()
+ .map(|l| LanguageIdentifier::canonicalize(l).expect("Canonicalization failed"))
+ .collect();
+}
+
+// Hands every benchmark function defined in this file to `iai::main!`.
+// This bench is declared with `harness = false` in Cargo.toml, so no
+// default test harness supplies the entry point.
+iai::main!(
+ bench_langid_constr,
+ bench_langid_compare_components,
+ bench_langid_compare_components_str,
+ bench_langid_strict_cmp,
+ bench_langid_matching,
+ bench_langid_matching_str,
+ bench_langid_serialize,
+ bench_langid_serialize_writeable,
+ bench_langid_canonicalize,
+);
diff --git a/third_party/rust/icu_locid/benches/langid.rs b/third_party/rust/icu_locid/benches/langid.rs
new file mode 100644
index 0000000000..e5c9b67340
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/langid.rs
@@ -0,0 +1,93 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use icu_locid::LanguageIdentifier;
+
+/// Criterion benchmarks for `LanguageIdentifier`.
+///
+/// Loads `./benches/fixtures/langid.json` (a relative path, so it depends on
+/// the working directory the bench is run from) and panics with
+/// "Failed to read a fixture" if that fails.
+///
+/// The `langid` overview group always runs. The `construct`, `to_string`,
+/// `compare` and `canonicalize` groups are compiled only when the `bench`
+/// feature is enabled.
+fn langid_benches(c: &mut Criterion) {
+ let path = "./benches/fixtures/langid.json";
+ let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ // Overview
+ {
+ let mut group = c.benchmark_group("langid");
+
+ overview!(group, LanguageIdentifier, &data.canonicalized, "en-US");
+
+ group.finish();
+ }
+
+ #[cfg(feature = "bench")]
+ {
+ // Needed by the `compare_struct!` / `compare_str!` expansions below.
+ use criterion::BenchmarkId;
+
+ // Construct
+ {
+ let mut group = c.benchmark_group("langid/construct");
+
+ construct!(group, LanguageIdentifier, "langid", &data.canonicalized);
+
+ group.finish();
+ }
+
+ // Stringify
+ {
+ let mut group = c.benchmark_group("langid/to_string");
+
+ let langids: Vec<LanguageIdentifier> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ to_string!(group, LanguageIdentifier, "langid", &langids);
+
+ group.finish();
+ }
+
+ // Compare
+ {
+ let mut group = c.benchmark_group("langid/compare");
+
+ // Two independently parsed copies of the same list, so every
+ // struct-to-struct comparison is between equal values.
+ let langids: Vec<LanguageIdentifier> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+ let langids2: Vec<LanguageIdentifier> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ compare_struct!(group, LanguageIdentifier, "langid", &langids, &langids2);
+
+ compare_str!(
+ group,
+ LanguageIdentifier,
+ "langid",
+ &langids,
+ &data.canonicalized
+ );
+
+ group.finish();
+ }
+
+ // Canonicalize
+ {
+ let mut group = c.benchmark_group("langid/canonicalize");
+
+ // Uses the mixed-case `casing` list rather than `canonicalized`.
+ canonicalize!(group, LanguageIdentifier, "langid", &data.casing);
+
+ group.finish();
+ }
+ }
+}
+
+// Puts `langid_benches` into the criterion group `benches` and passes that
+// group to `criterion_main!` (this bench sets `harness = false` in Cargo.toml).
+criterion_group!(benches, langid_benches,);
+criterion_main!(benches);
diff --git a/third_party/rust/icu_locid/benches/locale.rs b/third_party/rust/icu_locid/benches/locale.rs
new file mode 100644
index 0000000000..948fbb5e8e
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/locale.rs
@@ -0,0 +1,87 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use icu_locid::Locale;
+
+/// Criterion benchmarks for `Locale`.
+///
+/// Loads `./benches/fixtures/locale.json` (a relative path, so it depends on
+/// the working directory the bench is run from) and panics with
+/// "Failed to read a fixture" if that fails.
+///
+/// The `locale` overview group always runs. The `construct`, `to_string`,
+/// `compare` and `canonicalize` groups are compiled only when the `bench`
+/// feature is enabled.
+fn locale_benches(c: &mut Criterion) {
+ let path = "./benches/fixtures/locale.json";
+ let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ // Overview
+ {
+ let mut group = c.benchmark_group("locale");
+
+ overview!(group, Locale, &data.canonicalized, "en-US");
+
+ group.finish();
+ }
+
+ #[cfg(feature = "bench")]
+ {
+ // Needed by the `compare_struct!` / `compare_str!` expansions below.
+ use criterion::BenchmarkId;
+
+ // Construct
+ {
+ let mut group = c.benchmark_group("locale/construct");
+
+ construct!(group, Locale, "locale", &data.canonicalized);
+
+ group.finish();
+ }
+
+ // Stringify
+ {
+ let mut group = c.benchmark_group("locale/to_string");
+
+ let locales: Vec<Locale> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ to_string!(group, Locale, "locale", &locales);
+
+ group.finish();
+ }
+
+ // Compare
+ {
+ let mut group = c.benchmark_group("locale/compare");
+
+ // Two independently parsed copies of the same list, so every
+ // struct-to-struct comparison is between equal values.
+ let locales: Vec<Locale> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+ let locales2: Vec<Locale> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ compare_struct!(group, Locale, "locale", &locales, &locales2);
+
+ compare_str!(group, Locale, "locale", &locales, &data.canonicalized);
+
+ group.finish();
+ }
+
+ // Canonicalize
+ {
+ let mut group = c.benchmark_group("locale/canonicalize");
+
+ // Uses the mixed-case `casing` list rather than `canonicalized`.
+ canonicalize!(group, Locale, "locale", &data.casing);
+
+ group.finish();
+ }
+ }
+}
+
+// Puts `locale_benches` into the criterion group `benches` and passes that
+// group to `criterion_main!` (this bench sets `harness = false` in Cargo.toml).
+criterion_group!(benches, locale_benches,);
+criterion_main!(benches);
diff --git a/third_party/rust/icu_locid/benches/subtags.rs b/third_party/rust/icu_locid/benches/subtags.rs
new file mode 100644
index 0000000000..4f81b71d2e
--- /dev/null
+++ b/third_party/rust/icu_locid/benches/subtags.rs
@@ -0,0 +1,39 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use icu_locid::subtags::{Language, Region, Script, Variant};
+use icu_locid::ParserError;
+
+// Registers a benchmark named `subtags/<$name>/parse` on `$c`.
+//
+// Each iteration parses every string in `$data.valid` into `$subtag`
+// (panicking if one is rejected) and every string in `$data.invalid`,
+// panicking if one is accepted; the invalid case binds the error as
+// `ParserError`.
+macro_rules! subtag_bench {
+ ($c:expr, $name:expr, $subtag:ident, $data:expr) => {
+ $c.bench_function(&format!("subtags/{}/parse", $name), |b| {
+ b.iter(|| {
+ for s in &$data.valid {
+ let _: $subtag = black_box(s).parse().unwrap();
+ }
+ for s in &$data.invalid {
+ let _: ParserError = black_box(s).parse::<$subtag>().unwrap_err();
+ }
+ })
+ });
+ };
+}
+
+/// Criterion benchmarks for parsing the four subtag types.
+///
+/// Loads `./benches/fixtures/subtags.json` (a relative path, so it depends on
+/// the working directory the bench is run from), panicking with
+/// "Failed to read a fixture" on failure, then runs `subtag_bench!` once each
+/// for `Language`, `Script`, `Region` and `Variant`.
+fn subtags_bench(c: &mut Criterion) {
+ let path = "./benches/fixtures/subtags.json";
+ let data: fixtures::Subtags = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ subtag_bench!(c, "language", Language, data.language);
+ subtag_bench!(c, "script", Script, data.script);
+ subtag_bench!(c, "region", Region, data.region);
+ subtag_bench!(c, "variant", Variant, data.variant);
+}
+
+// Puts `subtags_bench` into the criterion group `benches` and passes that
+// group to `criterion_main!` (this bench sets `harness = false` in Cargo.toml).
+criterion_group!(benches, subtags_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/icu_locid/examples/filter_langids.rs b/third_party/rust/icu_locid/examples/filter_langids.rs
new file mode 100644
index 0000000000..67828a1181
--- /dev/null
+++ b/third_party/rust/icu_locid/examples/filter_langids.rs
@@ -0,0 +1,69 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma separated list of language identifiers,
+// filters out identifiers with language subtags different than `en` and serializes
+// the list back into a comma separated list in canonical syntax.
+//
+// Note: This is an example of the API use, and is not a good base for language matching.
+// For language matching, please consider algorithms such as Locale Matcher.
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::{subtags, LanguageIdentifier};
+use writeable::Writeable;
+
+/// Comma separated language identifiers used when no command-line argument
+/// is given; also the input of the `ensure_default_output` test.
+const DEFAULT_INPUT: &str =
+ "de, en-us, zh-hant, sr-cyrl, fr-ca, es-cl, pl, en-latn-us, ca-valencia, und-arab";
+
+/// Keeps only the language identifiers in `input` whose language subtag is
+/// `en` and returns them joined with `", "`.
+///
+/// `input` is split on `,` and each piece is trimmed before parsing. Pieces
+/// that do not parse as a `LanguageIdentifier` are dropped silently (the
+/// parse error is discarded by `.ok()`).
+fn filter_input(input: &str) -> String {
+ // 1. Parse the input string into a list of language identifiers.
+ let langids = input.split(',').filter_map(|s| s.trim().parse().ok());
+
+ // 2. Filter for LanguageIdentifiers with Language subtag `en`.
+ let en_lang: subtags::Language = "en".parse().expect("Failed to parse language subtag.");
+
+ let en_langids = langids.filter(|langid: &LanguageIdentifier| langid.language == en_lang);
+
+ // 3. Serialize the output.
+ let en_strs: Vec<String> = en_langids
+ .map(|langid| langid.write_to_string().into_owned())
+ .collect();
+
+ en_strs.join(", ")
+}
+
+/// Entry point of the example. The file is `#![no_main]`, so `main` is
+/// defined by hand with a C-style signature and exported via `#[no_mangle]`.
+///
+/// Filters the first command-line argument, or `DEFAULT_INPUT` when none is
+/// given, through `filter_input`. The result is printed only in builds with
+/// `debug_assertions` enabled. Always returns 0.
+#[no_mangle]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+ icu_benchmark_macros::main_setup!();
+ // The arguments are read through `std::env::args`, not `_argc`/`_argv`.
+ let args: Vec<String> = env::args().collect();
+
+ let input = if let Some(input) = args.get(1) {
+ input.as_str()
+ } else {
+ DEFAULT_INPUT
+ };
+ let _output = filter_input(input);
+
+ #[cfg(debug_assertions)]
+ println!("\nInput: {input}\nOutput: {_output}");
+
+ 0
+}
+
+// Checks that `filter_input(DEFAULT_INPUT)` yields the two `en` identifiers
+// in canonical casing.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const DEFAULT_OUTPUT: &str = "en-US, en-Latn-US";
+
+ #[test]
+ fn ensure_default_output() {
+ assert_eq!(filter_input(DEFAULT_INPUT), DEFAULT_OUTPUT);
+ }
+}
diff --git a/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs b/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs
new file mode 100644
index 0000000000..1f967504eb
--- /dev/null
+++ b/third_party/rust/icu_locid/examples/syntatically_canonicalize_locales.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma separated list of locales,
+// makes them syntactically canonical and serializes the list back into a comma separated list.
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::Locale;
+
+/// Comma separated locales in non-canonical casing, used when no
+/// command-line argument is given; also the input of the
+/// `ensure_default_output` test.
+const DEFAULT_INPUT: &str = "sr-cyrL-rS, es-mx, und-arab-u-ca-Buddhist";
+
+/// Canonicalizes each comma separated locale in `input` with
+/// `Locale::canonicalize` and returns the results joined with `", "`.
+///
+/// Each piece is trimmed first. Pieces for which canonicalization fails are
+/// dropped silently (the error is discarded by `.ok()`).
+fn syntatically_canonicalize_locales(input: &str) -> String {
+ // Split input string and canonicalize each locale identifier.
+ let canonical_locales: Vec<String> = input
+ .split(',')
+ .filter_map(|s| Locale::canonicalize(s.trim()).ok())
+ .collect();
+
+ canonical_locales.join(", ")
+}
+
+/// Entry point of the example: canonicalizes the first command-line
+/// argument, or `DEFAULT_INPUT` when none is given. The result is printed
+/// only in builds with `debug_assertions` enabled.
+fn main() {
+ icu_benchmark_macros::main_setup!();
+ let args: Vec<String> = env::args().collect();
+
+ let input = if let Some(input) = args.get(1) {
+ input.as_str()
+ } else {
+ DEFAULT_INPUT
+ };
+ let _output = syntatically_canonicalize_locales(input);
+
+ #[cfg(debug_assertions)]
+ println!("\nInput: {input}\nOutput: {_output}");
+}
+
+// Checks that canonicalizing `DEFAULT_INPUT` yields the expected casing for
+// each of its three locales.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const DEFAULT_OUTPUT: &str = "sr-Cyrl-RS, es-MX, und-Arab-u-ca-buddhist";
+
+ #[test]
+ fn ensure_default_output() {
+ assert_eq!(
+ syntatically_canonicalize_locales(DEFAULT_INPUT),
+ DEFAULT_OUTPUT
+ );
+ }
+}
diff --git a/third_party/rust/icu_locid/src/databake.rs b/third_party/rust/icu_locid/src/databake.rs
new file mode 100644
index 0000000000..03b7357c5e
--- /dev/null
+++ b/third_party/rust/icu_locid/src/databake.rs
@@ -0,0 +1,23 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::LanguageIdentifier;
+use alloc::string::ToString;
+use databake::*;
+
+/// Emits Rust source tokens that rebuild this `LanguageIdentifier`.
+impl Bake for LanguageIdentifier {
+ /// Records `icu_locid` in `env` and returns an expression built from the
+ /// identifier's string form:
+ ///
+ /// * at most one variant: `icu_locid::langid!("...")`;
+ /// * otherwise: `icu_locid::LanguageIdentifier::from_str("...").unwrap()`.
+ fn bake(&self, env: &CrateEnv) -> TokenStream {
+ env.insert("icu_locid");
+ let repr = self.to_string();
+ // NOTE(review): presumably `langid!` cannot express more than one
+ // variant, hence the runtime-parsing fallback -- confirm against the
+ // macro definition.
+ if self.variants.len() <= 1 {
+ quote! {
+ icu_locid::langid!(#repr)
+ }
+ } else {
+ quote! {
+ icu_locid::LanguageIdentifier::from_str(#repr).unwrap()
+ }
+ }
+ }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/mod.rs b/third_party/rust/icu_locid/src/extensions/mod.rs
new file mode 100644
index 0000000000..a37bf8b9fc
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/mod.rs
@@ -0,0 +1,324 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Unicode Extensions provide a mechanism to extend the [`LanguageIdentifier`] with
+//! additional bits of information - a combination of a [`LanguageIdentifier`] and [`Extensions`]
+//! is called [`Locale`].
+//!
+//! There are four types of extensions:
+//!
+//! * [`Unicode Extensions`] - marked as `u`.
+//! * [`Transform Extensions`] - marked as `t`.
+//! * [`Private Use Extensions`] - marked as `x`.
+//! * [`Other Extensions`] - marked as any `a-z` except for `u`, `t` and `x`.
+//!
+//! One can think of extensions as a bag of extra information on top of basic 4 [`subtags`].
+//!
+//! Notice: `Other` extension type is currently not supported.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::unicode::{Key, Value};
+//! use icu::locid::Locale;
+//!
+//! let loc: Locale = "en-US-u-ca-buddhist-t-en-us-h0-hybrid-x-foo"
+//! .parse()
+//! .expect("Failed to parse.");
+//!
+//! assert_eq!(loc.id.language, "en".parse().unwrap());
+//! assert_eq!(loc.id.script, None);
+//! assert_eq!(loc.id.region, Some("US".parse().unwrap()));
+//! assert_eq!(loc.id.variants.len(), 0);
+//!
+//! let key: Key = "ca".parse().expect("Parsing key failed.");
+//! let value: Value = "buddhist".parse().expect("Parsing value failed.");
+//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+//! ```
+//!
+//! [`LanguageIdentifier`]: super::LanguageIdentifier
+//! [`Locale`]: super::Locale
+//! [`subtags`]: super::subtags
+//! [`Other Extensions`]: other
+//! [`Private Use Extensions`]: private
+//! [`Transform Extensions`]: transform
+//! [`Unicode Extensions`]: unicode
+pub mod other;
+pub mod private;
+pub mod transform;
+pub mod unicode;
+
+use other::Other;
+use private::Private;
+use transform::Transform;
+use unicode::Unicode;
+
+use alloc::vec::Vec;
+
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+
+/// Defines the type of extension.
+///
+/// The derived `Ord` follows declaration order: `Transform`, `Unicode`,
+/// `Private`, then `Other` (compared by its byte), so the variants must not
+/// be reordered casually.
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
+#[non_exhaustive]
+pub enum ExtensionType {
+ /// Transform Extension Type marked as `t`.
+ Transform,
+ /// Unicode Extension Type marked as `u`.
+ Unicode,
+ /// Private Extension Type marked as `x`.
+ Private,
+ /// All other extension types; the payload is the singleton byte that
+ /// introduces the extension (for example `b'z'`).
+ Other(u8),
+}
+
+impl ExtensionType {
+    /// Maps a singleton byte to its extension type, ignoring ASCII case.
+    ///
+    /// Returns [`ParserError::InvalidExtension`] for any byte that is not an
+    /// ASCII letter.
+    pub(crate) const fn try_from_byte(key: u8) -> Result<Self, ParserError> {
+        match key.to_ascii_lowercase() {
+            b'u' => Ok(Self::Unicode),
+            b't' => Ok(Self::Transform),
+            b'x' => Ok(Self::Private),
+            // Every remaining letter is a pass-through extension; the
+            // lower-cased byte is kept as the payload.
+            letter @ b'a'..=b'z' => Ok(Self::Other(letter)),
+            _ => Err(ParserError::InvalidExtension),
+        }
+    }
+
+    /// Same as [`Self::try_from_byte`], but reads the byte from
+    /// `bytes[start..end]`, which must be exactly one byte long.
+    ///
+    /// Returns [`ParserError::InvalidExtension`] when the range length is not 1.
+    pub(crate) const fn try_from_bytes_manual_slice(
+        bytes: &[u8],
+        start: usize,
+        end: usize,
+    ) -> Result<Self, ParserError> {
+        if end - start == 1 {
+            // Direct indexing panics if `start` is out of bounds; callers are
+            // expected to pass a range that lies inside `bytes`.
+            #[allow(clippy::indexing_slicing)]
+            let singleton = bytes[start];
+            Self::try_from_byte(singleton)
+        } else {
+            Err(ParserError::InvalidExtension)
+        }
+    }
+}
+
+/// A map of extensions associated with a given [`Locale`](crate::Locale).
+#[derive(Debug, Default, PartialEq, Eq, Clone, Hash)]
+#[non_exhaustive]
+pub struct Extensions {
+ /// A representation of the data for a Unicode extension, when present in the locale identifier.
+ pub unicode: Unicode,
+ /// A representation of the data for a transform extension, when present in the locale identifier.
+ pub transform: Transform,
+ /// A representation of the data for a private-use extension, when present in the locale identifier.
+ pub private: Private,
+ /// A sequence of any other extensions that are present in the locale identifier but are not formally
+ /// [defined](https://unicode.org/reports/tr35/) and represented explicitly as [`Unicode`], [`Transform`],
+ /// and [`Private`] are.
+ ///
+ /// The parser in this module keeps this vector sorted (each entry is inserted at the position
+ /// reported by `binary_search`) and rejects a repeated singleton.
+ pub other: Vec<Other>,
+}
+
+impl Extensions {
+    /// Creates an empty set of extensions.
+    ///
+    /// Equivalent to [`default()`](Default::default()), but usable in `const` context.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::Extensions;
+    ///
+    /// assert_eq!(Extensions::new(), Extensions::default());
+    /// ```
+    #[inline]
+    pub const fn new() -> Self {
+        // An empty map is simply one built around an empty unicode extension.
+        Self::from_unicode(Unicode::new())
+    }
+
+    /// Builds a map of extensions that holds the given unicode extension and
+    /// nothing else. Callable in `const` context.
+    #[inline]
+    pub const fn from_unicode(unicode: Unicode) -> Self {
+        Self {
+            other: Vec::new(),
+            private: Private::new(),
+            transform: Transform::new(),
+            unicode,
+        }
+    }
+
+    /// Reports whether no extension of any kind is present.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::Locale;
+    ///
+    /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
+    ///
+    /// assert!(!loc.extensions.is_empty());
+    /// ```
+    pub fn is_empty(&self) -> bool {
+        let Self {
+            unicode,
+            transform,
+            private,
+            other,
+        } = self;
+        unicode.is_empty() && transform.is_empty() && private.is_empty() && other.is_empty()
+    }
+
+    /// Keeps only the extensions whose type satisfies `predicate`, clearing
+    /// every other one.
+    ///
+    /// The predicate is queried for `Unicode`, `Transform` and `Private` (in
+    /// that order) and then once per stored `Other` extension.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::ExtensionType;
+    /// use icu::locid::Locale;
+    ///
+    /// let loc: Locale =
+    ///     "und-a-hello-t-mul-u-world-z-zzz-x-extra".parse().unwrap();
+    ///
+    /// let mut only_unicode = loc.clone();
+    /// only_unicode
+    ///     .extensions
+    ///     .retain_by_type(|t| t == ExtensionType::Unicode);
+    /// assert_eq!(only_unicode, "und-u-world".parse().unwrap());
+    ///
+    /// let mut only_t_z = loc.clone();
+    /// only_t_z.extensions.retain_by_type(|t| {
+    ///     t == ExtensionType::Transform || t == ExtensionType::Other(b'z')
+    /// });
+    /// assert_eq!(only_t_z, "und-t-mul-z-zzz".parse().unwrap());
+    /// ```
+    pub fn retain_by_type<F>(&mut self, mut predicate: F)
+    where
+        F: FnMut(ExtensionType) -> bool,
+    {
+        let Self {
+            unicode,
+            transform,
+            private,
+            other,
+        } = self;
+
+        if !predicate(ExtensionType::Unicode) {
+            unicode.clear();
+        }
+        if !predicate(ExtensionType::Transform) {
+            transform.clear();
+        }
+        if !predicate(ExtensionType::Private) {
+            private.clear();
+        }
+        other.retain(|extension| predicate(ExtensionType::Other(extension.get_ext_byte())));
+    }
+
+    /// Parses every extension that remains in `iter`.
+    ///
+    /// Each extension must be introduced by a singleton, i.e. a subtag that is
+    /// exactly one ASCII letter. The matching sub-parser then consumes the
+    /// subtags that belong to that extension, and the loop resumes at the next
+    /// singleton.
+    ///
+    /// # Errors
+    ///
+    /// * [`ParserError::InvalidExtension`] if an introducer is not a single
+    ///   ASCII letter. This includes a multi-byte subtag that a sub-parser
+    ///   left unconsumed: it must not be mistaken for a singleton merely
+    ///   because of its first byte.
+    /// * [`ParserError::DuplicatedExtension`] if the same singleton occurs twice.
+    /// * Any error returned by the sub-parser of an individual extension.
+    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+        let mut unicode = None;
+        let mut transform = None;
+        let mut private = None;
+        let mut other = Vec::new();
+
+        while let Some(subtag) = iter.next() {
+            // Only a one-byte subtag can introduce an extension. Empty and
+            // multi-byte subtags are rejected here instead of being
+            // dispatched on their first byte.
+            let singleton = match subtag {
+                [byte] => *byte,
+                _ => return Err(ParserError::InvalidExtension),
+            };
+
+            match ExtensionType::try_from_byte(singleton)? {
+                ExtensionType::Unicode => {
+                    if unicode.is_some() {
+                        return Err(ParserError::DuplicatedExtension);
+                    }
+                    unicode = Some(Unicode::try_from_iter(iter)?);
+                }
+                ExtensionType::Transform => {
+                    if transform.is_some() {
+                        return Err(ParserError::DuplicatedExtension);
+                    }
+                    transform = Some(Transform::try_from_iter(iter)?);
+                }
+                ExtensionType::Private => {
+                    if private.is_some() {
+                        return Err(ParserError::DuplicatedExtension);
+                    }
+                    private = Some(Private::try_from_iter(iter)?);
+                }
+                ExtensionType::Other(ext) => {
+                    if other.iter().any(|o: &Other| o.get_ext_byte() == ext) {
+                        return Err(ParserError::DuplicatedExtension);
+                    }
+                    let parsed = Other::try_from_iter(ext, iter)?;
+                    // Keep `other` sorted: insert at the slot reported by the
+                    // failed search. An exact match means an equal entry is
+                    // already stored.
+                    if let Err(idx) = other.binary_search(&parsed) {
+                        other.insert(idx, parsed);
+                    } else {
+                        return Err(ParserError::InvalidExtension);
+                    }
+                }
+            }
+        }
+
+        Ok(Self {
+            unicode: unicode.unwrap_or_default(),
+            transform: transform.unwrap_or_default(),
+            private: private.unwrap_or_default(),
+            other,
+        })
+    }
+
+    /// Feeds every subtag of every extension to `f`, in serialization order.
+    ///
+    /// `other` extensions are visited in stored order; the transform (`t`) and
+    /// unicode (`u`) extensions are emitted together just before the first
+    /// `other` extension whose singleton sorts after `'t'`, or after all of
+    /// them if there is none. The private-use extension always comes last.
+    /// Stops at, and returns, the first error produced by `f`.
+    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+    where
+        F: FnMut(&str) -> Result<(), E>,
+    {
+        let mut tu_pending = true;
+        // Alphabetic by singleton
+        for extension in self.other.iter() {
+            if tu_pending && extension.get_ext() > 't' {
+                // Since 't' and 'u' are next to each other in alphabetical
+                // order, write both now.
+                self.transform.for_each_subtag_str(f)?;
+                self.unicode.for_each_subtag_str(f)?;
+                tu_pending = false;
+            }
+            extension.for_each_subtag_str(f)?;
+        }
+
+        if tu_pending {
+            self.transform.for_each_subtag_str(f)?;
+            self.unicode.for_each_subtag_str(f)?;
+        }
+
+        // Private must be written last, since it allows single character
+        // keys. Extensions must also be written in alphabetical order,
+        // which would seem to imply that other extensions `y` and `z` are
+        // invalid, but this is not specified.
+        self.private.for_each_subtag_str(f)
+    }
+}
+
+impl_writeable_for_each_subtag_str_no_test!(Extensions);
+
+/// Checks that `Extensions` serializes back to the extension part of the
+/// locale it was parsed from.
+#[test]
+fn test_writeable() {
+    use crate::Locale;
+    use writeable::assert_writeable_eq;
+
+    // Parses a locale and hands back only its extensions.
+    let extensions_of = |tag: &str| tag.parse::<Locale>().unwrap().extensions;
+
+    assert_writeable_eq!(Extensions::new(), "");
+    assert_writeable_eq!(extensions_of("my-t-my-d0-zawgyi"), "t-my-d0-zawgyi");
+    assert_writeable_eq!(
+        extensions_of("ar-SA-u-ca-islamic-civil"),
+        "u-ca-islamic-civil"
+    );
+    assert_writeable_eq!(extensions_of("en-001-x-foo-bar"), "x-foo-bar");
+    assert_writeable_eq!(extensions_of("und-t-m0-true"), "t-m0-true");
+    assert_writeable_eq!(
+        extensions_of("und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo"),
+        "a-foo-t-foo-u-foo-w-foo-z-foo-x-foo"
+    );
+}
diff --git a/third_party/rust/icu_locid/src/extensions/other/mod.rs b/third_party/rust/icu_locid/src/extensions/other/mod.rs
new file mode 100644
index 0000000000..933128739d
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/other/mod.rs
@@ -0,0 +1,186 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Other Use Extensions is a list of extensions other than unicode,
+//! transform or private.
+//!
+//! Those extensions are treated as a pass-through, and no Unicode related
+//! behavior depends on them.
+//!
+//! The main struct for this extension is [`Other`] which is a list of [`Subtag`]s.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::other::Other;
+//! use icu::locid::Locale;
+//!
+//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed.");
+//! ```
+
+mod subtag;
+
+use crate::helpers::ShortSlice;
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+use alloc::vec::Vec;
+#[doc(inline)]
+pub use subtag::{subtag, Subtag};
+
+/// A list of [`Other Use Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Those extensions are treated as a pass-through, and no Unicode related
+/// behavior depends on them.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::other::{Other, Subtag};
+///
+/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+///
+/// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]);
+/// assert_eq!(&other.to_string(), "a-foo-bar");
+/// ```
+///
+/// [`Other Use Extensions`]: https://unicode.org/reports/tr35/#other_extensions
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+// NOTE(review): the derived `Default` yields `ext == 0`, which is not ASCII
+// alphabetic and would trip the `debug_assert!` in `get_ext_str` - confirm
+// whether `Default` is really wanted here.
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Other {
+ // Singleton byte that introduces the extension; the constructors assert
+ // that it is ASCII alphabetic.
+ ext: u8,
+ // Subtags of the extension, written out in stored order.
+ keys: ShortSlice<Subtag>,
+}
+
+impl Other {
+    /// Builds an [`Other`] extension from its singleton byte and a list of
+    /// [`Subtag`]s, which are stored in the order given.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `ext` is not ASCII alphabetic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::other::{Other, Subtag};
+    ///
+    /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+    /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+    ///
+    /// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]);
+    /// assert_eq!(&other.to_string(), "a-foo-bar");
+    /// ```
+    pub fn from_vec_unchecked(ext: u8, keys: Vec<Subtag>) -> Self {
+        let subtags: ShortSlice<Subtag> = keys.into();
+        Self::from_short_slice_unchecked(ext, subtags)
+    }
+
+ /// Crate-internal constructor that stores `ext` and `keys` as given.
+ ///
+ /// # Panics
+ ///
+ /// Panics (in every build profile) if `ext` is not ASCII alphabetic.
+ pub(crate) fn from_short_slice_unchecked(ext: u8, keys: ShortSlice<Subtag>) -> Self {
+ assert!(ext.is_ascii_alphabetic());
+ Self { ext, keys }
+ }
+
+    /// Parses the subtags that follow the singleton `ext`.
+    ///
+    /// Subtags are consumed while their length qualifies them as extension
+    /// subtags (see `Subtag::valid_key`); the first one that does not qualify
+    /// is left in `iter` for the caller.
+    ///
+    /// # Errors
+    ///
+    /// * Propagates the error of `Subtag::try_from_bytes` when a subtag of
+    ///   acceptable length is otherwise malformed, instead of silently
+    ///   dropping it from the result.
+    /// * Returns [`ParserError::InvalidExtension`] when the singleton is not
+    ///   followed by any subtag; the `other_extensions` grammar requires at
+    ///   least one.
+    pub(crate) fn try_from_iter(ext: u8, iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+        debug_assert!(ext.is_ascii_alphabetic());
+
+        let mut keys = ShortSlice::new();
+        while let Some(subtag) = iter.peek() {
+            if !Subtag::valid_key(subtag) {
+                break;
+            }
+            keys.push(Subtag::try_from_bytes(subtag)?);
+            iter.next();
+        }
+
+        if keys.is_empty() {
+            return Err(ParserError::InvalidExtension);
+        }
+
+        Ok(Self::from_short_slice_unchecked(ext, keys))
+    }
+
+ /// Gets the tag character for this extension as a &str.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "und-a-hello-world".parse().unwrap();
+ /// let other_ext = &loc.extensions.other[0];
+ /// assert_eq!(other_ext.get_ext_str(), "a");
+ /// ```
+ pub fn get_ext_str(&self) -> &str {
+ debug_assert!(self.ext.is_ascii_alphabetic());
+ // SAFETY: `from_short_slice_unchecked` asserts that `ext` is ASCII
+ // alphabetic, and a single ASCII byte is valid UTF-8.
+ // NOTE(review): a value from the derived `Default` has `ext == 0`, which
+ // is still ASCII but fails the `debug_assert!` above - confirm intent.
+ unsafe { core::str::from_utf8_unchecked(core::slice::from_ref(&self.ext)) }
+ }
+
+    /// Returns the singleton of this extension as a `char`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::Locale;
+    ///
+    /// let loc: Locale = "und-a-hello-world".parse().unwrap();
+    /// let other_ext = &loc.extensions.other[0];
+    /// assert_eq!(other_ext.get_ext(), 'a');
+    /// ```
+    pub fn get_ext(&self) -> char {
+        char::from(self.ext)
+    }
+
+    /// Returns the singleton of this extension as a raw byte.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::Locale;
+    ///
+    /// let loc: Locale = "und-a-hello-world".parse().unwrap();
+    /// let other_ext = &loc.extensions.other[0];
+    /// assert_eq!(other_ext.get_ext_byte(), b'a');
+    /// ```
+    pub fn get_ext_byte(&self) -> u8 {
+        let Self { ext, .. } = self;
+        *ext
+    }
+
+    /// Passes the singleton and then each subtag, in stored order, to `f`,
+    /// stopping at the first error.
+    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+    where
+        F: FnMut(&str) -> Result<(), E>,
+    {
+        f(self.get_ext_str())?;
+        for subtag in self.keys.iter() {
+            f(subtag.as_str())?;
+        }
+        Ok(())
+    }
+}
+
+writeable::impl_display_with_writeable!(Other);
+
+impl writeable::Writeable for Other {
+    /// Writes the singleton followed by `-subtag` for every stored subtag.
+    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+        sink.write_str(self.get_ext_str())?;
+        self.keys.iter().try_for_each(|subtag| {
+            sink.write_char('-')?;
+            writeable::Writeable::write_to(subtag, &mut *sink)
+        })
+    }
+
+    /// One byte for the singleton, plus one separator and the subtag's own
+    /// hint for every stored subtag.
+    fn writeable_length_hint(&self) -> writeable::LengthHint {
+        self.keys
+            .iter()
+            .fold(writeable::LengthHint::exact(1), |mut hint, subtag| {
+                hint += writeable::Writeable::writeable_length_hint(subtag) + 1;
+                hint
+            })
+    }
+
+    /// Borrows the singleton directly when there are no subtags; otherwise
+    /// serializes into a freshly allocated string.
+    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
+        if self.keys.is_empty() {
+            alloc::borrow::Cow::Borrowed(self.get_ext_str())
+        } else {
+            let capacity = self.writeable_length_hint().capacity();
+            let mut buffer = alloc::string::String::with_capacity(capacity);
+            let _ = self.write_to(&mut buffer);
+            alloc::borrow::Cow::Owned(buffer)
+        }
+    }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/other/subtag.rs b/third_party/rust/icu_locid/src/extensions/other/subtag.rs
new file mode 100644
index 0000000000..03be569406
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/other/subtag.rs
@@ -0,0 +1,36 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A single item used in a list of [`Other`](super::Other) extensions.
+ ///
+ /// The subtag has to be an ASCII alphanumerical string no shorter than
+ /// two characters and no longer than eight.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::other::subtag;
+ ///
+ /// assert_eq!(subtag!("Foo").as_str(), "foo");
+ /// ```
+ Subtag,
+ extensions::other,
+ subtag,
+ extensions_other_subtag,
+ 2..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["y", "toolooong"],
+);
+
+impl Subtag {
+    /// Cheap length-only pre-check: `true` when `v` is between two and eight
+    /// bytes long. The bytes themselves are not inspected.
+    pub(crate) const fn valid_key(v: &[u8]) -> bool {
+        matches!(v.len(), 2..=8)
+    }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/private/mod.rs b/third_party/rust/icu_locid/src/extensions/private/mod.rs
new file mode 100644
index 0000000000..5b41fdce09
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/private/mod.rs
@@ -0,0 +1,187 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Private Use Extensions is a list of extensions intended for
+//! private use.
+//!
+//! Those extensions are treated as a pass-through, and no Unicode related
+//! behavior depends on them.
+//!
+//! The main struct for this extension is [`Private`] which is a list of [`Subtag`]s.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::private::subtag;
+//! use icu::locid::{locale, Locale};
+//!
+//! let mut loc: Locale = "en-US-x-foo-faa".parse().expect("Parsing failed.");
+//!
+//! assert!(loc.extensions.private.contains(&subtag!("foo")));
+//! assert_eq!(loc.extensions.private.iter().next(), Some(&subtag!("foo")));
+//!
+//! loc.extensions.private.clear();
+//!
+//! assert!(loc.extensions.private.is_empty());
+//! assert_eq!(loc, locale!("en-US"));
+//! ```
+
+mod other;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+#[doc(inline)]
+pub use other::{subtag, Subtag};
+
+use crate::helpers::ShortSlice;
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+
+/// A list of [`Private Use Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Those extensions are treated as a pass-through, and no Unicode related
+/// behavior depends on them.
+///
+/// The list dereferences to `[Subtag]`, so slice methods such as `iter`,
+/// `contains` and `is_empty` are available on it.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::private::{Private, Subtag};
+///
+/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+///
+/// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]);
+/// assert_eq!(&private.to_string(), "x-foo-bar");
+/// ```
+///
+/// [`Private Use Extensions`]: https://unicode.org/reports/tr35/#pu_extensions
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Private(ShortSlice<Subtag>);
+
+impl Private {
+ /// Returns a new empty list of private-use extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// An empty list serializes to the empty string (no `x` singleton is written).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::Private;
+ ///
+ /// assert_eq!(Private::new(), Private::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(ShortSlice::new())
+ }
+
+    /// Builds a [`Private`] list from the given [`Subtag`]s, which are stored
+    /// in the order given.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::private::{Private, Subtag};
+    ///
+    /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+    /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+    ///
+    /// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]);
+    /// assert_eq!(&private.to_string(), "x-foo-bar");
+    /// ```
+    pub fn from_vec_unchecked(input: Vec<Subtag>) -> Self {
+        let subtags: ShortSlice<Subtag> = input.into();
+        Self(subtags)
+    }
+
+ /// A constructor which takes a single [`Subtag`]. Callable in `const` context.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::{Private, Subtag};
+ ///
+ /// let subtag: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+ ///
+ /// let private = Private::new_single(subtag);
+ /// assert_eq!(&private.to_string(), "x-foo");
+ /// ```
+ pub const fn new_single(input: Subtag) -> Self {
+ Self(ShortSlice::new_single(input))
+ }
+
+    /// Removes every subtag, leaving an empty [`Private`] list.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::private::{Private, Subtag};
+    ///
+    /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+    /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+    /// let mut private = Private::from_vec_unchecked(vec![subtag1, subtag2]);
+    ///
+    /// assert_eq!(&private.to_string(), "x-foo-bar");
+    ///
+    /// private.clear();
+    ///
+    /// assert_eq!(private, Private::new());
+    /// ```
+    pub fn clear(&mut self) {
+        let Self(subtags) = self;
+        subtags.clear();
+    }
+
+    /// Parses all remaining subtags in `iter` as private-use subtags.
+    ///
+    /// The private-use extension is always last, so the iterator is drained.
+    ///
+    /// # Errors
+    ///
+    /// * Propagates the error of `Subtag::try_from_bytes` for the first
+    ///   subtag that is not a valid private-use subtag.
+    /// * Returns [`ParserError::InvalidExtension`] when the `x` singleton is
+    ///   not followed by any subtag; the `pu_extensions` grammar requires at
+    ///   least one.
+    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+        let keys = iter
+            .map(Subtag::try_from_bytes)
+            .collect::<Result<ShortSlice<_>, _>>()?;
+
+        if keys.is_empty() {
+            return Err(ParserError::InvalidExtension);
+        }
+
+        Ok(Self(keys))
+    }
+
+    /// Passes `"x"` and then each subtag, in stored order, to `f`, stopping
+    /// at the first error. Nothing is emitted for an empty list.
+    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+    where
+        F: FnMut(&str) -> Result<(), E>,
+    {
+        if !self.is_empty() {
+            f("x")?;
+            for subtag in self.iter() {
+                f(subtag.as_str())?;
+            }
+        }
+        Ok(())
+    }
+}
+
+writeable::impl_display_with_writeable!(Private);
+
+impl writeable::Writeable for Private {
+    /// Writes `x` followed by `-subtag` for every stored subtag; writes
+    /// nothing at all when the list is empty.
+    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+        if !self.is_empty() {
+            sink.write_str("x")?;
+            for subtag in self.iter() {
+                sink.write_char('-')?;
+                writeable::Writeable::write_to(subtag, sink)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Zero for an empty list; otherwise one byte for the singleton plus one
+    /// separator and the subtag's own hint for every stored subtag.
+    fn writeable_length_hint(&self) -> writeable::LengthHint {
+        if self.is_empty() {
+            writeable::LengthHint::exact(0)
+        } else {
+            self.iter()
+                .fold(writeable::LengthHint::exact(1), |mut hint, subtag| {
+                    hint += writeable::Writeable::writeable_length_hint(subtag) + 1;
+                    hint
+                })
+        }
+    }
+}
+
+impl Deref for Private {
+    type Target = [Subtag];
+
+    /// Exposes the stored subtags as a slice.
+    fn deref(&self) -> &Self::Target {
+        let Self(subtags) = self;
+        subtags.deref()
+    }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/private/other.rs b/third_party/rust/icu_locid/src/extensions/private/other.rs
new file mode 100644
index 0000000000..810ffa2f49
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/private/other.rs
@@ -0,0 +1,32 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A single item used in a list of [`Private`](super::Private) extensions.
+ ///
+ /// The subtag has to be an ASCII alphanumerical string no shorter than
+ /// one character and no longer than eight.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::Subtag;
+ ///
+ /// let subtag1: Subtag = "Foo".parse().expect("Failed to parse a Subtag.");
+ ///
+ /// assert_eq!(subtag1.as_str(), "foo");
+ /// ```
+ Subtag,
+ extensions::private,
+ subtag,
+ extensions_private_subtag,
+ 1..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["toolooong"],
+);
diff --git a/third_party/rust/icu_locid/src/extensions/transform/fields.rs b/third_party/rust/icu_locid/src/extensions/transform/fields.rs
new file mode 100644
index 0000000000..2f12de9d14
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/transform/fields.rs
@@ -0,0 +1,221 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::borrow::Borrow;
+use core::iter::FromIterator;
+use litemap::LiteMap;
+
+use super::Key;
+use super::Value;
+
+/// A list of [`Key`]-[`Value`] pairs representing functional information
+/// about content transformations.
+///
+/// Here are examples of fields used in Unicode:
+/// - `s0`, `d0` - Transform source/destination
+/// - `t0` - Machine Translation
+/// - `h0` - Hybrid Locale Identifiers
+///
+/// You can find the full list in [`Unicode BCP 47 T Extension`] section of LDML.
+///
+/// [`Unicode BCP 47 T Extension`]: https://unicode.org/reports/tr35/tr35.html#BCP47_T_Extension
+///
+/// Internally the pairs are held in a `LiteMap` keyed by [`Key`], so each key
+/// maps to at most one [`Value`].
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::transform::{key, Fields, Key, Value};
+///
+/// let value = "hybrid".parse::<Value>().expect("Failed to parse a Value.");
+/// let fields = [(key!("h0"), value)].into_iter().collect::<Fields>();
+///
+/// assert_eq!(&fields.to_string(), "h0-hybrid");
+/// ```
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Fields(LiteMap<Key, Value>);
+
+impl Fields {
+ /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Fields;
+ ///
+ /// assert_eq!(Fields::new(), Fields::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ // Wraps an empty map; no key-value pair is stored yet.
+ Self(LiteMap::new())
+ }
+
+    /// Reports whether the list holds no key-value pair.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::transform::Fields;
+    /// use icu::locid::locale;
+    /// use icu::locid::Locale;
+    ///
+    /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
+    /// let loc2 = locale!("und-u-ca-buddhist");
+    ///
+    /// assert!(!loc1.extensions.transform.fields.is_empty());
+    /// assert!(loc2.extensions.transform.fields.is_empty());
+    /// ```
+    pub fn is_empty(&self) -> bool {
+        let Self(map) = self;
+        map.is_empty()
+    }
+
+    /// Empties the [`Fields`] list in place.
+    ///
+    /// Unlike most `clear` methods, this hands back the previous contents as
+    /// a new [`Fields`] value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::transform::{key, Fields, Value};
+    ///
+    /// let value = "hybrid".parse::<Value>().expect("Failed to parse a Value.");
+    /// let mut fields = [(key!("h0"), value)].into_iter().collect::<Fields>();
+    ///
+    /// assert_eq!(&fields.to_string(), "h0-hybrid");
+    ///
+    /// fields.clear();
+    ///
+    /// assert_eq!(fields, Fields::new());
+    /// ```
+    pub fn clear(&mut self) -> Self {
+        core::mem::replace(self, Self::new())
+    }
+
+    /// Reports whether a [`Value`] is stored for the given [`Key`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::transform::{Fields, Key, Value};
+    ///
+    /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+    /// let value: Value = "hybrid".parse().expect("Failed to parse a Value.");
+    /// let fields = [(key, value)].into_iter().collect::<Fields>();
+    ///
+    /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+    /// assert!(fields.contains_key(&key));
+    /// ```
+    pub fn contains_key<Q>(&self, key: &Q) -> bool
+    where
+        Key: Borrow<Q>,
+        Q: Ord,
+    {
+        let Self(map) = self;
+        map.contains_key(key)
+    }
+
+    /// Looks up the [`Value`] stored for the given [`Key`], if any.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::transform::{key, Fields, Key, Value};
+    ///
+    /// let value = "hybrid".parse::<Value>().unwrap();
+    /// let fields = [(key!("h0"), value.clone())]
+    ///     .into_iter()
+    ///     .collect::<Fields>();
+    ///
+    /// assert_eq!(fields.get(&key!("h0")), Some(&value));
+    /// ```
+    pub fn get<Q>(&self, key: &Q) -> Option<&Value>
+    where
+        Key: Borrow<Q>,
+        Q: Ord,
+    {
+        let Self(map) = self;
+        map.get(key)
+    }
+
+    /// Stores `value` under `key`, returning the value it replaces when the
+    /// key was already present.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::transform::{key, Key, Value};
+    /// use icu::locid::Locale;
+    ///
+    /// let lower = "lower".parse::<Value>().expect("valid extension subtag");
+    /// let casefold = "casefold".parse::<Value>().expect("valid extension subtag");
+    ///
+    /// let mut loc: Locale = "en-t-hi-d0-casefold"
+    ///     .parse()
+    ///     .expect("valid BCP-47 identifier");
+    /// let old_value = loc.extensions.transform.fields.set(key!("d0"), lower);
+    ///
+    /// assert_eq!(old_value, Some(casefold));
+    /// assert_eq!(loc, "en-t-hi-d0-lower".parse().unwrap());
+    /// ```
+    pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
+        let Self(map) = self;
+        map.insert(key, value)
+    }
+
+    /// Keeps only the fields whose [`Key`] satisfies `predicate`; all other
+    /// fields are removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::transform::key;
+    /// use icu::locid::Locale;
+    ///
+    /// let mut loc: Locale = "und-t-h0-hybrid-d0-hex-m0-xml".parse().unwrap();
+    ///
+    /// loc.extensions
+    ///     .transform
+    ///     .fields
+    ///     .retain_by_key(|&k| k == key!("h0"));
+    /// assert_eq!(loc, "und-t-h0-hybrid".parse().unwrap());
+    ///
+    /// loc.extensions
+    ///     .transform
+    ///     .fields
+    ///     .retain_by_key(|&k| k == key!("d0"));
+    /// assert_eq!(loc, Locale::UND);
+    /// ```
+    pub fn retain_by_key<F>(&mut self, mut predicate: F)
+    where
+        F: FnMut(&Key) -> bool,
+    {
+        let Self(map) = self;
+        // The stored values play no part in the decision.
+        map.retain(|field_key, _value| predicate(field_key))
+    }
+
+    /// For every field, in map iteration order, passes the key and then the
+    /// subtags of its value to `f`, stopping at the first error.
+    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+    where
+        F: FnMut(&str) -> Result<(), E>,
+    {
+        let Self(map) = self;
+        for (field_key, field_value) in map.iter() {
+            f(field_key.as_str())?;
+            field_value.for_each_subtag_str(f)?;
+        }
+        Ok(())
+    }
+
+    /// Test-only constructor from a vector of pairs.
+    ///
+    /// This needs to be its own method to help with type inference in helpers.rs
+    #[cfg(test)]
+    pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
+        Self::from_iter(v)
+    }
+}
+
+impl From<LiteMap<Key, Value>> for Fields {
+ /// Wraps an existing map as a [`Fields`] list without copying its entries.
+ fn from(map: LiteMap<Key, Value>) -> Self {
+ Self(map)
+ }
+}
+
+impl FromIterator<(Key, Value)> for Fields {
+    /// Collects `(Key, Value)` pairs into the backing map and wraps it.
+    fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
+        let map: LiteMap<Key, Value> = iter.into_iter().collect();
+        Self(map)
+    }
+}
+
+impl_writeable_for_key_value!(Fields, "h0", "hybrid", "m0", "m0-true");
diff --git a/third_party/rust/icu_locid/src/extensions/transform/key.rs b/third_party/rust/icu_locid/src/extensions/transform/key.rs
new file mode 100644
index 0000000000..afdb31d760
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/transform/key.rs
@@ -0,0 +1,32 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A key used in a list of [`Fields`](super::Fields).
+ ///
+ /// The key has to be two ASCII characters long, with the first
+ /// character being alphabetic, and the second being a digit.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Key;
+ ///
+ /// let key1: Key = "k0".parse().expect("Failed to parse a Key.");
+ ///
+ /// assert_eq!(key1.as_str(), "k0");
+ /// ```
+ Key,
+ extensions::transform,
+ key,
+ extensions_transform_key,
+ 2..=2,
+ s,
+ s.all_bytes()[0].is_ascii_alphabetic() && s.all_bytes()[1].is_ascii_digit(),
+ s.to_ascii_lowercase(),
+ s.all_bytes()[0].is_ascii_lowercase() && s.all_bytes()[1].is_ascii_digit(),
+ InvalidExtension,
+ ["k0"],
+ ["", "k", "0k", "k12"],
+);
diff --git a/third_party/rust/icu_locid/src/extensions/transform/mod.rs b/third_party/rust/icu_locid/src/extensions/transform/mod.rs
new file mode 100644
index 0000000000..f5bb74e0db
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/transform/mod.rs
@@ -0,0 +1,237 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Transform Extensions provide information on content transformations in a given locale.
+//!
+//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
+//! optional [`LanguageIdentifier`].
+//!
+//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::transform::{Fields, Key, Transform, Value};
+//! use icu::locid::{LanguageIdentifier, Locale};
+//!
+//! let mut loc: Locale =
+//! "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
+//!
+//! let lang: LanguageIdentifier =
+//! "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
+//!
+//! let key: Key = "h0".parse().expect("Parsing key failed.");
+//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
+//!
+//! assert_eq!(loc.extensions.transform.lang, Some(lang));
+//! assert!(loc.extensions.transform.fields.contains_key(&key));
+//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
+//!
+//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
+//! ```
+mod fields;
+mod key;
+mod value;
+
+pub use fields::Fields;
+#[doc(inline)]
+pub use key::{key, Key};
+pub use value::Value;
+
+use crate::helpers::ShortSlice;
+use crate::parser::SubtagIterator;
+use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode};
+use crate::subtags::Language;
+use crate::LanguageIdentifier;
+use litemap::LiteMap;
+
+/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Transform extension carries information about source language or script of
+/// transformed content, including content that has been transliterated, transcribed,
+/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::transform::{Key, Value};
+/// use icu::locid::{LanguageIdentifier, Locale};
+///
+/// let mut loc: Locale =
+/// "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
+///
+/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
+///
+/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
+/// let key: Key = "h0".parse().expect("Parsing key failed.");
+/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
+/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
+/// ```
+/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
+/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
+#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
+pub struct Transform {
+ /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
+ pub lang: Option<LanguageIdentifier>,
+ /// The key-value pairs present in this locale extension, with each extension key subtag
+ /// associated to its provided value subtag.
+ pub fields: Fields,
+}
+
+impl Transform {
+ /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Transform;
+ ///
+ /// assert_eq!(Transform::new(), Transform::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self {
+ lang: None,
+ fields: Fields::new(),
+ }
+ }
+
+ /// Returns `true` if this [`Transform`] extension has neither a `tlang` nor any `tfield`s.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
+ ///
+ /// assert!(!loc.extensions.transform.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.lang.is_none() && self.fields.is_empty()
+ }
+
+ /// Clears the transform extension, effectively removing it from the locale.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
+ /// loc.extensions.transform.clear();
+ /// assert_eq!(loc, "en-US".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) {
+ self.lang = None;
+ self.fields.clear();
+ }
+
+ /// Parses the body of a `t` extension from `iter`: an optional `tlang`
+ /// followed by zero or more `tkey`/`tvalue` fields.
+ ///
+ /// Subtags are only consumed while they belong to the extension; the first
+ /// subtag that is neither a value for the pending key nor a new key is left
+ /// in `iter`.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`ParserError::InvalidExtension`] if a key is not followed by at
+ /// least one value subtag, and propagates any error from parsing the `tlang`.
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ let mut tlang = None;
+ let mut tfields = LiteMap::new();
+
+ // A leading subtag that parses as a language starts the optional `tlang`.
+ if let Some(subtag) = iter.peek() {
+ if Language::try_from_bytes(subtag).is_ok() {
+ tlang = Some(parse_language_identifier_from_iter(
+ iter,
+ ParserMode::Partial,
+ )?);
+ }
+ }
+
+ // State of the field that is currently being assembled.
+ let mut current_tkey = None;
+ let mut current_tvalue = ShortSlice::new();
+ let mut has_current_tvalue = false;
+
+ while let Some(subtag) = iter.peek() {
+ if let Some(tkey) = current_tkey {
+ if let Ok(val) = Value::parse_subtag(subtag) {
+ has_current_tvalue = true;
+ // `parse_subtag` yields `None` for `true`, which is represented
+ // by not storing any subtag for the value.
+ if let Some(val) = val {
+ current_tvalue.push(val);
+ }
+ } else {
+ // Not a value subtag: the pending field is complete, provided it
+ // received at least one value subtag.
+ if !has_current_tvalue {
+ return Err(ParserError::InvalidExtension);
+ }
+ // NOTE(review): the result of `try_insert` is discarded, so a repeated
+ // key presumably keeps its first value; confirm against `LiteMap::try_insert`.
+ tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
+ current_tkey = None;
+ current_tvalue = ShortSlice::new();
+ has_current_tvalue = false;
+ // Re-examine the same subtag (without advancing) as a potential next key.
+ continue;
+ }
+ } else if let Ok(tkey) = Key::try_from_bytes(subtag) {
+ current_tkey = Some(tkey);
+ } else {
+ // Not part of this extension; leave the subtag unconsumed.
+ break;
+ }
+
+ iter.next();
+ }
+
+ // Flush the field that was still pending when the loop ended.
+ if let Some(tkey) = current_tkey {
+ if !has_current_tvalue {
+ return Err(ParserError::InvalidExtension);
+ }
+ tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
+ }
+
+ Ok(Self {
+ lang: tlang,
+ fields: tfields.into(),
+ })
+ }
+
+ /// Feeds every subtag of this extension to `f`, in serialization order:
+ /// the `t` singleton, the lowercased `tlang` subtags (if any), then the fields.
+ ///
+ /// Nothing is emitted for an empty extension. The first error returned by `f`
+ /// stops the walk and is propagated.
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if !self.is_empty() {
+ f("t")?;
+ if let Some(tlang) = self.lang.as_ref() {
+ tlang.for_each_subtag_str_lowercased(f)?;
+ }
+ self.fields.for_each_subtag_str(f)?;
+ }
+ Ok(())
+ }
+}
+
+writeable::impl_display_with_writeable!(Transform);
+
+// Serializes the extension as `t`, then `-<tlang>` if a language is present,
+// then `-<fields>` if any field is present. An empty extension writes nothing.
+impl writeable::Writeable for Transform {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ // An empty extension contributes no output at all, not even the `t` singleton.
+ if self.is_empty() {
+ return Ok(());
+ }
+ sink.write_str("t")?;
+ if let Some(lang) = &self.lang {
+ sink.write_char('-')?;
+ lang.write_lowercased_to(sink)?;
+ }
+ if !self.fields.is_empty() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(&self.fields, sink)?;
+ }
+ Ok(())
+ }
+
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ if self.is_empty() {
+ return writeable::LengthHint::exact(0);
+ }
+ // One byte for the `t` singleton.
+ let mut result = writeable::LengthHint::exact(1);
+ if let Some(lang) = &self.lang {
+ // `+ 1` accounts for the `-` written before the language identifier.
+ result += writeable::Writeable::writeable_length_hint(lang) + 1;
+ }
+ if !self.fields.is_empty() {
+ // `+ 1` accounts for the `-` written before the fields.
+ result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
+ }
+ result
+ }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/transform/value.rs b/third_party/rust/icu_locid/src/extensions/transform/value.rs
new file mode 100644
index 0000000000..798e84793d
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/transform/value.rs
@@ -0,0 +1,134 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::helpers::ShortSlice;
+use crate::parser::{ParserError, SubtagIterator};
+use core::ops::RangeInclusive;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A value used in a list of [`Fields`](super::Fields).
+///
+/// The value has to be a sequence of one or more alphanumerical strings
+/// separated by `-`.
+/// Each part of the sequence has to be no shorter than three characters and no
+/// longer than 8.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::transform::Value;
+///
+/// "hybrid".parse::<Value>().expect("Valid Value.");
+///
+/// "hybrid-foobar".parse::<Value>().expect("Valid Value.");
+///
+/// "no".parse::<Value>().expect_err("Invalid Value.");
+/// ```
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
+pub struct Value(ShortSlice<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>);
+
+const TYPE_LENGTH: RangeInclusive<usize> = 3..=8;
+const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
+
+impl Value {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`Value`].
+ ///
+ /// Every subtag must be 3 to 8 ASCII alphanumeric characters long. Subtags
+ /// are normalized to lowercase, and a `true` subtag is not stored, because an
+ /// empty list of subtags already represents `true`.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`ParserError::InvalidExtension`] if the input contains no subtag
+ /// or if any subtag is not a valid value subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Value;
+ ///
+ /// let value = Value::try_from_bytes(b"hybrid").expect("Parsing failed.");
+ ///
+ /// // Parsing is case-insensitive; the stored form is lowercase.
+ /// assert_eq!(
+ /// Value::try_from_bytes(b"HYBRID").expect("Parsing failed."),
+ /// value
+ /// );
+ /// ```
+ pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
+ let mut v = ShortSlice::default();
+ let mut has_value = false;
+
+ for subtag in SubtagIterator::new(input) {
+ if !Self::is_type_subtag(subtag) {
+ return Err(ParserError::InvalidExtension);
+ }
+ has_value = true;
+ // Lowercase before comparing and storing, exactly as `parse_subtag` does,
+ // so that e.g. `TRUE` is recognized as `true` and values built here compare
+ // equal to the ones produced while parsing a whole locale.
+ let val = TinyAsciiStr::from_bytes(subtag)
+ .map_err(|_| ParserError::InvalidExtension)?
+ .to_ascii_lowercase();
+ if val != TRUE_TVALUE {
+ v.push(val);
+ }
+ }
+
+ if !has_value {
+ return Err(ParserError::InvalidExtension);
+ }
+ Ok(Self(v))
+ }
+
+ /// Wraps an already-built list of subtags in a [`Value`] without validating it.
+ ///
+ /// An empty list is the representation of the `true` value.
+ pub(crate) fn from_short_slice_unchecked(
+ input: ShortSlice<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>,
+ ) -> Self {
+ Self(input)
+ }
+
+ /// Returns `true` if `t` is 3 to 8 bytes long and consists only of ASCII
+ /// alphanumeric bytes.
+ pub(crate) fn is_type_subtag(t: &[u8]) -> bool {
+ TYPE_LENGTH.contains(&t.len()) && t.iter().all(u8::is_ascii_alphanumeric)
+ }
+
+ /// Validates a single value subtag and normalizes it to lowercase.
+ ///
+ /// Returns `Ok(None)` when the lowercased subtag is `true` (which is not
+ /// stored explicitly), and `Ok(Some(subtag))` for any other valid subtag.
+ ///
+ /// # Errors
+ ///
+ /// Returns [`ParserError::InvalidSubtag`] if the bytes cannot be stored in a
+ /// `TinyAsciiStr`, and [`ParserError::InvalidExtension`] if the subtag is not
+ /// 3 to 8 characters long or is not ASCII alphanumeric.
+ pub(crate) fn parse_subtag(
+ t: &[u8],
+ ) -> Result<Option<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>, ParserError> {
+ let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;
+ if !TYPE_LENGTH.contains(&t.len()) || !s.is_ascii_alphanumeric() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ let s = s.to_ascii_lowercase();
+
+ if s == TRUE_TVALUE {
+ Ok(None)
+ } else {
+ Ok(Some(s))
+ }
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.0.is_empty() {
+ f("true")?;
+ } else {
+ self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)?;
+ }
+ Ok(())
+ }
+}
+
+/// Parses a [`Value`] from a string by delegating to [`Value::try_from_bytes`].
+impl FromStr for Value {
+ type Err = ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+}
+
+impl_writeable_for_each_subtag_str_no_test!(Value, selff, selff.0.is_empty() => alloc::borrow::Cow::Borrowed("true"));
+
+// Checks the serialized form of an empty, a single-subtag and a two-subtag `Value`.
+#[test]
+fn test_writeable() {
+ use writeable::assert_writeable_eq;
+
+ let hybrid = "hybrid".parse().unwrap();
+ let foobar = "foobar".parse().unwrap();
+
+ // The default (empty) value is written as `true`.
+ assert_writeable_eq!(Value::default(), "true");
+ assert_writeable_eq!(
+ Value::from_short_slice_unchecked(vec![hybrid].into()),
+ "hybrid"
+ );
+ // Multiple subtags are joined with `-`.
+ assert_writeable_eq!(
+ Value::from_short_slice_unchecked(vec![hybrid, foobar].into()),
+ "hybrid-foobar"
+ );
+}
diff --git a/third_party/rust/icu_locid/src/extensions/unicode/attribute.rs b/third_party/rust/icu_locid/src/extensions/unicode/attribute.rs
new file mode 100644
index 0000000000..f6fc53e057
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/unicode/attribute.rs
@@ -0,0 +1,34 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// An attribute used in a set of [`Attributes`](super::Attributes).
+ ///
+ /// An attribute has to be a sequence of alphanumerical characters no
+ /// shorter than three and no longer than eight characters.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{attribute, Attribute};
+ ///
+ /// let attr: Attribute =
+ /// "buddhist".parse().expect("Failed to parse an Attribute.");
+ ///
+ /// assert_eq!(attr, attribute!("buddhist"));
+ /// ```
+ Attribute,
+ extensions::unicode,
+ attribute,
+ extensions_unicode_attribute,
+ 3..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["no", "toolooong"],
+);
diff --git a/third_party/rust/icu_locid/src/extensions/unicode/attributes.rs b/third_party/rust/icu_locid/src/extensions/unicode/attributes.rs
new file mode 100644
index 0000000000..1cdaded306
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/unicode/attributes.rs
@@ -0,0 +1,120 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::Attribute;
+
+use crate::helpers::ShortSlice;
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// A set of [`Attribute`] elements as defined in [`Unicode Extension Attributes`].
+///
+/// [`Unicode Extension Attributes`]: https://unicode.org/reports/tr35/tr35.html#u_Extension
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Attribute, Attributes};
+///
+/// let attribute1: Attribute =
+/// "foobar".parse().expect("Failed to parse an attribute subtag.");
+///
+/// let attribute2: Attribute = "testing"
+/// .parse()
+/// .expect("Failed to parse an attribute subtag.");
+/// let mut v = vec![attribute1, attribute2];
+/// v.sort();
+/// v.dedup();
+///
+/// let attributes: Attributes = Attributes::from_vec_unchecked(v);
+/// assert_eq!(attributes.to_string(), "foobar-testing");
+/// ```
+#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Attributes(ShortSlice<Attribute>);
+
+impl Attributes {
+ /// Returns a new empty set of attributes. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Attributes;
+ ///
+ /// assert_eq!(Attributes::new(), Attributes::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(ShortSlice::new())
+ }
+
+ /// A constructor which takes a pre-sorted list of [`Attribute`] elements.
+ ///
+ /// The input is stored as-is: this function neither sorts nor deduplicates it,
+ /// so the caller is responsible for passing a sorted, duplicate-free list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Attribute, Attributes};
+ ///
+ /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed.");
+ /// let attribute2: Attribute = "testing".parse().expect("Parsing failed.");
+ /// let mut v = vec![attribute1, attribute2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let attributes = Attributes::from_vec_unchecked(v);
+ /// ```
+ ///
+ /// Notice: For performance- and memory-constrained environments, it is recommended
+ /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
+ /// and [`dedup`](Vec::dedup()).
+ pub fn from_vec_unchecked(input: Vec<Attribute>) -> Self {
+ Self(input.into())
+ }
+
+ /// Wraps an already-built [`ShortSlice`] of attributes without sorting,
+ /// deduplicating or otherwise validating it.
+ pub(crate) fn from_short_slice_unchecked(input: ShortSlice<Attribute>) -> Self {
+ Self(input)
+ }
+
+ /// Empties the [`Attributes`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{attribute, Attribute, Attributes};
+ /// use writeable::assert_writeable_eq;
+ ///
+ /// let mut attributes = Attributes::from_vec_unchecked(vec![
+ /// attribute!("foobar"),
+ /// attribute!("testing"),
+ /// ]);
+ ///
+ /// assert_writeable_eq!(attributes, "foobar-testing");
+ ///
+ /// attributes.clear();
+ ///
+ /// assert_writeable_eq!(attributes, "");
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ /// Feeds each attribute, in stored order, to `f` as a string slice.
+ ///
+ /// The first error returned by `f` stops the walk and is propagated.
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ for attribute in self.deref() {
+ f(attribute.as_str())?;
+ }
+ Ok(())
+ }
+}
+
+impl_writeable_for_subtag_list!(Attributes, "foobar", "testing");
+
+/// Gives read-only slice access to the contained [`Attribute`] elements.
+impl Deref for Attributes {
+ type Target = [Attribute];
+
+ fn deref(&self) -> &[Attribute] {
+ self.0.deref()
+ }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/unicode/key.rs b/third_party/rust/icu_locid/src/extensions/unicode/key.rs
new file mode 100644
index 0000000000..e008ffd5a8
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/unicode/key.rs
@@ -0,0 +1,32 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A key used in a list of [`Keywords`](super::Keywords).
+ ///
+ /// The key has to be two ASCII alphanumerical characters long, with the first
+ /// character being alphanumeric, and the second being alphabetic.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ ///
+ /// assert!("ca".parse::<Key>().is_ok());
+ /// ```
+ Key,
+ extensions::unicode,
+ key,
+ extensions_unicode_key,
+ 2..=2,
+ s,
+ s.all_bytes()[0].is_ascii_alphanumeric() && s.all_bytes()[1].is_ascii_alphabetic(),
+ s.to_ascii_lowercase(),
+ (s.all_bytes()[0].is_ascii_lowercase() || s.all_bytes()[0].is_ascii_digit())
+ && s.all_bytes()[1].is_ascii_lowercase(),
+ InvalidExtension,
+ ["ca", "8a"],
+ ["a", "a8", "abc"],
+);
diff --git a/third_party/rust/icu_locid/src/extensions/unicode/keywords.rs b/third_party/rust/icu_locid/src/extensions/unicode/keywords.rs
new file mode 100644
index 0000000000..c2839fa44f
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/unicode/keywords.rs
@@ -0,0 +1,393 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::borrow::Borrow;
+use core::cmp::Ordering;
+use core::iter::FromIterator;
+use litemap::LiteMap;
+
+use super::Key;
+use super::Value;
+use crate::helpers::ShortSlice;
+use crate::ordering::SubtagOrderingResult;
+
+/// A list of [`Key`]-[`Value`] pairs representing functional information
+/// about a locale's internationalization preferences.
+///
+/// Here are examples of fields used in Unicode:
+/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
+/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
+/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
+///
+/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML.
+///
+/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
+///
+/// # Examples
+///
+/// Manually build up a [`Keywords`] object:
+///
+/// ```
+/// use icu::locid::{
+/// extensions::unicode::{key, value, Keywords},
+/// locale,
+/// };
+///
+/// let keywords = [(key!("hc"), value!("h23"))]
+/// .into_iter()
+/// .collect::<Keywords>();
+///
+/// assert_eq!(&keywords.to_string(), "hc-h23");
+/// ```
+///
+/// Access a [`Keywords`] object from a [`Locale`]:
+///
+/// ```
+/// use icu::locid::{
+/// extensions::unicode::{key, value},
+/// Locale,
+/// };
+///
+/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
+///
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("hc")),
+/// Some(&value!("h23"))
+/// );
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("kc")),
+/// Some(&value!("true"))
+/// );
+///
+/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
+/// ```
+///
+/// [`Locale`]: crate::Locale
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Keywords(LiteMap<Key, Value, ShortSlice<(Key, Value)>>);
+
+impl Keywords {
+ /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ ///
+ /// assert_eq!(Keywords::new(), Keywords::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(LiteMap::new())
+ }
+
+ /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
+ #[inline]
+ pub const fn new_single(key: Key, value: Value) -> Self {
+ Self(LiteMap::from_sorted_store_unchecked(
+ ShortSlice::new_single((key, value)),
+ ))
+ }
+
+ /// Returns `true` if there are no keywords.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::locale;
+ /// use icu::locid::Locale;
+ ///
+ /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
+ /// let loc2 = locale!("und-u-ca-buddhist");
+ ///
+ /// assert!(loc1.extensions.unicode.keywords.is_empty());
+ /// assert!(!loc2.extensions.unicode.keywords.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{key, value, Keywords};
+ ///
+ /// let keywords = [(key!("ca"), value!("gregory"))]
+ /// .into_iter()
+ /// .collect::<Keywords>();
+ ///
+ /// assert!(&keywords.contains_key(&key!("ca")));
+ /// ```
+ pub fn contains_key<Q>(&self, key: &Q) -> bool
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.contains_key(key)
+ }
+
+ /// Returns a reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{key, value, Keywords};
+ ///
+ /// let keywords = [(key!("ca"), value!("buddhist"))]
+ /// .into_iter()
+ /// .collect::<Keywords>();
+ ///
+ /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("buddhist")));
+ /// ```
+ pub fn get<Q>(&self, key: &Q) -> Option<&Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get(key)
+ }
+
+ /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ /// Returns `None` if the key is not present in the list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{key, value, Keywords};
+ ///
+ /// let mut keywords = [(key!("ca"), value!("buddhist"))]
+ /// .into_iter()
+ /// .collect::<Keywords>();
+ ///
+ /// if let Some(value) = keywords.get_mut(&key!("ca")) {
+ /// *value = value!("gregory");
+ /// }
+ /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("gregory")));
+ /// ```
+ pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get_mut(key)
+ }
+
+ /// Sets the specified keyword, returning the old value if it already existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::extensions::unicode::Value;
+ /// use icu::locid::extensions::unicode::{key, value};
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// let old_value = loc
+ /// .extensions
+ /// .unicode
+ /// .keywords
+ /// .set(key!("ca"), value!("japanese"));
+ ///
+ /// assert_eq!(old_value, Some(value!("buddhist")));
+ /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
+ /// ```
+ pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
+ self.0.insert(key, value)
+ }
+
+ /// Removes the specified keyword, returning the old value if it existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{key, Key};
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// loc.extensions.unicode.keywords.remove(key!("ca"));
+ /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
+ /// ```
+ pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
+ self.0.remove(key.borrow())
+ }
+
+ /// Clears all Unicode extension keywords, leaving Unicode attributes.
+ ///
+ /// Returns the old Unicode extension keywords.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
+ /// loc.extensions.unicode.keywords.clear();
+ /// assert_eq!(loc, "und-u-hello".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ /// Retains a subset of keywords as specified by the predicate function.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::key;
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
+ ///
+ /// loc.extensions
+ /// .unicode
+ /// .keywords
+ /// .retain_by_key(|&k| k == key!("hc"));
+ /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
+ ///
+ /// loc.extensions
+ /// .unicode
+ /// .keywords
+ /// .retain_by_key(|&k| k == key!("ms"));
+ /// assert_eq!(loc, Locale::UND);
+ /// ```
+ pub fn retain_by_key<F>(&mut self, mut predicate: F)
+ where
+ F: FnMut(&Key) -> bool,
+ {
+ self.0.retain(|k, _| predicate(k))
+ }
+
+ /// Compare this [`Keywords`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] =
+ /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_kwds = format!("und-u-{}", a)
+ /// .parse::<Locale>()
+ /// .unwrap()
+ /// .extensions
+ /// .unicode
+ /// .keywords;
+ /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+ self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+ }
+
+ /// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"];
+ ///
+ /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let kwds = locale!("und").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+ I: Iterator<Item = &'l [u8]>,
+ {
+ // Walk our own subtags in serialization order, pairing each one with the
+ // next subtag from `subtags`; the first difference aborts the walk.
+ let outcome = self.for_each_subtag_str(&mut |ours| match subtags.next() {
+ // `subtags` ran out first, so `self` compares as greater.
+ None => Err(Ordering::Greater),
+ Some(theirs) => match ours.as_bytes().cmp(theirs) {
+ Ordering::Equal => Ok(()),
+ unequal => Err(unequal),
+ },
+ });
+ if let Err(ordering) = outcome {
+ return SubtagOrderingResult::Ordering(ordering);
+ }
+ // All of our subtags matched; hand back whatever is left of `subtags`.
+ SubtagOrderingResult::Subtags(subtags)
+ }
+
+ /// Feeds every keyword to `f` in stored order: the key first, followed by
+ /// the subtags of its value.
+ ///
+ /// The first error returned by `f` stops the walk and is propagated.
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.0.iter().try_for_each(|(key, value)| {
+ f(key.as_str())?;
+ value.for_each_subtag_str(f)
+ })
+ }
+
+ /// This needs to be its own method to help with type inference in helpers.rs
+ #[cfg(test)]
+ pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
+ v.into_iter().collect()
+ }
+}
+
+/// Wraps an existing key-value map as [`Keywords`] without any further processing.
+impl From<LiteMap<Key, Value, ShortSlice<(Key, Value)>>> for Keywords {
+ fn from(map: LiteMap<Key, Value, ShortSlice<(Key, Value)>>) -> Self {
+ Self(map)
+ }
+}
+
+/// Collects `(Key, Value)` pairs into a [`Keywords`] list.
+impl FromIterator<(Key, Value)> for Keywords {
+ fn from_iter<I>(iter: I) -> Self
+ where
+ I: IntoIterator<Item = (Key, Value)>,
+ {
+ // The target map type is fixed by the newtype's field.
+ Self(iter.into_iter().collect())
+ }
+}
+
+impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
diff --git a/third_party/rust/icu_locid/src/extensions/unicode/mod.rs b/third_party/rust/icu_locid/src/extensions/unicode/mod.rs
new file mode 100644
index 0000000000..95f1a2d781
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/unicode/mod.rs
@@ -0,0 +1,237 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Unicode Extensions provide information about user preferences in a given locale.
+//!
+//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
+//! [`Attributes`].
+//!
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::unicode::{attribute, key, value, Unicode};
+//! use icu::locid::Locale;
+//!
+//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
+//!
+//! assert_eq!(
+//! loc.extensions.unicode.keywords.get(&key!("hc")),
+//! Some(&value!("h12"))
+//! );
+//! assert!(loc
+//! .extensions
+//! .unicode
+//! .attributes
+//! .contains(&attribute!("foobar")));
+//! ```
+mod attribute;
+mod attributes;
+mod key;
+mod keywords;
+mod value;
+
+#[doc(inline)]
+pub use attribute::{attribute, Attribute};
+pub use attributes::Attributes;
+#[doc(inline)]
+pub use key::{key, Key};
+pub use keywords::Keywords;
+#[doc(inline)]
+pub use value::{value, Value};
+
+use crate::helpers::ShortSlice;
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+use litemap::LiteMap;
+
+/// Unicode Extensions provide information about user preferences in a given locale.
+///
+/// A list of [`Unicode BCP47 U Extensions`] as defined in the [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Unicode extensions provide subtags that specify language and/or locale-based behavior
+/// or refinements to language tags, according to work done by the Unicode Consortium.
+/// (See [`RFC 6067`] for details).
+///
+/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
+/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{key, value};
+/// use icu::locid::Locale;
+///
+/// let loc: Locale =
+/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
+///
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("ca")),
+/// Some(&value!("buddhist"))
+/// );
+/// ```
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
+pub struct Unicode {
+ /// The key-value pairs present in this locale extension, with each extension key subtag
+ /// associated with its provided value subtag.
+ pub keywords: Keywords,
+ /// A canonically ordered sequence of single standalone subtags for this locale extension.
+ pub attributes: Attributes,
+}
+
+impl Unicode {
+ /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Unicode;
+ ///
+ /// assert_eq!(Unicode::new(), Unicode::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self {
+ keywords: Keywords::new(),
+ attributes: Attributes::new(),
+ }
+ }
+
+ /// Returns [`true`] if both the list of keywords and the list of attributes are empty.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
+ ///
+ /// assert!(!loc.extensions.unicode.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.keywords.is_empty() && self.attributes.is_empty()
+ }
+
+ /// Clears all Unicode extension keywords and attributes, effectively removing
+ /// the Unicode extension.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale =
+ /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
+ /// loc.extensions.unicode.clear();
+ /// assert_eq!(loc, "und-t-mul".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) {
+ self.keywords.clear();
+ self.attributes.clear();
+ }
+
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { // parses the leading attributes first, then the key/value pairs
+ let mut attributes = ShortSlice::new();
+
+ while let Some(subtag) = iter.peek() {
+ if let Ok(attr) = Attribute::try_from_bytes(subtag) {
+ if let Err(idx) = attributes.binary_search(&attr) { // not present yet: insert at its sorted position (duplicates are skipped)
+ attributes.insert(idx, attr);
+ }
+ } else {
+ break; // the first subtag that is not an attribute ends the attribute list
+ }
+ iter.next(); // consume the subtag that was just peeked
+ }
+
+ let mut keywords = LiteMap::new();
+
+ let mut current_keyword = None;
+ let mut current_value = ShortSlice::new();
+
+ while let Some(subtag) = iter.peek() {
+ let slen = subtag.len();
+ if slen == 2 { // a two-byte subtag is treated as the start of a new keyword
+ if let Some(kw) = current_keyword.take() {
+ keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); // flush the previous keyword; the result of `try_insert` is ignored
+ current_value = ShortSlice::new();
+ }
+ current_keyword = Some(Key::try_from_bytes(subtag)?);
+ } else if current_keyword.is_some() {
+ match Value::parse_subtag(subtag) {
+ Ok(Some(t)) => current_value.push(t),
+ Ok(None) => {} // `parse_subtag` yields `None` for the value "true": nothing is stored
+ Err(_) => break, // not a value subtag: stop without consuming it
+ }
+ } else {
+ break; // a subtag that is not two bytes long, with no keyword to attach it to
+ }
+ iter.next(); // consume the subtag that was just peeked
+ }
+
+ if let Some(kw) = current_keyword.take() { // flush the keyword that was still being collected
+ keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
+ }
+
+ // Ensure we've defined at least one attribute or keyword
+ if attributes.is_empty() && keywords.is_empty() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ Ok(Self {
+ keywords: keywords.into(),
+ attributes: Attributes::from_short_slice_unchecked(attributes),
+ })
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> // visits "u", then attributes, then keywords
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.is_empty() {
+ return Ok(()); // an empty extension contributes no subtags, not even "u"
+ }
+ f("u")?;
+ self.attributes.for_each_subtag_str(f)?;
+ self.keywords.for_each_subtag_str(f)?;
+ Ok(())
+ }
+}
+
+writeable::impl_display_with_writeable!(Unicode);
+
+impl writeable::Writeable for Unicode {
+    /// Writes `u`, followed by the attributes and the keywords (each preceded by
+    /// `-`) when they are non-empty. Writes nothing for an empty extension.
+    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+        let has_keywords = !self.keywords.is_empty();
+        let has_attributes = !self.attributes.is_empty();
+        if !(has_keywords || has_attributes) {
+            return Ok(());
+        }
+        sink.write_str("u")?;
+        if has_attributes {
+            sink.write_char('-')?;
+            writeable::Writeable::write_to(&self.attributes, sink)?;
+        }
+        if has_keywords {
+            sink.write_char('-')?;
+            writeable::Writeable::write_to(&self.keywords, sink)?;
+        }
+        Ok(())
+    }
+
+    /// Mirrors `write_to`: one byte for `u`, plus each non-empty part and its
+    /// leading separator.
+    fn writeable_length_hint(&self) -> writeable::LengthHint {
+        let has_keywords = !self.keywords.is_empty();
+        let has_attributes = !self.attributes.is_empty();
+        if !(has_keywords || has_attributes) {
+            return writeable::LengthHint::exact(0);
+        }
+        let mut total = writeable::LengthHint::exact(1);
+        if has_attributes {
+            total += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
+        }
+        if has_keywords {
+            total += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
+        }
+        total
+    }
+}
diff --git a/third_party/rust/icu_locid/src/extensions/unicode/value.rs b/third_party/rust/icu_locid/src/extensions/unicode/value.rs
new file mode 100644
index 0000000000..d935656a97
--- /dev/null
+++ b/third_party/rust/icu_locid/src/extensions/unicode/value.rs
@@ -0,0 +1,196 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::helpers::ShortSlice;
+use crate::parser::{ParserError, SubtagIterator};
+use core::ops::RangeInclusive;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A value used in a list of [`Keywords`](super::Keywords).
+///
+/// The value has to be a sequence of one or more alphanumerical strings
+/// separated by `-`.
+/// Each part of the sequence has to be no shorter than three characters and no
+/// longer than 8.
+///
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{value, Value};
+/// use writeable::assert_writeable_eq;
+///
+/// assert_writeable_eq!(value!("gregory"), "gregory");
+/// assert_writeable_eq!(
+/// "islamic-civil".parse::<Value>().unwrap(),
+/// "islamic-civil"
+/// );
+///
+/// // The value "true" has the special, empty string representation
+/// assert_eq!(value!("true").to_string(), "");
+/// ```
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
+pub struct Value(ShortSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
+
+const VALUE_LENGTH: RangeInclusive<usize> = 3..=8; // accepted byte length of a single value subtag
+const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); // subtag that is parsed to "no subtag" instead of being stored
+
+impl Value {
+    /// A constructor which takes a UTF-8 slice, parses it and
+    /// produces a well-formed [`Value`].
+    ///
+    /// Subtags are lowercased, and a `true` subtag (in any casing) is dropped
+    /// rather than stored.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::unicode::Value;
+    ///
+    /// Value::try_from_bytes(b"buddhist").expect("Parsing failed.");
+    /// ```
+    pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
+        let mut v = ShortSlice::new();
+
+        if !input.is_empty() {
+            for subtag in SubtagIterator::new(input) {
+                let val = Self::subtag_from_bytes(subtag)?;
+                if let Some(val) = val {
+                    v.push(val);
+                }
+            }
+        }
+        Ok(Self(v))
+    }
+
+    /// Const constructor for when the value contains only a single subtag.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu::locid::extensions::unicode::Value;
+    ///
+    /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
+    /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
+    /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
+    /// ```
+    pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
+        match Self::subtag_from_bytes(subtag) {
+            Err(_) => Err(ParserError::InvalidExtension),
+            Ok(option) => Ok(Self::from_tinystr(option)),
+        }
+    }
+
+    #[doc(hidden)]
+    pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] {
+        &self.0
+    }
+
+    #[doc(hidden)]
+    pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> {
+        self.0.single()
+    }
+
+    #[doc(hidden)]
+    pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
+        match subtag {
+            None => Self(ShortSlice::new()),
+            Some(val) => {
+                debug_assert!(val.is_ascii_alphanumeric());
+                // "true" must be passed as `None`; see `parse_subtag_from_bytes_manual_slice`.
+                debug_assert!(!matches!(val, TRUE_VALUE));
+                Self(ShortSlice::new_single(val))
+            }
+        }
+    }
+
+    /// Wraps `input` without validating or normalizing its subtags.
+    pub(crate) fn from_short_slice_unchecked(input: ShortSlice<TinyAsciiStr<8>>) -> Self {
+        Self(input)
+    }
+
+    #[doc(hidden)]
+    pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+        Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
+    }
+
+    /// Non-`const` twin of [`Self::subtag_from_bytes`] used by the parser.
+    pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+        Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
+    }
+
+    /// Parses `bytes[start..end]` as a single value subtag.
+    ///
+    /// Returns `Ok(Some(subtag))` with the subtag lowercased, or `Ok(None)` when
+    /// the subtag is `true` (in any casing), which is represented by the absence
+    /// of a subtag.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParserError::InvalidExtension`] if the length is outside
+    /// `VALUE_LENGTH` or the subtag is not ASCII alphanumeric, and
+    /// [`ParserError::InvalidSubtag`] if the bytes cannot be stored in a
+    /// [`TinyAsciiStr`].
+    pub(crate) const fn parse_subtag_from_bytes_manual_slice(
+        bytes: &[u8],
+        start: usize,
+        end: usize,
+    ) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+        let slice_len = end - start;
+        if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
+            return Err(ParserError::InvalidExtension);
+        }
+
+        match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
+            Ok(s) if s.is_ascii_alphanumeric() => {
+                // Normalize the case *before* comparing with `TRUE_VALUE`; otherwise
+                // inputs such as "TRUE" would be stored as the literal subtag "true".
+                let lowered = s.to_ascii_lowercase();
+                if matches!(lowered, TRUE_VALUE) {
+                    Ok(None)
+                } else {
+                    Ok(Some(lowered))
+                }
+            }
+            Ok(_) => Err(ParserError::InvalidExtension),
+            Err(_) => Err(ParserError::InvalidSubtag),
+        }
+    }
+
+    /// Calls `f` with each stored subtag in order, stopping at the first error.
+    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+    where
+        F: FnMut(&str) -> Result<(), E>,
+    {
+        self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)
+    }
+}
+
+// Parsing a string is the same as parsing its UTF-8 bytes.
+impl FromStr for Value {
+    type Err = ParserError;
+
+    fn from_str(source: &str) -> Result<Self, Self::Err> {
+        let bytes = source.as_bytes();
+        Value::try_from_bytes(bytes)
+    }
+}
+
+impl_writeable_for_subtag_list!(Value, "islamic", "civil");
+
+/// A macro allowing for compile-time construction of a valid Unicode [`Value`] from a single subtag.
+///
+/// The macro only supports single-subtag values.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{key, value};
+/// use icu::locid::Locale;
+///
+/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
+///
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("ca")),
+/// Some(&value!("buddhist"))
+/// );
+/// ```
+///
+/// [`Value`]: crate::extensions::unicode::Value
+#[macro_export]
+#[doc(hidden)]
+macro_rules! extensions_unicode_value {
+ ($value:literal) => {{
+ // What we want:
+ // const R: $crate::extensions::unicode::Value =
+ // match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
+ // Ok(r) => r,
+ // #[allow(clippy::panic)] // const context
+ // _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
+ // };
+ // Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
+ const R: $crate::extensions::unicode::Value =
+ $crate::extensions::unicode::Value::from_tinystr(
+ match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
+ Ok(r) => r,
+ _ => panic!(concat!("Invalid Unicode extension value: ", $value)), // evaluated in a `const` initializer, so an invalid literal fails the build
+ },
+ );
+ R
+ }};
+}
+#[doc(inline)]
+pub use extensions_unicode_value as value;
diff --git a/third_party/rust/icu_locid/src/helpers.rs b/third_party/rust/icu_locid/src/helpers.rs
new file mode 100644
index 0000000000..d12435fbf3
--- /dev/null
+++ b/third_party/rust/icu_locid/src/helpers.rs
@@ -0,0 +1,698 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::iter::FromIterator;
+
+use alloc::boxed::Box;
+use alloc::vec;
+use alloc::vec::Vec;
+use core::ops::{Deref, DerefMut};
+use litemap::store::*;
+
+/// Internal: A vector that needs no allocation, and can be a constant value, when its length is 0 or 1.
+/// Using ZeroOne(Option<T>) saves 8 bytes in ShortSlice via niche optimization.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) enum ShortSlice<T> {
+ ZeroOne(Option<T>), // `None` = empty, `Some(item)` = exactly one element
+ Multi(Box<[T]>), // boxed slice; the constructors in this file only build it with two or more elements
+}
+
+impl<T> ShortSlice<T> {
+ #[inline]
+ pub const fn new() -> Self { // empty slice, usable in `const` contexts
+ Self::ZeroOne(None)
+ }
+
+ #[inline]
+ pub const fn new_single(item: T) -> Self { // one-element slice, usable in `const` contexts
+ Self::ZeroOne(Some(item))
+ }
+
+ pub fn push(&mut self, item: T) { // appends `item`, switching to `Multi` once there are two elements
+ *self = match core::mem::replace(self, Self::ZeroOne(None)) { // take ownership of the current contents, leaving an empty placeholder
+ ShortSlice::ZeroOne(None) => ShortSlice::ZeroOne(Some(item)),
+ ShortSlice::ZeroOne(Some(prev_item)) => {
+ ShortSlice::Multi(vec![prev_item, item].into_boxed_slice())
+ }
+ ShortSlice::Multi(items) => {
+ let mut items = items.into_vec(); // boxed slice -> Vec so that it can grow
+ items.push(item);
+ ShortSlice::Multi(items.into_boxed_slice())
+ }
+ };
+ }
+
+ #[inline]
+ pub const fn single(&self) -> Option<&T> { // `Some` only for the `ZeroOne(Some(_))` representation
+ match self {
+ ShortSlice::ZeroOne(Some(v)) => Some(v),
+ _ => None,
+ }
+ }
+
+ #[inline]
+ pub fn len(&self) -> usize { // number of stored elements
+ match self {
+ ShortSlice::ZeroOne(None) => 0,
+ ShortSlice::ZeroOne(_) => 1,
+ ShortSlice::Multi(ref v) => v.len(),
+ }
+ }
+
+ pub fn insert(&mut self, index: usize, elt: T) { // inserts `elt` at `index`; panics if `index > len`
+ assert!(
+ index <= self.len(),
+ "insertion index (is {}) should be <= len (is {})",
+ index,
+ self.len()
+ );
+
+ *self = match core::mem::replace(self, ShortSlice::ZeroOne(None)) {
+ ShortSlice::ZeroOne(None) => ShortSlice::ZeroOne(Some(elt)),
+ ShortSlice::ZeroOne(Some(item)) => {
+ let items = if index == 0 { // the assert above leaves only 0 and 1 as possible indices here
+ vec![elt, item].into_boxed_slice()
+ } else {
+ vec![item, elt].into_boxed_slice()
+ };
+ ShortSlice::Multi(items)
+ }
+ ShortSlice::Multi(items) => {
+ let mut items = items.into_vec();
+ items.insert(index, elt);
+ ShortSlice::Multi(items.into_boxed_slice())
+ }
+ }
+ }
+
+ pub fn remove(&mut self, index: usize) -> T { // removes and returns the element at `index`; panics if `index >= len`
+ assert!(
+ index < self.len(),
+ "removal index (is {}) should be < len (is {})",
+ index,
+ self.len()
+ );
+
+ let (replaced, removed_item) = match core::mem::replace(self, ShortSlice::ZeroOne(None)) {
+ ShortSlice::ZeroOne(None) => unreachable!(), // len is 0 in this case, so the assert above has already panicked
+ ShortSlice::ZeroOne(Some(v)) => (ShortSlice::ZeroOne(None), v),
+ ShortSlice::Multi(v) => {
+ let mut v = v.into_vec();
+ let removed_item = v.remove(index);
+ match v.len() {
+ #[allow(clippy::unwrap_used)]
+ // we know that the vec has exactly one element left
+ 1 => (ShortSlice::ZeroOne(Some(v.pop().unwrap())), removed_item),
+ // v has at least 2 elements, create a Multi variant
+ _ => (ShortSlice::Multi(v.into_boxed_slice()), removed_item),
+ }
+ }
+ };
+ *self = replaced;
+ removed_item
+ }
+
+ #[inline]
+ pub fn clear(&mut self) { // resets to the empty representation
+ let _ = core::mem::replace(self, ShortSlice::ZeroOne(None)); // the previous contents are dropped here
+ }
+
+ pub fn retain<F>(&mut self, mut f: F) // keeps only the elements for which `f` returns true
+ where
+ F: FnMut(&T) -> bool,
+ {
+ *self = match core::mem::take(self) {
+ Self::ZeroOne(Some(one)) if f(&one) => Self::ZeroOne(Some(one)),
+ Self::ZeroOne(_) => Self::ZeroOne(None),
+ Self::Multi(slice) => {
+ let mut vec = slice.into_vec();
+ vec.retain(f);
+ Self::from(vec) // `From<Vec<T>>` picks `ZeroOne` or `Multi` based on the remaining length
+ }
+ };
+ }
+}
+
+// Exposes the contents as a plain slice, whatever the storage variant.
+impl<T> Deref for ShortSlice<T> {
+    type Target = [T];
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            ShortSlice::Multi(items) => items,
+            ShortSlice::ZeroOne(Some(only)) => core::slice::from_ref(only),
+            ShortSlice::ZeroOne(None) => &[],
+        }
+    }
+}
+
+// Mutable counterpart of `Deref`: the contents as a mutable slice.
+impl<T> DerefMut for ShortSlice<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        match self {
+            ShortSlice::Multi(items) => items,
+            ShortSlice::ZeroOne(Some(only)) => core::slice::from_mut(only),
+            ShortSlice::ZeroOne(None) => &mut [],
+        }
+    }
+}
+
+// Converts a vector, keeping zero or one element inline and boxing anything longer.
+impl<T> From<Vec<T>> for ShortSlice<T> {
+    fn from(mut v: Vec<T>) -> Self {
+        if v.len() >= 2 {
+            ShortSlice::Multi(v.into_boxed_slice())
+        } else {
+            // At most one element is present: `pop` yields it, or `None` for an empty vec.
+            ShortSlice::ZeroOne(v.pop())
+        }
+    }
+}
+
+// The default value is the empty slice.
+impl<T> Default for ShortSlice<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T> FromIterator<T> for ShortSlice<T> {
+    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
+        let mut source = iter.into_iter();
+        // Pull two items up front (always two `next` calls) to find out whether a
+        // heap allocation is needed at all.
+        let head = source.next();
+        let follower = source.next();
+        let (first, second) = match (head, follower) {
+            (Some(first), Some(second)) => (first, second),
+            // Fewer than two leading items: keep whatever the first call produced.
+            (lone, _) => return Self::ZeroOne(lone),
+        };
+        // Size hint behaviour same as `Vec::extend` + 2
+        let mut items = Vec::with_capacity(source.size_hint().0.saturating_add(3));
+        items.push(first);
+        items.push(second);
+        items.extend(source);
+        Self::Multi(items.into_boxed_slice())
+    }
+}
+
+// The empty store constant is the empty slice.
+impl<K, V> StoreConstEmpty<K, V> for ShortSlice<(K, V)> {
+    const EMPTY: ShortSlice<(K, V)> = ShortSlice::new();
+}
+
+// Read-only store operations, mostly delegated to the slice obtained through `Deref`.
+impl<K, V> Store<K, V> for ShortSlice<(K, V)> {
+    #[inline]
+    fn lm_len(&self) -> usize {
+        ShortSlice::len(self)
+    }
+
+    #[inline]
+    fn lm_is_empty(&self) -> bool {
+        // Only the `ZeroOne(None)` representation counts as empty here.
+        matches!(*self, ShortSlice::ZeroOne(None))
+    }
+
+    #[inline]
+    fn lm_get(&self, index: usize) -> Option<(&K, &V)> {
+        let (key, value) = self.get(index)?;
+        Some((key, value))
+    }
+
+    #[inline]
+    fn lm_last(&self) -> Option<(&K, &V)> {
+        self.last().map(|(key, value)| (key, value))
+    }
+
+    #[inline]
+    fn lm_binary_search_by<F>(&self, mut cmp: F) -> Result<usize, usize>
+    where
+        F: FnMut(&K) -> core::cmp::Ordering,
+    {
+        self.binary_search_by(|entry| cmp(&entry.0))
+    }
+}
+
+// Reuses the `Vec` implementation for sorting, then converts the result.
+impl<K: Ord, V> StoreFromIterable<K, V> for ShortSlice<(K, V)> {
+    fn lm_sort_from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
+        Self::from(Vec::<(K, V)>::lm_sort_from_iter(iter))
+    }
+}
+
+// Mutating store operations, forwarded to the inherent `ShortSlice` methods.
+impl<K, V> StoreMut<K, V> for ShortSlice<(K, V)> {
+    /// The requested capacity is ignored; the store always starts out empty.
+    fn lm_with_capacity(_capacity: usize) -> Self {
+        Self::ZeroOne(None)
+    }
+
+    /// Does nothing.
+    fn lm_reserve(&mut self, _additional: usize) {}
+
+    fn lm_get_mut(&mut self, index: usize) -> Option<(&K, &mut V)> {
+        let entry = self.get_mut(index)?;
+        Some((&entry.0, &mut entry.1))
+    }
+
+    fn lm_push(&mut self, key: K, value: V) {
+        let entry = (key, value);
+        self.push(entry)
+    }
+
+    fn lm_insert(&mut self, index: usize, key: K, value: V) {
+        let entry = (key, value);
+        self.insert(index, entry)
+    }
+
+    fn lm_remove(&mut self, index: usize) -> (K, V) {
+        ShortSlice::remove(self, index)
+    }
+
+    fn lm_clear(&mut self) {
+        ShortSlice::clear(self);
+    }
+
+    fn lm_retain<F>(&mut self, mut predicate: F)
+    where
+        F: FnMut(&K, &V) -> bool,
+    {
+        self.retain(|entry| predicate(&entry.0, &entry.1))
+    }
+}
+
+impl<'a, K: 'a, V: 'a> StoreIterable<'a, K, V> for ShortSlice<(K, V)> {
+ type KeyValueIter =
+ core::iter::Map<core::slice::Iter<'a, (K, V)>, for<'r> fn(&'r (K, V)) -> (&'r K, &'r V)>; // a fn pointer, so the iterator type can be named
+
+ fn lm_iter(&'a self) -> Self::KeyValueIter {
+ self.iter().map(|elt| (&elt.0, &elt.1)) // non-capturing closure, coerced to the fn pointer in `KeyValueIter`
+ }
+}
+
+impl<K, V> StoreFromIterator<K, V> for ShortSlice<(K, V)> {}
+
+#[test]
+fn test_short_slice_impl() {
+ litemap::testing::check_store::<ShortSlice<(u32, u64)>>();
+}
+
+macro_rules! impl_tinystr_subtag {
+ (
+ $(#[$doc:meta])*
+ $name:ident,
+ $($path:ident)::+,
+ $macro_name:ident,
+ $legacy_macro_name:ident,
+ $len_start:literal..=$len_end:literal,
+ $tinystr_ident:ident,
+ $validate:expr,
+ $normalize:expr,
+ $is_normalized:expr,
+ $error:ident,
+ [$good_example:literal $(,$more_good_examples:literal)*],
+ [$bad_example:literal $(, $more_bad_examples:literal)*],
+ ) => {
+ #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
+ #[cfg_attr(feature = "serde", derive(serde::Serialize))]
+ #[repr(transparent)]
+ $(#[$doc])*
+ pub struct $name(tinystr::TinyAsciiStr<$len_end>);
+
+ impl $name {
+ /// A constructor which takes a UTF-8 slice, parses it and
+ #[doc = concat!("produces a well-formed [`", stringify!($name), "`].")]
+ ///
+ /// # Examples
+ ///
+ /// ```
+ #[doc = concat!("use icu_locid::", stringify!($($path::)+), stringify!($name), ";")]
+ ///
+ #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($good_example), ").is_ok());")]
+ #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($bad_example), ").is_err());")]
+ /// ```
+ pub const fn try_from_bytes(v: &[u8]) -> Result<Self, crate::parser::errors::ParserError> {
+ Self::try_from_bytes_manual_slice(v, 0, v.len())
+ }
+
+ /// Equivalent to [`try_from_bytes(bytes[start..end])`](Self::try_from_bytes),
+ /// but callable in a `const` context (which range indexing is not).
+ pub const fn try_from_bytes_manual_slice(
+ v: &[u8],
+ start: usize,
+ end: usize,
+ ) -> Result<Self, crate::parser::errors::ParserError> {
+ let slen = end - start; // length of the requested window
+
+ #[allow(clippy::double_comparisons)] // if len_start == len_end
+ if slen < $len_start || slen > $len_end {
+ return Err(crate::parser::errors::ParserError::$error);
+ }
+
+ match tinystr::TinyAsciiStr::from_bytes_manual_slice(v, start, end) {
+ Ok($tinystr_ident) if $validate => Ok(Self($normalize)), // the value is bound as `$tinystr_ident` for the caller-supplied `$validate` / `$normalize` expressions
+ _ => Err(crate::parser::errors::ParserError::$error), // a parse failure and a failed validation map to the same error
+ }
+ }
+
+ #[doc = concat!("Safely creates a [`", stringify!($name), "`] from its raw format")]
+ /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`],
+ /// this constructor only takes normalized values.
+ pub const fn try_from_raw(
+     v: [u8; $len_end],
+ ) -> Result<Self, crate::parser::errors::ParserError> {
+     match tinystr::TinyAsciiStr::<$len_end>::try_from_raw(v) {
+         // Accept only values that are long enough and already in normalized form.
+         Ok($tinystr_ident) if $tinystr_ident.len() >= $len_start && $is_normalized => {
+             Ok(Self($tinystr_ident))
+         }
+         _ => Err(crate::parser::errors::ParserError::$error),
+     }
+ }
+
+ #[doc = concat!("Unsafely creates a [`", stringify!($name), "`] from its raw format")]
+ /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`],
+ /// this constructor only takes normalized values.
+ ///
+ /// # Safety
+ ///
+ /// This function is safe iff [`Self::try_from_raw`] returns an `Ok`. This is the case
+ /// for inputs that are correctly normalized.
+ pub const unsafe fn from_raw_unchecked(v: [u8; $len_end]) -> Self {
+ Self(tinystr::TinyAsciiStr::from_bytes_unchecked(v))
+ }
+
+ /// Deconstructs into a raw format to be consumed by
+ /// [`from_raw_unchecked`](Self::from_raw_unchecked()) or
+ /// [`try_from_raw`](Self::try_from_raw()).
+ pub const fn into_raw(self) -> [u8; $len_end] {
+ *self.0.all_bytes()
+ }
+
+ #[inline]
+ /// A helper function for displaying as a `&str`.
+ pub const fn as_str(&self) -> &str {
+ self.0.as_str()
+ }
+
+ #[doc(hidden)]
+ pub const fn into_tinystr(&self) -> tinystr::TinyAsciiStr<$len_end> {
+ self.0
+ }
+
+ /// Compare with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted
+ /// `self` to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`](core::cmp::Ordering::Equal)
+ /// is `self.as_str().as_bytes()`.
+ #[inline]
+ pub fn strict_cmp(self, other: &[u8]) -> core::cmp::Ordering {
+ self.as_str().as_bytes().cmp(other)
+ }
+
+ /// Compare with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string and then performed a structural comparison.
+ ///
+ #[inline]
+ pub fn normalizing_eq(self, other: &str) -> bool {
+ self.as_str().eq_ignore_ascii_case(other)
+ }
+ }
+
+ impl core::str::FromStr for $name {
+ type Err = crate::parser::errors::ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+ }
+
+ impl<'l> From<&'l $name> for &'l str {
+ fn from(input: &'l $name) -> Self {
+ input.as_str()
+ }
+ }
+
+ impl From<$name> for tinystr::TinyAsciiStr<$len_end> {
+ fn from(input: $name) -> Self {
+ input.into_tinystr()
+ }
+ }
+
+ impl writeable::Writeable for $name {
+ #[inline]
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ sink.write_str(self.as_str())
+ }
+ #[inline]
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ writeable::LengthHint::exact(self.0.len())
+ }
+ #[inline]
+ fn write_to_string(&self) -> alloc::borrow::Cow<str> {
+ alloc::borrow::Cow::Borrowed(self.0.as_str())
+ }
+ }
+
+ writeable::impl_display_with_writeable!($name);
+
+ #[doc = concat!("A macro allowing for compile-time construction of valid [`", stringify!($name), "`] subtags.")]
+ ///
+ /// # Examples
+ ///
+ /// Parsing errors don't have to be handled at runtime:
+ /// ```
+ /// assert_eq!(
+ #[doc = concat!(" icu_locid::", $(stringify!($path), "::",)+ stringify!($macro_name), "!(", stringify!($good_example) ,"),")]
+ #[doc = concat!(" ", stringify!($good_example), ".parse::<icu_locid::", $(stringify!($path), "::",)+ stringify!($name), ">().unwrap()")]
+ /// );
+ /// ```
+ ///
+ /// Invalid input is a compile failure:
+ /// ```compile_fail,E0080
+ #[doc = concat!("icu_locid::", $(stringify!($path), "::",)+ stringify!($macro_name), "!(", stringify!($bad_example) ,");")]
+ /// ```
+ ///
+ #[doc = concat!("[`", stringify!($name), "`]: crate::", $(stringify!($path), "::",)+ stringify!($name))]
+ #[macro_export]
+ #[doc(hidden)]
+ macro_rules! $legacy_macro_name {
+ ($string:literal) => {{
+ use $crate::$($path ::)+ $name;
+ const R: $name =
+ match $name::try_from_bytes($string.as_bytes()) {
+ Ok(r) => r,
+ #[allow(clippy::panic)] // const context
+ _ => panic!(concat!("Invalid ", $(stringify!($path), "::",)+ stringify!($name), ": ", $string)),
+ };
+ R
+ }};
+ }
+ #[doc(inline)]
+ pub use $legacy_macro_name as $macro_name;
+
+ #[cfg(feature = "databake")]
+ impl databake::Bake for $name {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ env.insert("icu_locid");
+ let string = self.as_str();
+ databake::quote! { icu_locid::$($path::)+ $macro_name!(#string) }
+ }
+ }
+
+ #[test]
+ fn test_construction() {
+ let maybe = $name::try_from_bytes($good_example.as_bytes());
+ assert!(maybe.is_ok());
+ assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw()));
+ assert_eq!(maybe.unwrap().as_str(), $good_example);
+ $(
+ let maybe = $name::try_from_bytes($more_good_examples.as_bytes());
+ assert!(maybe.is_ok());
+ assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw()));
+ assert_eq!(maybe.unwrap().as_str(), $more_good_examples);
+ )*
+ assert!($name::try_from_bytes($bad_example.as_bytes()).is_err());
+ $(
+ assert!($name::try_from_bytes($more_bad_examples.as_bytes()).is_err());
+ )*
+ }
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(&$good_example.parse::<$name>().unwrap(), $good_example);
+ $(
+ writeable::assert_writeable_eq!($more_good_examples.parse::<$name>().unwrap(), $more_good_examples);
+ )*
+ }
+
+ #[cfg(feature = "serde")]
+ impl<'de> serde::Deserialize<'de> for $name {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::de::Deserializer<'de>,
+ {
+ struct Visitor;
+
+ impl<'de> serde::de::Visitor<'de> for Visitor {
+ type Value = $name;
+
+ fn expecting(
+ &self,
+ formatter: &mut core::fmt::Formatter<'_>,
+ ) -> core::fmt::Result {
+ write!(formatter, "a valid BCP-47 {}", stringify!($name))
+ }
+
+ fn visit_str<E: serde::de::Error>(self, s: &str) -> Result<Self::Value, E> {
+ s.parse().map_err(serde::de::Error::custom)
+ }
+ }
+
+ if deserializer.is_human_readable() {
+ deserializer.deserialize_string(Visitor)
+ } else {
+ Self::try_from_raw(serde::de::Deserialize::deserialize(deserializer)?)
+ .map_err(serde::de::Error::custom)
+ }
+ }
+ }
+
+ // Safety checklist for ULE:
+ //
+ // 1. Must not include any uninitialized or padding bytes (true since transparent over a ULE).
+ // 2. Must have an alignment of 1 byte (true since transparent over a ULE).
+ // 3. ULE::validate_byte_slice() checks that the given byte slice represents a valid slice.
+ // 4. ULE::validate_byte_slice() checks that the given byte slice has a valid length.
+ // 5. All other methods must be left with their default impl.
+ // 6. Byte equality is semantic equality.
+ #[cfg(feature = "zerovec")]
+ unsafe impl zerovec::ule::ULE for $name {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> {
+ let it = bytes.chunks_exact(core::mem::size_of::<Self>());
+ if !it.remainder().is_empty() {
+ return Err(zerovec::ZeroVecError::length::<Self>(bytes.len()));
+ }
+ for v in it {
+ // The following can be removed once `array_chunks` is stabilized.
+ let mut a = [0; core::mem::size_of::<Self>()];
+ a.copy_from_slice(v);
+ if Self::try_from_raw(a).is_err() {
+ return Err(zerovec::ZeroVecError::parse::<Self>());
+ }
+ }
+ Ok(())
+ }
+ }
+
+ #[cfg(feature = "zerovec")]
+ impl zerovec::ule::AsULE for $name {
+ type ULE = Self;
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+ }
+
+ #[cfg(feature = "zerovec")]
+ impl<'a> zerovec::maps::ZeroMapKV<'a> for $name {
+ type Container = zerovec::ZeroVec<'a, $name>;
+ type Slice = zerovec::ZeroSlice<$name>;
+ type GetType = $name;
+ type OwnedType = $name;
+ }
+ };
+}
+
+macro_rules! impl_writeable_for_each_subtag_str_no_test { // implements `Writeable` and `Display` on top of the type's `for_each_subtag_str`
+ ($type:tt $(, $self:ident, $borrow_cond:expr => $borrow:expr)?) => { // the optional tail adds a `write_to_string` that can borrow
+ impl writeable::Writeable for $type {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ let mut initial = true; // no separator is written before the first subtag
+ self.for_each_subtag_str(&mut |subtag| {
+ if initial {
+ initial = false;
+ } else {
+ sink.write_char('-')?;
+ }
+ sink.write_str(subtag)
+ })
+ }
+
+ #[inline]
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ let mut result = writeable::LengthHint::exact(0);
+ let mut initial = true;
+ self.for_each_subtag_str::<core::convert::Infallible, _>(&mut |subtag| {
+ if initial {
+ initial = false;
+ } else {
+ result += 1; // one byte for the '-' separator
+ }
+ result += subtag.len();
+ Ok(())
+ })
+ .expect("infallible"); // the closure's error type is `Infallible` and it always returns `Ok`
+ result
+ }
+
+ $(
+ fn write_to_string(&self) -> alloc::borrow::Cow<str> {
+ #[allow(clippy::unwrap_used)] // impl_writeable_for_subtag_list's $borrow uses unwrap
+ let $self = self;
+ if $borrow_cond { // use the caller-supplied `$borrow` expression when its condition holds
+ $borrow
+ } else {
+ let mut output = alloc::string::String::with_capacity(self.writeable_length_hint().capacity());
+ let _ = self.write_to(&mut output);
+ alloc::borrow::Cow::Owned(output)
+ }
+ }
+ )?
+ }
+
+ writeable::impl_display_with_writeable!($type);
+ };
+}
+
+macro_rules! impl_writeable_for_subtag_list { // `Writeable` impl plus a unit test for types wrapping a list of subtags
+ ($type:tt, $sample1:literal, $sample2:literal) => {
+ impl_writeable_for_each_subtag_str_no_test!($type, selff, selff.0.len() == 1 => alloc::borrow::Cow::Borrowed(selff.0.get(0).unwrap().as_str())); // borrow the subtag directly when there is exactly one
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(&$type::default(), ""); // an empty list writes nothing
+ writeable::assert_writeable_eq!(
+ &$type::from_short_slice_unchecked(alloc::vec![$sample1.parse().unwrap()].into()),
+ $sample1,
+ );
+ writeable::assert_writeable_eq!(
+ &$type::from_short_slice_unchecked(vec![
+ $sample1.parse().unwrap(),
+ $sample2.parse().unwrap()
+ ].into()),
+ core::concat!($sample1, "-", $sample2), // subtags are joined with '-'
+ );
+ }
+ };
+}
+
+macro_rules! impl_writeable_for_key_value { // `Writeable` impl plus a unit test for key-value map types
+ ($type:tt, $key1:literal, $value1:literal, $key2:literal, $expected2:literal) => {
+ impl_writeable_for_each_subtag_str_no_test!($type);
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(&$type::default(), ""); // an empty map writes nothing
+ writeable::assert_writeable_eq!(
+ &$type::from_tuple_vec(vec![($key1.parse().unwrap(), $value1.parse().unwrap())]),
+ core::concat!($key1, "-", $value1),
+ );
+ writeable::assert_writeable_eq!(
+ &$type::from_tuple_vec(vec![
+ ($key1.parse().unwrap(), $value1.parse().unwrap()),
+ ($key2.parse().unwrap(), "true".parse().unwrap()) // the second entry's value is parsed from "true"
+ ]),
+ core::concat!($key1, "-", $value1, "-", $expected2), // `$expected2` is what the second entry should write as
+ );
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid/src/langid.rs b/third_party/rust/icu_locid/src/langid.rs
new file mode 100644
index 0000000000..eac8c83713
--- /dev/null
+++ b/third_party/rust/icu_locid/src/langid.rs
@@ -0,0 +1,574 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::cmp::Ordering;
+use core::str::FromStr;
+
+use crate::ordering::SubtagOrderingResult;
+use crate::parser::{
+ parse_language_identifier, parse_language_identifier_with_single_variant, ParserError,
+ ParserMode, SubtagIterator,
+};
+use crate::subtags;
+use alloc::string::String;
+use writeable::Writeable;
+
+/// A core struct representing a [`Unicode BCP47 Language Identifier`].
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{
+/// langid,
+/// subtags::{language, region},
+/// };
+///
+/// let li = langid!("en-US");
+///
+/// assert_eq!(li.language, language!("en"));
+/// assert_eq!(li.script, None);
+/// assert_eq!(li.region, Some(region!("US")));
+/// assert_eq!(li.variants.len(), 0);
+/// ```
+///
+/// # Parsing
+///
+/// Unicode recognizes three levels of standard conformance for any language identifier:
+///
+/// * *well-formed* - syntactically correct
+/// * *valid* - well-formed and only uses registered language, region, script and variant subtags...
+/// * *canonical* - valid and no deprecated codes or structure.
+///
+/// At the moment parsing normalizes a well-formed language identifier converting
+/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
+///
+/// Any bogus subtags will cause the parsing to fail with an error.
+/// No subtag validation is performed.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{
+/// langid,
+/// subtags::{language, region, script, variant},
+/// };
+///
+/// let li = langid!("eN_latn_Us-Valencia");
+///
+/// assert_eq!(li.language, language!("en"));
+/// assert_eq!(li.script, Some(script!("Latn")));
+/// assert_eq!(li.region, Some(region!("US")));
+/// assert_eq!(li.variants.get(0), Some(&variant!("valencia")));
+/// ```
+///
+/// [`Unicode BCP47 Language Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_language_identifier
+ #[derive(Default, PartialEq, Eq, Clone, Hash)]
+ #[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
+ pub struct LanguageIdentifier {
+ /// Language subtag of the language identifier.
+ ///
+ /// Always present (the field is not an `Option`); [`LanguageIdentifier::UND`] uses
+ /// `subtags::Language::UND` here.
+ pub language: subtags::Language,
+ /// Script subtag of the language identifier, or `None` if the identifier has none.
+ pub script: Option<subtags::Script>,
+ /// Region subtag of the language identifier, or `None` if the identifier has none.
+ pub region: Option<subtags::Region>,
+ /// Variant subtags of the language identifier; may be empty.
+ pub variants: subtags::Variants,
+ }
+
+impl LanguageIdentifier {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`LanguageIdentifier`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// LanguageIdentifier::try_from_bytes(b"en-US").expect("Parsing failed");
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`ParserError`] if `v` cannot be parsed as a language identifier.
+ pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ parse_language_identifier(v, ParserMode::LanguageIdentifier)
+ }
+
+ #[doc(hidden)]
+ #[allow(clippy::type_complexity)]
+ // Const parser used by the `langid!` macro. It returns the parsed parts as a tuple
+ // (language, script, region, at most one variant) rather than a `LanguageIdentifier`.
+ //
+ // The return type should be `Result<Self, ParserError>` once the `const_precise_live_drops`
+ // is stabilized ([rust-lang#73255](https://github.com/rust-lang/rust/issues/73255)).
+ pub const fn try_from_bytes_with_single_variant(
+ v: &[u8],
+ ) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ ),
+ ParserError,
+ > {
+ parse_language_identifier_with_single_variant(v, ParserMode::LanguageIdentifier)
+ }
+
+ /// A constructor which takes a utf8 slice which may contain extension keys,
+ /// parses it and produces a well-formed [`LanguageIdentifier`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::{langid, LanguageIdentifier};
+ ///
+ /// let li = LanguageIdentifier::try_from_locale_bytes(b"en-US-x-posix")
+ /// .expect("Parsing failed.");
+ ///
+ /// assert_eq!(li, langid!("en-US"));
+ /// ```
+ ///
+ /// This method should be used for input that may be a locale identifier.
+ /// All extensions will be lost.
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`ParserError`] if `v` cannot be parsed in locale mode.
+ pub fn try_from_locale_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ // Same parser as `try_from_bytes`, but run in `ParserMode::Locale`.
+ parse_language_identifier(v, ParserMode::Locale)
+ }
+
+ /// The default undefined language "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// assert_eq!(LanguageIdentifier::default(), LanguageIdentifier::UND);
+ /// ```
+ pub const UND: Self = Self {
+ // Only the `und` language subtag is set: no script, no region, and a variant list
+ // created with `Variants::new()`.
+ language: subtags::Language::UND,
+ script: None,
+ region: None,
+ variants: subtags::Variants::new(),
+ };
+
+ /// This is a best-effort operation that performs all available levels of canonicalization.
+ ///
+ /// At the moment the operation will normalize casing and the separator, but in the future
+ /// it may also validate and update from deprecated subtags to canonical ones.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// assert_eq!(
+ /// LanguageIdentifier::canonicalize("pL_latn_pl").as_deref(),
+ /// Ok("pl-Latn-PL")
+ /// );
+ /// ```
+ pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> {
+     // Parse the input, then serialize the parsed identifier back into an owned string.
+     Self::try_from_bytes(input.as_ref())
+         .map(|parsed| parsed.write_to_string().into_owned())
+ }
+
+ /// Compare this [`LanguageIdentifier`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`LanguageIdentifier`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-Latn-PL",
+ /// "und",
+ /// "und-Adlm",
+ /// "und-GB",
+ /// "und-ZA",
+ /// "und-fonipa",
+ /// "zh",
+ /// ];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_langid = a.parse::<LanguageIdentifier>().unwrap();
+ /// assert!(a_langid.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_langid.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+     // Split the raw bytes on `-` and compare subtag by subtag.
+     let other_subtags = other.split(|byte| *byte == b'-');
+     self.strict_cmp_iter(other_subtags).end()
+ }
+
+ /// Compare this [`LanguageIdentifier`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`LanguageIdentifier::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] = &[b"ca", b"ES", b"valencia"];
+ ///
+ /// let loc = "ca-ES-valencia".parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = "ca-ES".parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = "ca-ZA".parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+     I: Iterator<Item = &'l [u8]>,
+ {
+     // Walk our own subtags in order, pairing each with the next subtag from `subtags`.
+     let outcome = self.for_each_subtag_str(&mut |own| match subtags.next() {
+         // The other side ran out of subtags first, so `self` sorts after it.
+         None => Err(Ordering::Greater),
+         Some(theirs) => match own.as_bytes().cmp(theirs) {
+             Ordering::Equal => Ok(()),
+             unequal => Err(unequal),
+         },
+     });
+     match outcome {
+         Err(ordering) => SubtagOrderingResult::Ordering(ordering),
+         // All of our subtags matched; hand back whatever remains of the iterator.
+         Ok(()) => SubtagOrderingResult::Subtags(subtags),
+     }
+ }
+
+ /// Compare this `LanguageIdentifier` with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string to a `LanguageIdentifier` and then performed a structural comparison.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-LaTn-pL",
+ /// "uNd",
+ /// "UnD-adlm",
+ /// "uNd-GB",
+ /// "UND-FONIPA",
+ /// "ZH",
+ /// ];
+ ///
+ /// for a in bcp47_strings {
+ /// assert!(a.parse::<LanguageIdentifier>().unwrap().normalizing_eq(a));
+ /// }
+ /// ```
+ pub fn normalizing_eq(&self, other: &str) -> bool {
+     // Takes the next subtag from `$iter`, parses it as `$T`, and reports whether the
+     // parse succeeded and produced `$expected`. A missing subtag counts as a mismatch.
+     macro_rules! subtag_matches {
+         ($T:ty, $iter:ident, $expected:expr) => {
+             match $iter.next() {
+                 Some(bytes) => <$T>::try_from_bytes(bytes) == Ok($expected),
+                 None => false,
+             }
+         };
+     }
+
+     let mut iter = SubtagIterator::new(other.as_bytes());
+     if !subtag_matches!(subtags::Language, iter, self.language) {
+         return false;
+     }
+     if let Some(script) = self.script {
+         if !subtag_matches!(subtags::Script, iter, script) {
+             return false;
+         }
+     }
+     if let Some(region) = self.region {
+         if !subtag_matches!(subtags::Region, iter, region) {
+             return false;
+         }
+     }
+     // Stops at the first variant that does not match.
+     let variants_match = self
+         .variants
+         .iter()
+         .all(|variant| subtag_matches!(subtags::Variant, iter, *variant));
+     // Equal only if `other` has no subtags left over.
+     variants_match && iter.next().is_none()
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+     F: FnMut(&str) -> Result<(), E>,
+ {
+     // Visit order: language, script (if any), region (if any), then each variant.
+     // The first error returned by `f` stops the walk and is propagated.
+     f(self.language.as_str())?;
+     if let Some(script) = &self.script {
+         f(script.as_str())?;
+     }
+     if let Some(region) = &self.region {
+         f(region.as_str())?;
+     }
+     self.variants
+         .iter()
+         .try_for_each(|variant| f(variant.as_str()))
+ }
+
+ /// Executes `f` on each subtag string of this `LanguageIdentifier`, with every string in
+ /// lowercase ascii form.
+ ///
+ /// The default canonicalization of language identifiers uses titlecase scripts and uppercase
+ /// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies:
+ ///
+ /// > _The canonical form for all subtags in the extension is lowercase, with the fields
+ /// ordered by the separators, alphabetically._
+ ///
+ /// Hence, this method is used inside [`Transform Extensions`] to be able to get the correct
+ /// canonicalization of the language identifier.
+ ///
+ /// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is
+ /// **en-Latn-CA-t-en-latn-ca**, with the script and region parts lowercased inside T extensions,
+ /// but titlecased and uppercased outside T extensions respectively.
+ ///
+ /// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt
+ /// [`Transform extensions`]: crate::extensions::transform
+ pub(crate) fn for_each_subtag_str_lowercased<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+     F: FnMut(&str) -> Result<(), E>,
+ {
+     // Language and variants are passed through as stored; only script and region are
+     // explicitly lowercased before being handed to `f`.
+     f(self.language.as_str())?;
+     if let Some(script) = &self.script {
+         let lowered = script.into_tinystr().to_ascii_lowercase();
+         f(lowered.as_str())?;
+     }
+     if let Some(region) = &self.region {
+         let lowered = region.into_tinystr().to_ascii_lowercase();
+         f(lowered.as_str())?;
+     }
+     self.variants
+         .iter()
+         .try_for_each(|variant| f(variant.as_str()))
+ }
+
+ /// Writes this `LanguageIdentifier` to a sink, replacing uppercase ascii chars with
+ /// lowercase ascii chars.
+ ///
+ /// The default canonicalization of language identifiers uses titlecase scripts and uppercase
+ /// regions. However, this differs from [RFC6497 (BCP 47 Extension T)], which specifies:
+ ///
+ /// > _The canonical form for all subtags in the extension is lowercase, with the fields
+ /// ordered by the separators, alphabetically._
+ ///
+ /// Hence, this method is used inside [`Transform Extensions`] to be able to get the correct
+ /// canonicalization of the language identifier.
+ ///
+ /// As an example, the canonical form of locale **EN-LATN-CA-T-EN-LATN-CA** is
+ /// **en-Latn-CA-t-en-latn-ca**, with the script and region parts lowercased inside T extensions,
+ /// but titlecased and uppercased outside T extensions respectively.
+ ///
+ /// [RFC6497 (BCP 47 Extension T)]: https://www.ietf.org/rfc/rfc6497.txt
+ /// [`Transform extensions`]: crate::extensions::transform
+ pub(crate) fn write_lowercased_to<W: core::fmt::Write + ?Sized>(
+     &self,
+     sink: &mut W,
+ ) -> core::fmt::Result {
+     // A `-` separator goes before every subtag except the first one.
+     let mut needs_separator = false;
+     self.for_each_subtag_str_lowercased(&mut |subtag| {
+         if needs_separator {
+             sink.write_char('-')?;
+         }
+         needs_separator = true;
+         sink.write_str(subtag)
+     })
+ }
+}
+
+ // Identity conversion: lets code that is generic over `AsRef<LanguageIdentifier>`
+ // accept a `LanguageIdentifier` directly.
+ impl AsRef<LanguageIdentifier> for LanguageIdentifier {
+ fn as_ref(&self) -> &Self {
+ self
+ }
+ }
+
+ // Identity conversion: lets code that is generic over `AsMut<LanguageIdentifier>`
+ // accept a `LanguageIdentifier` directly.
+ impl AsMut<LanguageIdentifier> for LanguageIdentifier {
+ fn as_mut(&mut self) -> &mut Self {
+ self
+ }
+ }
+
+ impl core::fmt::Debug for LanguageIdentifier {
+     /// Formats the identifier exactly as its `Display` implementation does.
+     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+         // `self` is already a reference; borrowing it again would only route the call
+         // through the blanket `Display for &T` implementation.
+         core::fmt::Display::fmt(self, f)
+     }
+ }
+
+ // Enables `"en-US".parse::<LanguageIdentifier>()`.
+ impl FromStr for LanguageIdentifier {
+ type Err = ParserError;
+
+ // Delegates to `try_from_bytes` on the string's bytes.
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+ }
+
+ // Writeable boilerplate for `LanguageIdentifier`. When there is no script, no region and no
+ // variants, `write_to_string` is answered by the language subtag's own `write_to_string`.
+ impl_writeable_for_each_subtag_str_no_test!(LanguageIdentifier, selff, selff.script.is_none() && selff.region.is_none() && selff.variants.is_empty() => selff.language.write_to_string());
+
+ // Round-trip checks: each identifier is parsed and must serialize back to the same string.
+ #[test]
+ fn test_writeable() {
+ use writeable::assert_writeable_eq;
+ assert_writeable_eq!(LanguageIdentifier::UND, "und");
+ assert_writeable_eq!("und-001".parse::<LanguageIdentifier>().unwrap(), "und-001");
+ assert_writeable_eq!(
+ "und-Mymr".parse::<LanguageIdentifier>().unwrap(),
+ "und-Mymr",
+ );
+ assert_writeable_eq!(
+ "my-Mymr-MM".parse::<LanguageIdentifier>().unwrap(),
+ "my-Mymr-MM",
+ );
+ assert_writeable_eq!(
+ "my-Mymr-MM-posix".parse::<LanguageIdentifier>().unwrap(),
+ "my-Mymr-MM-posix",
+ );
+ // Two variants.
+ assert_writeable_eq!(
+ "zh-macos-posix".parse::<LanguageIdentifier>().unwrap(),
+ "zh-macos-posix",
+ );
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::{langid, subtags::language, LanguageIdentifier};
+///
+/// assert_eq!(LanguageIdentifier::from(language!("en")), langid!("en"));
+/// ```
+ impl From<subtags::Language> for LanguageIdentifier {
+     fn from(language: subtags::Language) -> Self {
+         // Only the language is set; every other field keeps its default value.
+         Self {
+             language,
+             ..Self::default()
+         }
+     }
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::{langid, subtags::script, LanguageIdentifier};
+///
+/// assert_eq!(
+/// LanguageIdentifier::from(Some(script!("latn"))),
+/// langid!("und-Latn")
+/// );
+/// ```
+ impl From<Option<subtags::Script>> for LanguageIdentifier {
+     fn from(script: Option<subtags::Script>) -> Self {
+         // Only the script is set; every other field keeps its default value.
+         Self {
+             script,
+             ..Self::default()
+         }
+     }
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::{langid, subtags::region, LanguageIdentifier};
+///
+/// assert_eq!(
+/// LanguageIdentifier::from(Some(region!("US"))),
+/// langid!("und-US")
+/// );
+/// ```
+ impl From<Option<subtags::Region>> for LanguageIdentifier {
+     fn from(region: Option<subtags::Region>) -> Self {
+         // Only the region is set; every other field keeps its default value.
+         Self {
+             region,
+             ..Self::default()
+         }
+     }
+ }
+
+/// Convert from an LSR tuple to a [`LanguageIdentifier`].
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{
+/// langid,
+/// subtags::{language, region, script},
+/// LanguageIdentifier,
+/// };
+///
+/// let lang = language!("en");
+/// let script = script!("Latn");
+/// let region = region!("US");
+/// assert_eq!(
+/// LanguageIdentifier::from((lang, Some(script), Some(region))),
+/// langid!("en-Latn-US")
+/// );
+/// ```
+ impl
+     From<(
+         subtags::Language,
+         Option<subtags::Script>,
+         Option<subtags::Region>,
+     )> for LanguageIdentifier
+ {
+     fn from(
+         lsr: (
+             subtags::Language,
+             Option<subtags::Script>,
+             Option<subtags::Region>,
+         ),
+     ) -> Self {
+         // Unpack the (language, script, region) tuple; variants keep their default value.
+         let (language, script, region) = lsr;
+         Self {
+             language,
+             script,
+             region,
+             ..Self::default()
+         }
+     }
+ }
+
+/// Convert from a [`LanguageIdentifier`] to an LSR tuple.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{
+/// langid,
+/// subtags::{language, region, script},
+/// };
+///
+/// let lid = langid!("en-Latn-US");
+/// let (lang, script, region) = (&lid).into();
+///
+/// assert_eq!(lang, language!("en"));
+/// assert_eq!(script, Some(script!("Latn")));
+/// assert_eq!(region, Some(region!("US")));
+/// ```
+ impl From<&LanguageIdentifier>
+     for (
+         subtags::Language,
+         Option<subtags::Script>,
+         Option<subtags::Region>,
+     )
+ {
+     fn from(langid: &LanguageIdentifier) -> Self {
+         // Copy the three LSR fields out of the borrowed identifier; variants are not included.
+         let LanguageIdentifier {
+             language,
+             script,
+             region,
+             ..
+         } = langid;
+         (*language, *script, *region)
+     }
+ }
diff --git a/third_party/rust/icu_locid/src/lib.rs b/third_party/rust/icu_locid/src/lib.rs
new file mode 100644
index 0000000000..9c6c46ca51
--- /dev/null
+++ b/third_party/rust/icu_locid/src/lib.rs
@@ -0,0 +1,93 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Parsing, manipulating, and serializing Unicode Language and Locale Identifiers.
+//!
+//! This module is published as its own crate ([`icu_locid`](https://docs.rs/icu_locid/latest/icu_locid/))
+//! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+//!
+//! The module provides algorithms for parsing a string into a well-formed language or locale identifier
+//! as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`].
+//!
+//! [`Locale`] is the most common structure to use for storing information about a language,
+//! script, region, variants and extensions. In almost all cases, this struct should be used as the
+//! base unit for all locale management operations.
+//!
+//! [`LanguageIdentifier`] is a strict subset of [`Locale`] which can be useful in a narrow range of
+//! cases where [`Unicode Extensions`] are not relevant.
+//!
+//! If in doubt, use [`Locale`].
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::Locale;
+//! use icu::locid::{
+//! locale,
+//! subtags::{language, region},
+//! };
+//!
+//! let mut loc: Locale = locale!("en-US");
+//!
+//! assert_eq!(loc.id.language, language!("en"));
+//! assert_eq!(loc.id.script, None);
+//! assert_eq!(loc.id.region, Some(region!("US")));
+//! assert_eq!(loc.id.variants.len(), 0);
+//!
+//! loc.id.region = Some(region!("GB"));
+//!
+//! assert_eq!(loc, locale!("en-GB"));
+//! ```
+//!
+//! For more details, see [`Locale`] and [`LanguageIdentifier`].
+//!
+//! [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]: https://unicode.org/reports/tr35/tr35.html#Unicode_Language_and_Locale_Identifiers
+//! [`ICU4X`]: ../icu/index.html
+//! [`Unicode Extensions`]: extensions
+
+// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, feature = "std")), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::indexing_slicing,
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums,
+ missing_debug_implementations,
+ )
+)]
+#![warn(missing_docs)]
+
+extern crate alloc;
+
+#[macro_use]
+mod helpers;
+
+mod langid;
+mod locale;
+mod macros;
+mod ordering;
+mod parser;
+
+pub use langid::LanguageIdentifier;
+pub use locale::Locale;
+pub use ordering::SubtagOrderingResult;
+pub use parser::errors::ParserError;
+
+#[doc(no_inline)]
+pub use ParserError as Error;
+
+pub mod extensions;
+#[macro_use]
+pub mod subtags;
+pub mod zerovec;
+
+#[cfg(feature = "serde")]
+mod serde;
+
+#[cfg(feature = "databake")]
+mod databake;
diff --git a/third_party/rust/icu_locid/src/locale.rs b/third_party/rust/icu_locid/src/locale.rs
new file mode 100644
index 0000000000..e87cdf1a20
--- /dev/null
+++ b/third_party/rust/icu_locid/src/locale.rs
@@ -0,0 +1,511 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ordering::SubtagOrderingResult;
+use crate::parser::{
+ parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
+ ParserError, ParserMode, SubtagIterator,
+};
+use crate::{extensions, subtags, LanguageIdentifier};
+use alloc::string::String;
+use core::cmp::Ordering;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+use writeable::Writeable;
+
+/// A core struct representing a [`Unicode Locale Identifier`].
+///
+/// A locale is made of two parts:
+/// * Unicode Language Identifier
+/// * A set of Unicode Extensions
+///
+/// [`Locale`] exposes all of the same fields and methods as [`LanguageIdentifier`], and
+/// on top of that is able to parse, manipulate and serialize unicode extension fields.
+///
+///
+/// # Examples
+///
+/// ```
+ /// use icu::locid::{
+/// extensions::unicode::{key, value},
+/// locale,
+/// subtags::{language, region},
+/// };
+///
+/// let loc = locale!("en-US-u-ca-buddhist");
+///
+/// assert_eq!(loc.id.language, language!("en"));
+/// assert_eq!(loc.id.script, None);
+/// assert_eq!(loc.id.region, Some(region!("US")));
+/// assert_eq!(loc.id.variants.len(), 0);
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("ca")),
+/// Some(&value!("buddhist"))
+/// );
+/// ```
+///
+/// # Parsing
+///
+/// Unicode recognizes three levels of standard conformance for a locale:
+///
+/// * *well-formed* - syntactically correct
+/// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
+/// * *canonical* - valid and no deprecated codes or structure.
+///
+/// At the moment parsing normalizes a well-formed locale identifier converting
+/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
+///
+/// Any bogus subtags will cause the parsing to fail with an error.
+///
+/// No subtag validation or alias resolution is performed.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{subtags::*, Locale};
+///
+/// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12"
+/// .parse()
+/// .expect("Failed to parse.");
+///
+/// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(loc.id.script, "Latn".parse::<Script>().ok());
+/// assert_eq!(loc.id.region, "US".parse::<Region>().ok());
+/// assert_eq!(
+/// loc.id.variants.get(0),
+/// "valencia".parse::<Variant>().ok().as_ref()
+/// );
+/// ```
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier
+ #[derive(Default, PartialEq, Eq, Clone, Hash)]
+ #[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
+ pub struct Locale {
+ /// The basic language/script/region components in the locale identifier along with any variants.
+ pub id: LanguageIdentifier,
+ /// Any extensions present in the locale identifier.
+ ///
+ /// [`Locale::UND`] initializes this with `extensions::Extensions::new()`.
+ pub extensions: extensions::Extensions,
+ }
+
+ // Pins the in-memory size of the core types so that an accidental size change fails the test.
+ // NOTE(review): the expected values look 64-bit specific (e.g. `Vec` is asserted to be
+ // 24 bytes) - confirm before running on other targets.
+ #[test]
+ fn test_sizes() {
+ assert_eq!(core::mem::size_of::<subtags::Language>(), 3);
+ assert_eq!(core::mem::size_of::<subtags::Script>(), 4);
+ assert_eq!(core::mem::size_of::<subtags::Region>(), 3);
+ assert_eq!(core::mem::size_of::<subtags::Variant>(), 8);
+ assert_eq!(core::mem::size_of::<subtags::Variants>(), 16);
+ assert_eq!(core::mem::size_of::<LanguageIdentifier>(), 32);
+
+ assert_eq!(core::mem::size_of::<extensions::transform::Transform>(), 56);
+ // Wrapping in `Option` is asserted to add no extra bytes.
+ assert_eq!(core::mem::size_of::<Option<LanguageIdentifier>>(), 32);
+ assert_eq!(core::mem::size_of::<extensions::transform::Fields>(), 24);
+
+ assert_eq!(core::mem::size_of::<extensions::unicode::Attributes>(), 16);
+ assert_eq!(core::mem::size_of::<extensions::unicode::Keywords>(), 24);
+ assert_eq!(core::mem::size_of::<Vec<extensions::other::Other>>(), 24);
+ assert_eq!(core::mem::size_of::<extensions::private::Private>(), 16);
+ assert_eq!(core::mem::size_of::<extensions::Extensions>(), 136);
+
+ assert_eq!(core::mem::size_of::<Locale>(), 168);
+ }
+
+impl Locale {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`Locale`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// Locale::try_from_bytes(b"en-US-u-hc-h12").unwrap();
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// Returns a [`ParserError`] if `v` cannot be parsed as a locale identifier.
+ pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ parse_locale(v)
+ }
+
+ /// The default undefined locale "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// assert_eq!(Locale::default(), Locale::UND);
+ /// ```
+ pub const UND: Self = Self {
+ // The `und` language identifier combined with `Extensions::new()`.
+ id: LanguageIdentifier::UND,
+ extensions: extensions::Extensions::new(),
+ };
+
+ /// This is a best-effort operation that performs all available levels of canonicalization.
+ ///
+ /// At the moment the operation will normalize casing and the separator, but in the future
+ /// it may also validate and update from deprecated subtags to canonical ones.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// assert_eq!(
+ /// Locale::canonicalize("pL_latn_pl-U-HC-H12").as_deref(),
+ /// Ok("pl-Latn-PL-u-hc-h12")
+ /// );
+ /// ```
+ pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> {
+     // Parse the input, then serialize the parsed locale back into an owned string.
+     Self::try_from_bytes(input.as_ref())
+         .map(|parsed| parsed.write_to_string().into_owned())
+ }
+
+ /// Compare this [`Locale`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`Locale`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-Latn-PL",
+ /// "und",
+ /// "und-fonipa",
+ /// "und-t-m0-true",
+ /// "und-u-ca-hebrew",
+ /// "und-u-ca-japanese",
+ /// "zh",
+ /// ];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_loc = a.parse::<Locale>().unwrap();
+ /// assert!(a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_loc.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+     // Split the raw bytes on `-` and compare subtag by subtag.
+     let other_subtags = other.split(|byte| *byte == b'-');
+     self.strict_cmp_iter(other_subtags).end()
+ }
+
+ /// Compare this [`Locale`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`Locale::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] =
+ /// &[b"ca", b"ES", b"valencia", b"u", b"ca", b"hebrew"];
+ ///
+ /// let loc = locale!("ca-ES-valencia-u-ca-hebrew");
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = locale!("ca-ES-valencia");
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = locale!("ca-ES-valencia-u-nu-arab");
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+     I: Iterator<Item = &'l [u8]>,
+ {
+     // Walk our own subtags in order, pairing each with the next subtag from `subtags`.
+     let outcome = self.for_each_subtag_str(&mut |own| match subtags.next() {
+         // The other side ran out of subtags first, so `self` sorts after it.
+         None => Err(Ordering::Greater),
+         Some(theirs) => match own.as_bytes().cmp(theirs) {
+             Ordering::Equal => Ok(()),
+             unequal => Err(unequal),
+         },
+     });
+     match outcome {
+         Err(ordering) => SubtagOrderingResult::Ordering(ordering),
+         // All of our subtags matched; hand back whatever remains of the iterator.
+         Ok(()) => SubtagOrderingResult::Subtags(subtags),
+     }
+ }
+
+ /// Compare this `Locale` with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string to a `Locale` and then performed a structural comparison.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-LaTn-pL",
+ /// "uNd",
+ /// "UND-FONIPA",
+ /// "UnD-t-m0-TrUe",
+ /// "uNd-u-CA-Japanese",
+ /// "ZH",
+ /// ];
+ ///
+ /// for a in bcp47_strings {
+ /// assert!(a.parse::<Locale>().unwrap().normalizing_eq(a));
+ /// }
+ /// ```
+ pub fn normalizing_eq(&self, other: &str) -> bool {
+     // Takes the next subtag from `$iter`, parses it as `$T`, and reports whether the
+     // parse succeeded and produced `$expected`. A missing subtag counts as a mismatch.
+     macro_rules! subtag_matches {
+         ($T:ty, $iter:ident, $expected:expr) => {
+             match $iter.next() {
+                 Some(bytes) => <$T>::try_from_bytes(bytes) == Ok($expected),
+                 None => false,
+             }
+         };
+     }
+
+     let mut iter = SubtagIterator::new(other.as_bytes());
+     if !subtag_matches!(subtags::Language, iter, self.id.language) {
+         return false;
+     }
+     if let Some(script) = self.id.script {
+         if !subtag_matches!(subtags::Script, iter, script) {
+             return false;
+         }
+     }
+     if let Some(region) = self.id.region {
+         if !subtag_matches!(subtags::Region, iter, region) {
+             return false;
+         }
+     }
+     // Stops at the first variant that does not match.
+     let variants_match = self
+         .id
+         .variants
+         .iter()
+         .all(|variant| subtag_matches!(subtags::Variant, iter, *variant));
+     if !variants_match {
+         return false;
+     }
+     // Extensions are only parsed from `other` when `self` has some; a parse failure or
+     // any difference means the two are not equal.
+     if !self.extensions.is_empty() {
+         match extensions::Extensions::try_from_iter(&mut iter) {
+             Ok(parsed) if self.extensions == parsed => {}
+             _ => return false,
+         }
+     }
+     // Equal only if `other` has no subtags left over.
+     iter.next().is_none()
+ }
+
+ #[doc(hidden)]
+ #[allow(clippy::type_complexity)]
+ // Const parser that returns the parsed parts as a tuple: language, script, region, at most
+ // one variant, and at most one unicode extension key with an optional value.
+ // NOTE(review): presumably this backs the `locale!` macro, mirroring how `langid!` uses
+ // its single-variant counterpart - the macro body is not visible here, confirm.
+ pub const fn try_from_bytes_with_single_variant_single_keyword_unicode_extension(
+ v: &[u8],
+ ) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+ ),
+ ParserError,
+ > {
+ parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(
+ v,
+ ParserMode::Locale,
+ )
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+     F: FnMut(&str) -> Result<(), E>,
+ {
+     // Language identifier subtags come first, then the extension subtags; the first
+     // error returned by `f` stops the walk.
+     self.id.for_each_subtag_str(f)?;
+     self.extensions.for_each_subtag_str(f)
+ }
+}
+
+ // Enables `"en-US-u-hc-h12".parse::<Locale>()`.
+ impl FromStr for Locale {
+ type Err = ParserError;
+
+ // Delegates to `try_from_bytes` on the string's bytes.
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+ }
+
+ // Wraps a language identifier in a `Locale` whose extensions are `Extensions::default()`.
+ impl From<LanguageIdentifier> for Locale {
+ fn from(id: LanguageIdentifier) -> Self {
+ Self {
+ id,
+ extensions: extensions::Extensions::default(),
+ }
+ }
+ }
+
+ // Keeps only the language identifier part of the locale; its extensions are dropped.
+ impl From<Locale> for LanguageIdentifier {
+ fn from(loc: Locale) -> Self {
+ loc.id
+ }
+ }
+
+ // Borrows the locale's `id` field as a `LanguageIdentifier`.
+ impl AsRef<LanguageIdentifier> for Locale {
+ fn as_ref(&self) -> &LanguageIdentifier {
+ &self.id
+ }
+ }
+
+ // Mutably borrows the locale's `id` field as a `LanguageIdentifier`.
+ impl AsMut<LanguageIdentifier> for Locale {
+ fn as_mut(&mut self) -> &mut LanguageIdentifier {
+ &mut self.id
+ }
+ }
+
+ impl core::fmt::Debug for Locale {
+     /// Writes the locale through its `Writeable` implementation.
+     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+         Writeable::write_to(self, f)
+     }
+ }
+
+ // Writeable boilerplate for `Locale`. When there are no extensions, `write_to_string` is
+ // answered by the inner `LanguageIdentifier`'s own `write_to_string`.
+ impl_writeable_for_each_subtag_str_no_test!(Locale, selff, selff.extensions.is_empty() => selff.id.write_to_string());
+
+ // Round-trip checks: each locale is parsed and must serialize back to the same string.
+ #[test]
+ fn test_writeable() {
+ use writeable::assert_writeable_eq;
+ assert_writeable_eq!(Locale::UND, "und");
+ assert_writeable_eq!("und-001".parse::<Locale>().unwrap(), "und-001");
+ assert_writeable_eq!("und-Mymr".parse::<Locale>().unwrap(), "und-Mymr");
+ assert_writeable_eq!("my-Mymr-MM".parse::<Locale>().unwrap(), "my-Mymr-MM");
+ assert_writeable_eq!(
+ "my-Mymr-MM-posix".parse::<Locale>().unwrap(),
+ "my-Mymr-MM-posix",
+ );
+ assert_writeable_eq!(
+ "zh-macos-posix".parse::<Locale>().unwrap(),
+ "zh-macos-posix",
+ );
+ // The remaining cases carry `-t-`, `-u-` and `-x-` extension subtags.
+ assert_writeable_eq!(
+ "my-t-my-d0-zawgyi".parse::<Locale>().unwrap(),
+ "my-t-my-d0-zawgyi",
+ );
+ assert_writeable_eq!(
+ "ar-SA-u-ca-islamic-civil".parse::<Locale>().unwrap(),
+ "ar-SA-u-ca-islamic-civil",
+ );
+ assert_writeable_eq!(
+ "en-001-x-foo-bar".parse::<Locale>().unwrap(),
+ "en-001-x-foo-bar",
+ );
+ assert_writeable_eq!("und-t-m0-true".parse::<Locale>().unwrap(), "und-t-m0-true",);
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{locale, subtags::language};
+///
+/// assert_eq!(Locale::from(language!("en")), locale!("en"));
+/// ```
+ impl From<subtags::Language> for Locale {
+     fn from(language: subtags::Language) -> Self {
+         // Build the identifier from the language; extensions keep their default value.
+         Self {
+             id: LanguageIdentifier::from(language),
+             ..Self::default()
+         }
+     }
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{locale, subtags::script};
+///
+/// assert_eq!(Locale::from(Some(script!("latn"))), locale!("und-Latn"));
+/// ```
+ impl From<Option<subtags::Script>> for Locale {
+     fn from(script: Option<subtags::Script>) -> Self {
+         // Build the identifier from the script; extensions keep their default value.
+         Self {
+             id: LanguageIdentifier::from(script),
+             ..Self::default()
+         }
+     }
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{locale, subtags::region};
+///
+/// assert_eq!(Locale::from(Some(region!("US"))), locale!("und-US"));
+/// ```
+ impl From<Option<subtags::Region>> for Locale {
+     fn from(region: Option<subtags::Region>) -> Self {
+         // Build the identifier from the region; extensions keep their default value.
+         Self {
+             id: LanguageIdentifier::from(region),
+             ..Self::default()
+         }
+     }
+ }
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{
+/// locale,
+/// subtags::{language, region, script},
+/// };
+///
+/// assert_eq!(
+/// Locale::from((
+/// language!("en"),
+/// Some(script!("Latn")),
+/// Some(region!("US"))
+/// )),
+/// locale!("en-Latn-US")
+/// );
+/// ```
+ impl
+     From<(
+         subtags::Language,
+         Option<subtags::Script>,
+         Option<subtags::Region>,
+     )> for Locale
+ {
+     fn from(
+         lsr: (
+             subtags::Language,
+             Option<subtags::Script>,
+             Option<subtags::Region>,
+         ),
+     ) -> Self {
+         // Build the identifier from the LSR tuple; extensions keep their default value.
+         Self {
+             id: LanguageIdentifier::from(lsr),
+             ..Self::default()
+         }
+     }
+ }
diff --git a/third_party/rust/icu_locid/src/macros.rs b/third_party/rust/icu_locid/src/macros.rs
new file mode 100644
index 0000000000..4537cd4031
--- /dev/null
+++ b/third_party/rust/icu_locid/src/macros.rs
@@ -0,0 +1,191 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// A macro allowing for compile-time construction of valid [`LanguageIdentifier`]s.
+///
+/// The macro will perform syntax canonicalization of the tag.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{langid, LanguageIdentifier};
+///
+/// const DE_AT: LanguageIdentifier = langid!("de_at");
+///
+/// let de_at: LanguageIdentifier = "de_at".parse().unwrap();
+///
+/// assert_eq!(DE_AT, de_at);
+/// ```
+///
+/// *Note*: The macro cannot produce language identifiers with more than one variant due to const
+/// limitations (see [`Heap Allocations in Constants`]):
+///
+/// ```compile_fail,E0080
+/// icu::locid::langid!("und-variant1-variant2");
+/// ```
+///
+/// Use runtime parsing instead:
+/// ```
+/// "und-variant1-variant2"
+///     .parse::<icu::locid::LanguageIdentifier>()
+///     .unwrap();
+/// ```
+///
+/// [`LanguageIdentifier`]: crate::LanguageIdentifier
+/// [`Heap Allocations in Constants`]: https://github.com/rust-lang/const-eval/issues/20
+#[macro_export]
+macro_rules! langid {
+ ($langid:literal) => {{
+ // The value is computed inside a `const` item, so the `panic!` below
+ // fires during constant evaluation rather than at runtime.
+ const R: $crate::LanguageIdentifier =
+ match $crate::LanguageIdentifier::try_from_bytes_with_single_variant($langid.as_bytes()) {
+ Ok((language, script, region, variant)) => $crate::LanguageIdentifier {
+ language,
+ script,
+ region,
+ // The parser hands back at most one variant; wrap it (or nothing) in `Variants`.
+ variants: match variant {
+ Some(v) => $crate::subtags::Variants::from_variant(v),
+ None => $crate::subtags::Variants::new(),
+ }
+ },
+ #[allow(clippy::panic)] // const context
+ _ => panic!(concat!("Invalid language code: ", $langid, " . Note langid! macro can only support up to a single variant tag. Use runtime parsing instead.")),
+ };
+ R
+ }};
+}
+
+/// A macro allowing for compile-time construction of valid [`Locale`]s.
+///
+/// The macro will perform syntax canonicalization of the tag.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{locale, Locale};
+///
+/// const DE_AT: Locale = locale!("de_at");
+///
+/// let de_at: Locale = "de_at".parse().unwrap();
+///
+/// assert_eq!(DE_AT, de_at);
+/// ```
+///
+/// *Note*: The macro cannot produce locales with more than one variant or multiple extensions
+/// (only single keyword unicode extension is supported) due to const
+/// limitations (see [`Heap Allocations in Constants`]):
+///
+/// ```compile_fail,E0080
+/// icu::locid::locale!("sl-IT-rozaj-biske-1994");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "sl-IT-rozaj-biske-1994"
+///     .parse::<icu::locid::Locale>()
+///     .unwrap();
+/// ```
+///
+/// Locales with multiple keys are not supported
+/// ```compile_fail,E0080
+/// icu::locid::locale!("th-TH-u-ca-buddhist-nu-thai");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "th-TH-u-ca-buddhist-nu-thai"
+///     .parse::<icu::locid::Locale>()
+///     .unwrap();
+/// ```
+///
+/// Locales with attributes are not supported
+/// ```compile_fail,E0080
+/// icu::locid::locale!("en-US-u-foobar-ca-buddhist");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "en-US-u-foobar-ca-buddhist"
+///     .parse::<icu::locid::Locale>()
+///     .unwrap();
+/// ```
+///
+/// Locales with single key but multiple types are not supported
+/// ```compile_fail,E0080
+/// icu::locid::locale!("en-US-u-ca-islamic-umalqura");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "en-US-u-ca-islamic-umalqura"
+///     .parse::<icu::locid::Locale>()
+///     .unwrap();
+/// ```
+/// [`Locale`]: crate::Locale
+/// [`Heap Allocations in Constants`]: https://github.com/rust-lang/const-eval/issues/20
+#[macro_export]
+macro_rules! locale {
+    ($locale:literal) => {{
+        // The value is computed inside a `const` item, so the `panic!` below
+        // fires during constant evaluation rather than at runtime.
+        const R: $crate::Locale =
+            match $crate::Locale::try_from_bytes_with_single_variant_single_keyword_unicode_extension(
+                $locale.as_bytes(),
+            ) {
+                Ok((language, script, region, variant, keyword)) => $crate::Locale {
+                    id: $crate::LanguageIdentifier {
+                        language,
+                        script,
+                        region,
+                        // The parser hands back at most one variant.
+                        variants: match variant {
+                            Some(v) => $crate::subtags::Variants::from_variant(v),
+                            None => $crate::subtags::Variants::new(),
+                        },
+                    },
+                    // At most one unicode keyword (key plus optional value) is
+                    // returned; attributes are always empty here.
+                    extensions: match keyword {
+                        Some(k) => $crate::extensions::Extensions::from_unicode(
+                            $crate::extensions::unicode::Unicode {
+                                keywords: $crate::extensions::unicode::Keywords::new_single(
+                                    k.0,
+                                    $crate::extensions::unicode::Value::from_tinystr(k.1),
+                                ),
+                                attributes: $crate::extensions::unicode::Attributes::new(),
+                            },
+                        ),
+                        None => $crate::extensions::Extensions::new(),
+                    },
+                },
+                #[allow(clippy::panic)] // const context
+                _ => panic!(concat!(
+                    "Invalid language code: ",
+                    $locale,
+                    " . Note the locale! macro only supports up to one variant tag \
+                     and a single unicode extension keyword (one key with at most \
+                     one type subtag, no attributes); other extensions are not \
+                     supported. Use runtime parsing instead."
+                )),
+            };
+        R
+    }};
+}
+
+#[cfg(test)]
+mod test {
+    use crate::LanguageIdentifier;
+    use crate::Locale;
+
+    /// `langid!` and runtime parsing agree on a tag with one variant.
+    #[test]
+    fn test_langid_macro_can_parse_langid_with_single_variant() {
+        const FROM_MACRO: LanguageIdentifier = langid!("de_at-foobar");
+        assert_eq!("de_at-foobar".parse::<LanguageIdentifier>(), Ok(FROM_MACRO));
+    }
+
+    /// `locale!` and runtime parsing agree on a tag with one variant.
+    #[test]
+    fn test_locale_macro_can_parse_locale_with_single_variant() {
+        const FROM_MACRO: Locale = locale!("de_at-foobar");
+        assert_eq!("de_at-foobar".parse::<Locale>(), Ok(FROM_MACRO));
+    }
+
+    /// `locale!` and runtime parsing agree on a tag with one unicode keyword.
+    #[test]
+    fn test_locale_macro_can_parse_locale_with_single_keyword_unicode_extension() {
+        const FROM_MACRO: Locale = locale!("de_at-u-ca-foobar");
+        assert_eq!("de_at-u-ca-foobar".parse::<Locale>(), Ok(FROM_MACRO));
+    }
+}
diff --git a/third_party/rust/icu_locid/src/ordering.rs b/third_party/rust/icu_locid/src/ordering.rs
new file mode 100644
index 0000000000..c877c60c39
--- /dev/null
+++ b/third_party/rust/icu_locid/src/ordering.rs
@@ -0,0 +1,62 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Utilities for performing ordering operations on locales.
+
+use core::cmp::Ordering;
+
+/// The result of a subtag iterator comparison operation.
+///
+/// See [`Locale::strict_cmp_iter`].
+///
+/// An `Ordering` value is already final, whereas a `Subtags` value carries the
+/// iterator onward so it can be passed to a further comparison (as in the
+/// example below) or resolved with `end`.
+///
+/// # Examples
+///
+/// Check whether a stream of subtags contains two expected locales back-to-back:
+///
+/// ```
+/// use icu::locid::{locale, Locale, SubtagOrderingResult};
+/// use std::cmp::Ordering;
+///
+/// let subtags = b"en-US-it-IT".split(|b| *b == b'-');
+/// let locales = [locale!("en-US"), locale!("it-IT")];
+/// let mut result = SubtagOrderingResult::Subtags(subtags);
+/// for loc in locales.iter() {
+///     match result {
+///         SubtagOrderingResult::Subtags(it) => {
+///             result = loc.strict_cmp_iter(it);
+///         }
+///         SubtagOrderingResult::Ordering(ord) => break,
+///     }
+/// }
+///
+/// assert_eq!(Ordering::Equal, result.end());
+/// ```
+///
+/// [`Locale::strict_cmp_iter`]: crate::Locale::strict_cmp_iter
+#[allow(clippy::exhaustive_enums)] // well-defined exhaustive enum semantics
+#[derive(Debug)]
+pub enum SubtagOrderingResult<I> {
+ /// Potentially remaining subtags after the comparison operation.
+ Subtags(I),
+ /// Resolved ordering between the locale object and the subtags.
+ Ordering(Ordering),
+}
+
+impl<I> SubtagOrderingResult<I>
+where
+    I: Iterator,
+{
+    /// Resolves this result into a final [`Ordering`]; call it once there are
+    /// no further locale objects to chain.
+    ///
+    /// An already-resolved ordering is returned unchanged. Otherwise the
+    /// result is [`Ordering::Less`] if the iterator still yields a subtag and
+    /// [`Ordering::Equal`] if it is exhausted.
+    #[inline]
+    pub fn end(self) -> Ordering {
+        let mut remaining = match self {
+            Self::Ordering(resolved) => return resolved,
+            Self::Subtags(remaining) => remaining,
+        };
+        if remaining.next().is_some() {
+            Ordering::Less
+        } else {
+            Ordering::Equal
+        }
+    }
+}
diff --git a/third_party/rust/icu_locid/src/parser/errors.rs b/third_party/rust/icu_locid/src/parser/errors.rs
new file mode 100644
index 0000000000..b2262460c1
--- /dev/null
+++ b/third_party/rust/icu_locid/src/parser/errors.rs
@@ -0,0 +1,72 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use displaydoc::Display;
+
+/// List of parser errors that can be generated
+/// while parsing [`LanguageIdentifier`](crate::LanguageIdentifier), [`Locale`](crate::Locale),
+/// [`subtags`](crate::subtags) or [`extensions`](crate::extensions).
+///
+/// Re-exported as [`Error`](crate::Error).
+///
+/// The enum is `#[non_exhaustive]`, so `match`es outside this crate need a
+/// wildcard arm. The `Display` text of each variant is the string given in
+/// its `displaydoc` attribute.
+#[derive(Display, Debug, PartialEq, Copy, Clone)]
+#[non_exhaustive]
+pub enum ParserError {
+ /// Invalid language subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("x2".parse::<Language>(), Err(ParserError::InvalidLanguage));
+ /// ```
+ #[displaydoc("The given language subtag is invalid")]
+ InvalidLanguage,
+
+ /// Invalid script, region or variant subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("#@2X".parse::<Region>(), Err(ParserError::InvalidSubtag));
+ /// ```
+ #[displaydoc("Invalid subtag")]
+ InvalidSubtag,
+
+ /// Invalid extension subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("#@2X".parse::<Key>(), Err(ParserError::InvalidExtension));
+ /// ```
+ #[displaydoc("Invalid extension")]
+ InvalidExtension,
+
+ /// Duplicated extension.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!(
+ ///     "und-u-hc-h12-u-ca-calendar".parse::<Locale>(),
+ ///     Err(ParserError::DuplicatedExtension)
+ /// );
+ /// ```
+ #[displaydoc("Duplicated extension")]
+ DuplicatedExtension,
+}
+
+// Only compiled when the `std` feature is enabled; the empty body means every
+// `std::error::Error` method uses its default implementation.
+#[cfg(feature = "std")]
+impl std::error::Error for ParserError {}
diff --git a/third_party/rust/icu_locid/src/parser/langid.rs b/third_party/rust/icu_locid/src/parser/langid.rs
new file mode 100644
index 0000000000..2c6ddeb037
--- /dev/null
+++ b/third_party/rust/icu_locid/src/parser/langid.rs
@@ -0,0 +1,278 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub use super::errors::ParserError;
+use crate::extensions::unicode::{Attribute, Key, Value};
+use crate::extensions::ExtensionType;
+use crate::helpers::ShortSlice;
+use crate::parser::SubtagIterator;
+use crate::LanguageIdentifier;
+use crate::{extensions, subtags};
+use tinystr::TinyAsciiStr;
+
+/// Selects how the subtag parsers in this file treat their input.
+///
+/// As used by those parsers: in any mode other than `LanguageIdentifier`, a
+/// one-byte subtag ends language-identifier parsing without being consumed;
+/// in `Partial` mode an unrecognized subtag ends parsing instead of producing
+/// an error; and the const parser only looks for a unicode extension in
+/// `Locale` mode.
+#[derive(PartialEq, Clone, Copy)]
+pub enum ParserMode {
+ LanguageIdentifier,
+ Locale,
+ Partial,
+}
+
+/// Tracks which kind of subtag the language-identifier parsers may accept next.
+///
+/// Parsing starts at `Script`; accepting a script moves to `Region`, and
+/// accepting a region or a variant moves to `Variant`, where only further
+/// variants are considered.
+#[derive(PartialEq, Clone, Copy)]
+enum ParserPosition {
+ Script,
+ Region,
+ Variant,
+}
+
+/// Parses a language identifier from the subtags yielded by `iter`.
+///
+/// The first subtag must be a valid language. The following subtags are then
+/// matched, in order, against an optional script, an optional region and any
+/// number of variants; variants are stored in sorted order.
+///
+/// `mode` changes when parsing stops: outside `ParserMode::LanguageIdentifier`
+/// a one-byte subtag ends the loop without being consumed, and in
+/// `ParserMode::Partial` an unrecognized subtag ends the loop instead of
+/// failing. Subtags that were not consumed remain available in `iter`.
+///
+/// # Errors
+///
+/// Returns `ParserError::InvalidLanguage` if `iter` yields nothing, whatever
+/// error `Language::try_from_bytes` reports for the first subtag, and
+/// `ParserError::InvalidSubtag` for an unrecognized subtag (outside `Partial`
+/// mode) or a repeated variant.
+pub fn parse_language_identifier_from_iter(
+ iter: &mut SubtagIterator,
+ mode: ParserMode,
+) -> Result<LanguageIdentifier, ParserError> {
+ let mut script = None;
+ let mut region = None;
+ let mut variants = ShortSlice::new();
+
+ let language = if let Some(subtag) = iter.next() {
+ subtags::Language::try_from_bytes(subtag)?
+ } else {
+ return Err(ParserError::InvalidLanguage);
+ };
+
+ let mut position = ParserPosition::Script;
+
+ // Subtags are peeked first and only consumed (at the bottom of the loop)
+ // once they have been accepted, so a `break` leaves them in `iter`.
+ while let Some(subtag) = iter.peek() {
+ // A one-byte subtag stops language-identifier parsing in Locale/Partial mode.
+ if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 {
+ break;
+ }
+
+ if position == ParserPosition::Script {
+ // Nothing optional accepted yet: try script, then region, then variant.
+ if let Ok(s) = subtags::Script::try_from_bytes(subtag) {
+ script = Some(s);
+ position = ParserPosition::Region;
+ } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v);
+ }
+ position = ParserPosition::Variant;
+ } else if mode == ParserMode::Partial {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if position == ParserPosition::Region {
+ // A script was accepted: try region, then variant.
+ if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v);
+ }
+ position = ParserPosition::Variant;
+ } else if mode == ParserMode::Partial {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ // Variant position: insert at the sorted position; a variant that is
+ // already present is rejected.
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v);
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if mode == ParserMode::Partial {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ iter.next();
+ }
+
+ Ok(LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_short_slice_unchecked(variants),
+ })
+}
+
+/// Parses the bytes in `t` as a language identifier by running
+/// [`parse_language_identifier_from_iter`] over a fresh [`SubtagIterator`].
+pub fn parse_language_identifier(
+    t: &[u8],
+    mode: ParserMode,
+) -> Result<LanguageIdentifier, ParserError> {
+    parse_language_identifier_from_iter(&mut SubtagIterator::new(t), mode)
+}
+
+/// `const` parser for a locale restricted to at most one variant and at most
+/// one unicode extension keyword.
+///
+/// Returns the language, optional script, optional region, optional variant
+/// and, if a unicode key was found, that key together with its optional
+/// single type subtag. The iterator is taken by value and advanced through the
+/// `*_manual` methods.
+///
+/// `mode` has the same effect on the language-identifier part as in the
+/// non-const parser: outside `ParserMode::LanguageIdentifier` a one-byte
+/// subtag ends it, and in `ParserMode::Partial` an unrecognized subtag ends it
+/// instead of failing. Extensions are only examined in `ParserMode::Locale`.
+///
+/// # Errors
+///
+/// Besides errors for invalid subtags, `ParserError::InvalidSubtag` is
+/// returned for input this function cannot represent: a second variant, a
+/// unicode attribute, more than one key, more than one type for the key, or
+/// an extension other than the unicode one.
+#[allow(clippy::type_complexity)]
+pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
+ mut iter: SubtagIterator,
+ mode: ParserMode,
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+ ),
+ ParserError,
+> {
+ let language;
+ let mut script = None;
+ let mut region = None;
+ let mut variant = None;
+ let mut keyword = None;
+
+ // The first subtag must be a language.
+ if let (i, Some((start, end))) = iter.next_manual() {
+ iter = i;
+ match subtags::Language::try_from_bytes_manual_slice(iter.slice, start, end) {
+ Ok(l) => language = l,
+ Err(e) => return Err(e),
+ }
+ } else {
+ return Err(ParserError::InvalidLanguage);
+ }
+
+ let mut position = ParserPosition::Script;
+
+ // Subtags are peeked first and only consumed (at the bottom of the loop)
+ // once they have been accepted.
+ while let Some((start, end)) = iter.peek_manual() {
+ // A one-byte subtag stops language-identifier parsing in Locale/Partial mode.
+ if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 {
+ break;
+ }
+
+ if matches!(position, ParserPosition::Script) {
+ // Nothing optional accepted yet: try script, then region, then variant.
+ if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(iter.slice, start, end) {
+ script = Some(s);
+ position = ParserPosition::Region;
+ } else if let Ok(r) =
+ subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end)
+ {
+ region = Some(r);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) =
+ subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
+ {
+ // We cannot handle multiple variants in a const context
+ debug_assert!(variant.is_none());
+ variant = Some(v);
+ position = ParserPosition::Variant;
+ } else if matches!(mode, ParserMode::Partial) {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if matches!(position, ParserPosition::Region) {
+ // A script was accepted: try region, then variant.
+ if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) =
+ subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
+ {
+ // We cannot handle multiple variants in a const context
+ debug_assert!(variant.is_none());
+ variant = Some(v);
+ position = ParserPosition::Variant;
+ } else if matches!(mode, ParserMode::Partial) {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
+ {
+ debug_assert!(matches!(position, ParserPosition::Variant));
+ if variant.is_some() {
+ // We cannot handle multiple variants in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ variant = Some(v);
+ } else if matches!(mode, ParserMode::Partial) {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+
+ iter = iter.next_manual().0;
+ }
+
+ // Extension handling: only attempted in Locale mode, and only the unicode
+ // extension is accepted.
+ if matches!(mode, ParserMode::Locale) {
+ if let Some((start, end)) = iter.peek_manual() {
+ match ExtensionType::try_from_bytes_manual_slice(iter.slice, start, end) {
+ Ok(ExtensionType::Unicode) => {
+ // Consume the extension-type subtag itself.
+ iter = iter.next_manual().0;
+ if let Some((start, end)) = iter.peek_manual() {
+ if Attribute::try_from_bytes_manual_slice(iter.slice, start, end).is_ok() {
+ // We cannot handle Attributes in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ }
+
+ let mut key = None;
+ let mut current_type = None;
+
+ while let Some((start, end)) = iter.peek_manual() {
+ let slen = end - start;
+ // A two-byte subtag is treated as a key; anything else
+ // following a key is treated as part of its value.
+ if slen == 2 {
+ if key.is_some() {
+ // We cannot handle more than one Key in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ match Key::try_from_bytes_manual_slice(iter.slice, start, end) {
+ Ok(k) => key = Some(k),
+ Err(e) => return Err(e),
+ };
+ } else if key.is_some() {
+ match Value::parse_subtag_from_bytes_manual_slice(
+ iter.slice, start, end,
+ ) {
+ Ok(Some(t)) => {
+ if current_type.is_some() {
+ // We cannot handle more than one type in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ current_type = Some(t);
+ }
+ // `Ok(None)` adds nothing to the value; the subtag is still consumed below.
+ Ok(None) => {}
+ Err(e) => return Err(e),
+ }
+ } else {
+ break;
+ }
+ iter = iter.next_manual().0
+ }
+ if let Some(k) = key {
+ keyword = Some((k, current_type));
+ }
+ }
+ // We cannot handle Transform, Private, Other extensions in a const context
+ Ok(_) => return Err(ParserError::InvalidSubtag),
+ Err(e) => return Err(e),
+ }
+ }
+ }
+
+ Ok((language, script, region, variant, keyword))
+}
+
+/// `const` parser for a language identifier with at most one variant.
+///
+/// Runs the single-variant, single-keyword locale parser over `t` and drops
+/// the keyword component from its result.
+#[allow(clippy::type_complexity)]
+pub const fn parse_language_identifier_with_single_variant(
+    t: &[u8],
+    mode: ParserMode,
+) -> Result<
+    (
+        subtags::Language,
+        Option<subtags::Script>,
+        Option<subtags::Region>,
+        Option<subtags::Variant>,
+    ),
+    ParserError,
+> {
+    let parsed = parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
+        SubtagIterator::new(t),
+        mode,
+    );
+    // `Result::map` is not usable in a `const fn`, hence the explicit match.
+    match parsed {
+        Err(error) => Err(error),
+        Ok((language, script, region, variant, _)) => Ok((language, script, region, variant)),
+    }
+}
diff --git a/third_party/rust/icu_locid/src/parser/locale.rs b/third_party/rust/icu_locid/src/parser/locale.rs
new file mode 100644
index 0000000000..175fd3a05b
--- /dev/null
+++ b/third_party/rust/icu_locid/src/parser/locale.rs
@@ -0,0 +1,42 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use tinystr::TinyAsciiStr;
+
+use crate::extensions::{self, Extensions};
+use crate::parser::errors::ParserError;
+use crate::parser::{parse_language_identifier_from_iter, ParserMode, SubtagIterator};
+use crate::{subtags, Locale};
+
+use super::parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter;
+
+/// Parses the bytes in `t` as a [`Locale`].
+///
+/// The language identifier is parsed first in `ParserMode::Locale`; if any
+/// subtags remain afterwards they are parsed as extensions, otherwise the
+/// extensions are left at their default.
+pub fn parse_locale(t: &[u8]) -> Result<Locale, ParserError> {
+    let mut subtags_iter = SubtagIterator::new(t);
+    let id = parse_language_identifier_from_iter(&mut subtags_iter, ParserMode::Locale)?;
+
+    let extensions = match subtags_iter.peek() {
+        Some(_) => Extensions::try_from_iter(&mut subtags_iter)?,
+        None => Extensions::default(),
+    };
+
+    Ok(Locale { id, extensions })
+}
+
+/// `const` entry point that runs the single-variant, single-keyword locale
+/// parser over the bytes in `t` and returns its result unchanged.
+#[allow(clippy::type_complexity)]
+pub const fn parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(
+    t: &[u8],
+    mode: ParserMode,
+) -> Result<
+    (
+        subtags::Language,
+        Option<subtags::Script>,
+        Option<subtags::Region>,
+        Option<subtags::Variant>,
+        Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+    ),
+    ParserError,
+> {
+    parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
+        SubtagIterator::new(t),
+        mode,
+    )
+}
diff --git a/third_party/rust/icu_locid/src/parser/mod.rs b/third_party/rust/icu_locid/src/parser/mod.rs
new file mode 100644
index 0000000000..4b02f71c9a
--- /dev/null
+++ b/third_party/rust/icu_locid/src/parser/mod.rs
@@ -0,0 +1,231 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub mod errors;
+mod langid;
+mod locale;
+
+pub use errors::ParserError;
+pub use langid::{
+ parse_language_identifier, parse_language_identifier_from_iter,
+ parse_language_identifier_with_single_variant,
+ parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter, ParserMode,
+};
+
+pub use locale::{
+ parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
+};
+
+/// Returns `true` if the byte at `idx` is a subtag separator (`-` or `_`).
+///
+/// Panics if `idx` is out of bounds for `slice`.
+#[inline]
+const fn is_separator(slice: &[u8], idx: usize) -> bool {
+    #[allow(clippy::indexing_slicing)]
+    let byte = slice[idx];
+    matches!(byte, b'-' | b'_')
+}
+
+/// Returns the `(start, end)` byte range of the subtag located at `idx`.
+///
+/// `idx` is expected to be either `0` or the position of a separator. When it
+/// is a separator the subtag starts right after it; otherwise it starts at
+/// `0`. The range ends at the next separator or at the end of `slice`.
+const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) {
+    debug_assert!(idx < slice.len());
+
+    let at_separator = is_separator(slice, idx);
+    // Anything that is not a separator must be the very first byte.
+    debug_assert!(at_separator || idx == 0);
+
+    let start = if at_separator { idx + 1 } else { 0 };
+    // From a separator the scan begins at `start`; from index 0 the first
+    // byte is already known not to be a separator, so it begins at 1.
+    let mut end = if at_separator { start } else { 1 };
+
+    loop {
+        if end >= slice.len() || is_separator(slice, end) {
+            break;
+        }
+        end += 1;
+    }
+
+    // The range may be empty (start == end), e.g. for `"en-"` or `"en--US"`.
+    (start, end)
+}
+
+// `SubtagIterator` is a helper iterator for [`LanguageIdentifier`] and [`Locale`] parsing.
+//
+// It is quite extraordinary due to focus on performance and Rust limitations for `const`
+// functions.
+//
+// The iterator computes the upcoming subtag ahead of time and does not fail by itself: for
+// malformed slices such as `"-"`, `"-en"`, `"en-"` etc. it yields empty subtags, which lets
+// callers reject them.
+//
+// The iterator provides methods available for static users - `next_manual` and `peek_manual`,
+// as well as typical `Peekable` iterator APIs - `next` and `peek`.
+//
+// The `*_manual` methods produce an `Option` of `(start, end)` indices into `slice`
+// (`next_manual` also hands back the advanced iterator); `next` and `peek` produce an
+// `Option` of the corresponding byte sub-slice.
+#[derive(Copy, Clone, Debug)]
+pub struct SubtagIterator<'a> {
+ pub slice: &'a [u8],
+ done: bool,
+ // done + subtag is faster than Option<(usize, usize)>
+ // at the time of writing.
+ subtag: (usize, usize),
+}
+
+impl<'a> SubtagIterator<'a> {
+    /// Creates an iterator over the subtags of `slice`, with the first subtag
+    /// already located.
+    pub const fn new(slice: &'a [u8]) -> Self {
+        let starts_with_subtag = !slice.is_empty() && !is_separator(slice, 0);
+        let subtag = if starts_with_subtag {
+            get_current_subtag(slice, 0)
+        } else {
+            // An empty first subtag, so `"-en"` or `"-"` yield `Some(b"")` first.
+            (0, 0)
+        };
+        Self {
+            slice,
+            done: false,
+            subtag,
+        }
+    }
+
+    /// `const`-friendly `next`: returns the advanced iterator together with
+    /// the `(start, end)` range of the subtag just consumed, or `None` once
+    /// the iterator is finished.
+    pub const fn next_manual(mut self) -> (Self, Option<(usize, usize)>) {
+        if self.done {
+            return (self, None);
+        }
+        let current = self.subtag;
+        let (_, current_end) = current;
+        if current_end < self.slice.len() {
+            // `current_end` is the separator that introduces the next subtag.
+            self.subtag = get_current_subtag(self.slice, current_end);
+        } else {
+            self.done = true;
+        }
+        (self, Some(current))
+    }
+
+    /// `const`-friendly `peek`: the `(start, end)` range of the upcoming
+    /// subtag, without advancing.
+    pub const fn peek_manual(&self) -> Option<(usize, usize)> {
+        if self.done {
+            None
+        } else {
+            Some(self.subtag)
+        }
+    }
+
+    /// Returns the bytes of the upcoming subtag without advancing.
+    pub fn peek(&self) -> Option<&'a [u8]> {
+        match self.peek_manual() {
+            Some((start, end)) => {
+                #[allow(clippy::indexing_slicing)] // peek_manual returns valid indices
+                let subtag = &self.slice[start..end];
+                Some(subtag)
+            }
+            None => None,
+        }
+    }
+}
+
+impl<'a> Iterator for SubtagIterator<'a> {
+    type Item = &'a [u8];
+
+    /// Advances the iterator and returns the bytes of the consumed subtag.
+    fn next(&mut self) -> Option<Self::Item> {
+        let (advanced, range) = self.next_manual();
+        // Store the advanced state even when nothing was yielded.
+        *self = advanced;
+        match range {
+            Some((start, end)) => {
+                #[allow(clippy::indexing_slicing)] // next_manual returns valid indices
+                let subtag = &self.slice[start..end];
+                Some(subtag)
+            }
+            None => None,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Decodes a subtag's bytes so assertions can compare against `&str`.
+    fn slice_to_str(input: &[u8]) -> &str {
+        std::str::from_utf8(input).unwrap()
+    }
+
+    #[test]
+    fn subtag_iterator_peek_test() {
+        let mut si = SubtagIterator::new("de_at-u-ca-foobar".as_bytes());
+
+        for &expected in &["de", "at"] {
+            // Peeking repeatedly must not advance the iterator.
+            assert_eq!(si.peek().map(slice_to_str), Some(expected));
+            assert_eq!(si.peek().map(slice_to_str), Some(expected));
+            assert_eq!(si.next().map(slice_to_str), Some(expected));
+        }
+    }
+
+    #[test]
+    fn subtag_iterator_test() {
+        // For these inputs only the first yielded subtag is checked.
+        for &input in &["", "-"] {
+            let mut si = SubtagIterator::new(input.as_bytes());
+            assert_eq!(si.next().map(slice_to_str), Some(""));
+        }
+
+        // For these inputs the complete sequence of subtags is checked.
+        let cases: &[(&str, &[&str])] = &[
+            ("-en", &["", "en"]),
+            ("en", &["en"]),
+            ("en-", &["en", ""]),
+            ("--", &["", "", ""]),
+            ("-en-", &["", "en", ""]),
+            ("de_at-u-ca-foobar", &["de", "at", "u", "ca", "foobar"]),
+        ];
+        for &(input, expected) in cases {
+            let subtags = SubtagIterator::new(input.as_bytes())
+                .map(slice_to_str)
+                .collect::<Vec<_>>();
+            assert_eq!(subtags, expected);
+        }
+    }
+
+    #[test]
+    fn get_current_subtag_test() {
+        // (input, index to start from, expected (start, end) range)
+        let cases: &[(&str, usize, (usize, usize))] = &[
+            ("-", 0, (1, 1)),
+            ("-en", 0, (1, 3)),
+            ("-en-", 3, (4, 4)),
+            ("en-", 0, (0, 2)),
+            ("en-", 2, (3, 3)),
+            ("en--US", 0, (0, 2)),
+            ("en--US", 2, (3, 3)),
+            ("en--US", 3, (4, 6)),
+            ("--", 0, (1, 1)),
+            ("--", 1, (2, 2)),
+            ("-", 0, (1, 1)),
+        ];
+        for &(input, idx, expected) in cases {
+            let current = get_current_subtag(input.as_bytes(), idx);
+            assert_eq!(current, expected);
+        }
+    }
+}
diff --git a/third_party/rust/icu_locid/src/serde.rs b/third_party/rust/icu_locid/src/serde.rs
new file mode 100644
index 0000000000..3bfe303887
--- /dev/null
+++ b/third_party/rust/icu_locid/src/serde.rs
@@ -0,0 +1,135 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::LanguageIdentifier;
+use alloc::string::ToString;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+impl Serialize for LanguageIdentifier {
+    /// Serializes the identifier as its string representation.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let rendered = self.to_string();
+        serializer.serialize_str(&rendered)
+    }
+}
+
+impl<'de> Deserialize<'de> for LanguageIdentifier {
+    /// Deserializes an identifier by parsing its string representation.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        /// Visitor that parses a string into a [`LanguageIdentifier`].
+        struct LangIdVisitor;
+
+        impl<'de> serde::de::Visitor<'de> for LangIdVisitor {
+            type Value = LanguageIdentifier;
+
+            fn expecting(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+                formatter.write_str("a valid Unicode Language Identifier")
+            }
+
+            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                // Surface parser failures as the deserializer's own error type.
+                match s.parse::<LanguageIdentifier>() {
+                    Ok(langid) => Ok(langid),
+                    Err(parse_error) => Err(E::custom(parse_error)),
+                }
+            }
+        }
+
+        deserializer.deserialize_string(LangIdVisitor)
+    }
+}
+
+/// JSON round-trips: each type serializes to a quoted string, parses back to
+/// the same value, and rejects the malformed input `"2Xs"`.
+#[test]
+fn json() {
+    use crate::langid;
+    use crate::subtags::{Language, Region, Script};
+
+    let en_us = langid!("en-US");
+    assert_eq!(serde_json::to_string(&en_us).unwrap(), r#""en-US""#);
+    assert_eq!(
+        serde_json::from_str::<LanguageIdentifier>(r#""en-US""#).unwrap(),
+        en_us
+    );
+    assert!(serde_json::from_str::<LanguageIdentifier>(r#""2Xs""#).is_err());
+
+    let fr = "fr".parse::<Language>().unwrap();
+    assert_eq!(serde_json::to_string(&fr).unwrap(), r#""fr""#);
+    assert_eq!(serde_json::from_str::<Language>(r#""fr""#).unwrap(), fr);
+    assert!(serde_json::from_str::<Language>(r#""2Xs""#).is_err());
+
+    let latn = "Latn".parse::<Script>().unwrap();
+    assert_eq!(serde_json::to_string(&latn).unwrap(), r#""Latn""#);
+    assert_eq!(serde_json::from_str::<Script>(r#""Latn""#).unwrap(), latn);
+    assert!(serde_json::from_str::<Script>(r#""2Xs""#).is_err());
+
+    let us = "US".parse::<Region>().unwrap();
+    assert_eq!(serde_json::to_string(&us).unwrap(), r#""US""#);
+    assert_eq!(serde_json::from_str::<Region>(r#""US""#).unwrap(), us);
+    assert!(serde_json::from_str::<Region>(r#""2Xs""#).is_err());
+}
+
+/// Postcard round-trips: each type serializes to the expected bytes, parses
+/// back to the same value, and rejects malformed input.
+#[test]
+fn postcard() {
+    use crate::langid;
+    use crate::subtags::{Language, Region, Script};
+
+    let en_us = langid!("en-US");
+    assert_eq!(
+        postcard::to_stdvec(&en_us).unwrap(),
+        &[5, b'e', b'n', b'-', b'U', b'S']
+    );
+    assert_eq!(
+        postcard::from_bytes::<LanguageIdentifier>(&[5, b'e', b'n', b'-', b'U', b'S']).unwrap(),
+        en_us
+    );
+    assert!(postcard::from_bytes::<LanguageIdentifier>(&[3, b'2', b'X', b's']).is_err());
+
+    let fr = "fr".parse::<Language>().unwrap();
+    assert_eq!(postcard::to_stdvec(&fr).unwrap(), b"fr\0");
+    assert_eq!(postcard::from_bytes::<Language>(b"fr\0").unwrap(), fr);
+    assert!(postcard::from_bytes::<Language>(b"2Xs").is_err());
+
+    let latn = "Latn".parse::<Script>().unwrap();
+    assert_eq!(postcard::to_stdvec(&latn).unwrap(), b"Latn");
+    assert_eq!(postcard::from_bytes::<Script>(b"Latn").unwrap(), latn);
+    assert!(postcard::from_bytes::<Script>(b"2Xss").is_err());
+
+    let us = "US".parse::<Region>().unwrap();
+    assert_eq!(postcard::to_stdvec(&us).unwrap(), b"US\0");
+    assert_eq!(postcard::from_bytes::<Region>(b"US\0").unwrap(), us);
+    assert!(postcard::from_bytes::<Region>(b"2Xs").is_err());
+}
diff --git a/third_party/rust/icu_locid/src/subtags/language.rs b/third_party/rust/icu_locid/src/subtags/language.rs
new file mode 100644
index 0000000000..6fd08a2d5f
--- /dev/null
+++ b/third_party/rust/icu_locid/src/subtags/language.rs
@@ -0,0 +1,107 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A language subtag (examples: `"en"`, `"csb"`, `"zh"`, `"und"`, etc.)
+ ///
+ /// [`Language`] represents a Unicode base language code conformant to the
+ /// [`unicode_language_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let language: Language =
+ /// "en".parse().expect("Failed to parse a language subtag.");
+ /// ```
+ ///
+ /// If the [`Language`] has no value assigned, it serializes to the string `"und"`, which
+ /// can then be parsed back to an empty [`Language`] field.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// assert_eq!(Language::default().as_str(), "und");
+ /// ```
+ ///
+ /// `Notice`: ICU4X uses a narrow form of language subtag of 2-3 characters.
+ /// The specification allows language subtag to optionally also be 5-8 characters
+ /// but that form has not been used and ICU4X does not support it right now.
+ ///
+ /// [`unicode_language_id`]: https://unicode.org/reports/tr35/#unicode_language_id
+ Language,
+ subtags,
+ language,
+ subtags_language,
+ 2..=3,
+ s,
+ s.is_ascii_alphabetic(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphabetic_lowercase(),
+ InvalidLanguage,
+ ["en", "foo"],
+ ["419", "german", "en1"],
+);
+
+impl Language {
+ /// The default undefined language "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// assert_eq!(Language::default(), Language::UND);
+ /// ```
+ pub const UND: Self = unsafe { Self::from_raw_unchecked(*b"und") };
+
+ /// Resets the [`Language`] subtag to an empty one (equal to `"und"`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{language, Language};
+ ///
+ /// let mut lang = language!("csb");
+ ///
+ /// assert_ne!(lang, Language::UND);
+ ///
+ /// lang.clear();
+ ///
+ /// assert_eq!(lang, Language::UND);
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ *self = Self::UND
+ }
+
+ /// Tests if the [`Language`] subtag is empty (equal to `"und"`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let mut lang = Language::UND;
+ ///
+ /// assert!(lang.is_empty());
+ ///
+ /// lang.clear();
+ ///
+ /// assert!(lang.is_empty());
+ /// ```
+ #[inline]
+ pub fn is_empty(self) -> bool {
+ self == Self::UND
+ }
+}
+
+impl Default for Language {
+ fn default() -> Language {
+ Language::UND
+ }
+}
diff --git a/third_party/rust/icu_locid/src/subtags/mod.rs b/third_party/rust/icu_locid/src/subtags/mod.rs
new file mode 100644
index 0000000000..9cc04dac8c
--- /dev/null
+++ b/third_party/rust/icu_locid/src/subtags/mod.rs
@@ -0,0 +1,62 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Language Identifier and Locale contain a set of subtags
+//! which represent different fields of the structure.
+//!
+//! * [`Language`] is the only mandatory field, which when empty,
+//! takes the value `und`.
+//! * [`Script`] is an optional field representing the written script used by the locale.
+//! * [`Region`] is the region used by the locale.
+//! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the
+//! variant adjustments used by the locale.
+//!
+//! Subtags can be used in isolation, and all basic operations such as parsing, syntax canonicalization
+//! and serialization are supported on each individual subtag, but most commonly
+//! they are used to construct a [`LanguageIdentifier`] instance.
+//!
+//! [`Variants`] is a special structure which contains a list of [`Variant`] subtags.
+//! It is wrapped around to allow for sorting and deduplication of variants, which
+//! is one of the required steps of language identifier and locale syntax canonicalization.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::subtags::{Language, Region, Script, Variant};
+//!
+//! let language: Language =
+//! "en".parse().expect("Failed to parse a language subtag.");
+//! let script: Script =
+//! "arab".parse().expect("Failed to parse a script subtag.");
+//! let region: Region =
+//! "cn".parse().expect("Failed to parse a region subtag.");
+//! let variant: Variant =
+//! "MacOS".parse().expect("Failed to parse a variant subtag.");
+//!
+//! assert_eq!(language.as_str(), "en");
+//! assert_eq!(script.as_str(), "Arab");
+//! assert_eq!(region.as_str(), "CN");
+//! assert_eq!(variant.as_str(), "macos");
+//! ```
+//!
+//! `Notice`: The subtags are canonicalized on parsing. That means
+//! that all operations work on a canonicalized version of the subtag
+//! and serialization is very cheap.
+//!
+//! [`LanguageIdentifier`]: super::LanguageIdentifier
+mod language;
+mod region;
+mod script;
+mod variant;
+mod variants;
+
+#[doc(inline)]
+pub use language::{language, Language};
+#[doc(inline)]
+pub use region::{region, Region};
+#[doc(inline)]
+pub use script::{script, Script};
+#[doc(inline)]
+pub use variant::{variant, Variant};
+pub use variants::Variants;
diff --git a/third_party/rust/icu_locid/src/subtags/region.rs b/third_party/rust/icu_locid/src/subtags/region.rs
new file mode 100644
index 0000000000..4348f15e79
--- /dev/null
+++ b/third_party/rust/icu_locid/src/subtags/region.rs
@@ -0,0 +1,62 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A region subtag (examples: `"US"`, `"CN"`, `"AR"` etc.)
+ ///
+ /// [`Region`] represents a Unicode region subtag conformant to the
+ /// [`unicode_region_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ ///
+ /// let region: Region =
+ /// "DE".parse().expect("Failed to parse a region subtag.");
+ /// ```
+ ///
+ /// [`unicode_region_id`]: https://unicode.org/reports/tr35/#unicode_region_id
+ Region,
+ subtags,
+ region,
+ subtags_region,
+ 2..=3,
+ s,
+ if s.len() == 2 {
+ s.is_ascii_alphabetic()
+ } else {
+ s.is_ascii_numeric()
+ },
+ if s.len() == 2 {
+ s.to_ascii_uppercase()
+ } else {
+ s
+ },
+ if s.len() == 2 {
+ s.is_ascii_alphabetic_uppercase()
+ } else {
+ s.is_ascii_numeric()
+ },
+ InvalidSubtag,
+ ["FR", "123"],
+ ["12", "FRA", "b2"],
+);
+
+impl Region {
+ /// Returns true if the [`Region`] has a two-letter alphabetic code rather than a three-digit numeric one.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ ///
+ /// let region = Region::try_from_bytes(b"us").expect("Parsing failed.");
+ ///
+ /// assert!(region.is_alphabetic());
+ /// ```
+ pub fn is_alphabetic(&self) -> bool {
+ self.0.len() == 2
+ }
+}
diff --git a/third_party/rust/icu_locid/src/subtags/script.rs b/third_party/rust/icu_locid/src/subtags/script.rs
new file mode 100644
index 0000000000..79ead0390c
--- /dev/null
+++ b/third_party/rust/icu_locid/src/subtags/script.rs
@@ -0,0 +1,33 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A script subtag (examples: `"Latn"`, `"Arab"`, etc.)
+ ///
+ /// [`Script`] represents a Unicode script subtag conformant to the
+ /// [`unicode_script_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Script;
+ ///
+ /// let script: Script =
+ /// "Latn".parse().expect("Failed to parse a script subtag.");
+ /// ```
+ ///
+ /// [`unicode_script_id`]: https://unicode.org/reports/tr35/#unicode_script_id
+ Script,
+ subtags,
+ script,
+ subtags_script,
+ 4..=4,
+ s,
+ s.is_ascii_alphabetic(),
+ s.to_ascii_titlecase(),
+ s.is_ascii_alphabetic_titlecase(),
+ InvalidSubtag,
+ ["Latn"],
+ ["Latin"],
+);
diff --git a/third_party/rust/icu_locid/src/subtags/variant.rs b/third_party/rust/icu_locid/src/subtags/variant.rs
new file mode 100644
index 0000000000..c60b138659
--- /dev/null
+++ b/third_party/rust/icu_locid/src/subtags/variant.rs
@@ -0,0 +1,35 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A variant subtag (examples: `"macos"`, `"posix"`, `"1996"` etc.)
+ ///
+ /// [`Variant`] represents a Unicode variant subtag conformant to the
+ /// [`unicode_variant_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Variant;
+ ///
+ /// let variant: Variant =
+ /// "macos".parse().expect("Failed to parse a variant subtag.");
+ /// ```
+ ///
+ /// [`unicode_variant_id`]: https://unicode.org/reports/tr35/#unicode_variant_id
+ Variant,
+ subtags,
+ variant,
+ subtags_variant,
+ 4..=8,
+ s,
+ s.is_ascii_alphanumeric() && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()),
+ s.to_ascii_lowercase(),
+ s.is_ascii_lowercase()
+ && s.is_ascii_alphanumeric()
+ && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()),
+ InvalidSubtag,
+ ["posix", "1996"],
+ ["yes"],
+);
diff --git a/third_party/rust/icu_locid/src/subtags/variants.rs b/third_party/rust/icu_locid/src/subtags/variants.rs
new file mode 100644
index 0000000000..ba5ff1bc1a
--- /dev/null
+++ b/third_party/rust/icu_locid/src/subtags/variants.rs
@@ -0,0 +1,128 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::Variant;
+use crate::helpers::ShortSlice;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// A list of variants (examples: `["macos", "posix"]`, etc.)
+///
+/// [`Variants`] stores a list of [`Variant`] subtags in a canonical form
+/// by sorting and deduplicating them.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags::{variant, Variants};
+///
+/// let mut v = vec![variant!("posix"), variant!("macos")];
+/// v.sort();
+/// v.dedup();
+///
+/// let variants: Variants = Variants::from_vec_unchecked(v);
+/// assert_eq!(variants.to_string(), "macos-posix");
+/// ```
+#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Variants(ShortSlice<Variant>);
+
+impl Variants {
+ /// Returns a new empty list of variants. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Variants;
+ ///
+ /// assert_eq!(Variants::new(), Variants::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(ShortSlice::new())
+ }
+
+ /// Creates a new [`Variants`] set from a single [`Variant`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{variant, Variants};
+ ///
+ /// let variants = Variants::from_variant(variant!("posix"));
+ /// ```
+ #[inline]
+ pub const fn from_variant(variant: Variant) -> Self {
+ Self(ShortSlice::new_single(variant))
+ }
+
+ /// Creates a new [`Variants`] set from a [`Vec`].
+ /// The caller is expected to provide sorted and deduplicated vector as
+ /// an input.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{variant, Variants};
+ ///
+ /// let mut v = vec![variant!("posix"), variant!("macos")];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let variants = Variants::from_vec_unchecked(v);
+ /// ```
+ ///
+ /// Notice: For performance- and memory-constrained environments, it is recommended
+ /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
+ /// and [`dedup`](Vec::dedup()).
+ pub fn from_vec_unchecked(input: Vec<Variant>) -> Self {
+ Self(input.into())
+ }
+
+ pub(crate) fn from_short_slice_unchecked(input: ShortSlice<Variant>) -> Self {
+ Self(input)
+ }
+
+ /// Empties the [`Variants`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{variant, Variants};
+ ///
+ /// let mut v = vec![variant!("posix"), variant!("macos")];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let mut variants: Variants = Variants::from_vec_unchecked(v);
+ ///
+ /// assert_eq!(variants.to_string(), "macos-posix");
+ ///
+ /// variants.clear();
+ ///
+ /// assert_eq!(variants, Variants::default());
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.deref().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+impl_writeable_for_subtag_list!(Variants, "macos", "posix");
+
+impl Deref for Variants {
+ type Target = [Variant];
+
+ fn deref(&self) -> &[Variant] {
+ self.0.deref()
+ }
+}
diff --git a/third_party/rust/icu_locid/src/zerovec.rs b/third_party/rust/icu_locid/src/zerovec.rs
new file mode 100644
index 0000000000..ba6a3e85d6
--- /dev/null
+++ b/third_party/rust/icu_locid/src/zerovec.rs
@@ -0,0 +1,132 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Documentation on zero-copy deserialization of locale types.
+//!
+//! [`Locale`] and [`LanguageIdentifier`] are highly structured types that cannot be directly
+//! stored in a zero-copy data structure, such as those provided by the [`zerovec`] crate.
+//! This page explains how to indirectly store these types in a [`zerovec`].
+//!
+//! There are two main use cases, which have different solutions:
+//!
+//! 1. **Lookup:** You need to locate a locale in a zero-copy vector, such as when querying a map.
+//! 2. **Obtain:** You have a locale stored in a zero-copy vector, and you need to obtain a proper
+//! [`Locale`] or [`LanguageIdentifier`] for use elsewhere in your program.
+//!
+//! # Lookup
+//!
+//! To perform lookup, store the stringified locale in a canonical BCP-47 form as a byte array,
+//! and then use [`Locale::strict_cmp()`] to perform an efficient, zero-allocation lookup.
+//!
+//! To produce more human-readable serialized output, you can use [`UnvalidatedStr`].
+//!
+//! ```
+//! use icu_locid::Locale;
+//! use zerovec::ule::UnvalidatedStr;
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from locales to integers
+//! let data: &[(&UnvalidatedStr, u32)] = &[
+//! ("de-DE-u-hc-h12".into(), 5),
+//! ("en-US-u-ca-buddhist".into(), 10),
+//! ("my-MM".into(), 15),
+//! ("sr-Cyrl-ME".into(), 20),
+//! ("zh-TW".into(), 25),
+//! ];
+//! let zm: ZeroMap<UnvalidatedStr, u32> = data.iter().copied().collect();
+//!
+//! // Get the value associated with a locale
+//! let loc: Locale = "en-US-u-ca-buddhist".parse().unwrap();
+//! let value = zm.get_copied_by(|uvstr| loc.strict_cmp(uvstr).reverse());
+//! assert_eq!(value, Some(10));
+//! ```
+//!
+//! # Obtain
+//!
+//! Obtaining a [`Locale`] or [`LanguageIdentifier`] is not generally a zero-copy operation, since
+//! both of these types may require memory allocation. If possible, architect your code such that
+//! you do not need to obtain a structured type.
+//!
+//! If you need the structured type, such as if you need to manipulate it in some way, there are two
+//! options: storing subtags, and storing a string for parsing.
+//!
+//! ## Storing Subtags
+//!
+//! If the data being stored only contains a limited number of subtags, you can store them as a
+//! tuple, and then construct the [`LanguageIdentifier`] externally.
+//!
+//! ```
+//! use icu_locid::subtags::{Language, Region, Script};
+//! use icu_locid::LanguageIdentifier;
+//! use icu_locid::{
+//! langid,
+//! subtags::{language, region, script},
+//! };
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from integer to LSR (language-script-region)
+//! let zm: ZeroMap<u32, (Language, Option<Script>, Option<Region>)> = [
+//! (5, (language!("de"), None, Some(region!("DE")))),
+//! (10, (language!("en"), None, Some(region!("US")))),
+//! (15, (language!("my"), None, Some(region!("MM")))),
+//! (
+//! 20,
+//! (language!("sr"), Some(script!("Cyrl")), Some(region!("ME"))),
+//! ),
+//! (25, (language!("zh"), None, Some(region!("TW")))),
+//! ]
+//! .into_iter()
+//! .collect();
+//!
+//! // Construct a LanguageIdentifier from a tuple entry
+//! let lid: LanguageIdentifier =
+//! zm.get_copied(&25).expect("element is present").into();
+//!
+//! assert_eq!(lid, langid!("zh-TW"));
+//! ```
+//!
+//! ## Storing Strings
+//!
+//! If it is necessary to store and obtain an arbitrary locale, it is currently recommended to
+//! store a BCP-47 string and parse it when needed.
+//!
+//! Since the string is stored in an unparsed state, it is not safe to `unwrap` the result from
+//! `Locale::try_from_bytes()`. See [icu4x#831](https://github.com/unicode-org/icu4x/issues/831)
+//! for a discussion on potential data models that could ensure that the locale is valid during
+//! deserialization.
+//!
+//! As above, to produce more human-readable serialized output, you can use [`UnvalidatedStr`].
+//!
+//! ```
+//! use icu_locid::langid;
+//! use icu_locid::Locale;
+//! use zerovec::ule::UnvalidatedStr;
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from integer to locale string
+//! let data: &[(u32, &UnvalidatedStr)] = &[
+//! (5, "de-DE-u-hc-h12".into()),
+//! (10, "en-US-u-ca-buddhist".into()),
+//! (15, "my-MM".into()),
+//! (20, "sr-Cyrl-ME".into()),
+//! (25, "zh-TW".into()),
+//! (30, "INVALID".into()),
+//! ];
+//! let zm: ZeroMap<u32, UnvalidatedStr> = data.iter().copied().collect();
+//!
+//! // Construct a Locale by parsing the string.
+//! let value = zm.get(&25).expect("element is present");
+//! let loc = Locale::try_from_bytes(value);
+//! assert_eq!(loc, Ok(langid!("zh-TW").into()));
+//!
+//! // Invalid entries are fallible
+//! let err_value = zm.get(&30).expect("element is present");
+//! let err_loc = Locale::try_from_bytes(err_value);
+//! assert!(matches!(err_loc, Err(_)));
+//! ```
+//!
+//! [`Locale`]: crate::Locale
+//! [`Locale::strict_cmp()`]: crate::Locale::strict_cmp()
+//! [`LanguageIdentifier`]: crate::LanguageIdentifier
+//! [`UnvalidatedStr`]: zerovec::ule::UnvalidatedStr
diff --git a/third_party/rust/icu_locid/tests/fixtures/canonicalize.json b/third_party/rust/icu_locid/tests/fixtures/canonicalize.json
new file mode 100644
index 0000000000..79a5057146
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/fixtures/canonicalize.json
@@ -0,0 +1,68 @@
+[
+ {
+ "input": "Pl",
+ "output": "pl"
+ },
+ {
+ "input": "eN-uS",
+ "output": "en-US"
+ },
+ {
+ "input": "ZH_hans_hK",
+ "output": "zh-Hans-HK"
+ },
+ {
+ "input": "en-scouse-fonipa",
+ "output": "en-fonipa-scouse"
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-es-AR-x-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-US-t-es-ar-x-foo"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-t-en-Latn-CA-emodeng"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-t-en-latn-ca-emodeng"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "EN-US-T-ES-AR-X-FOO"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-US-t-es-ar-x-foo"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "EN-T-EN-LATN-CA-EMODENG"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-t-en-latn-ca-emodeng"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "UND-CYRL-T-ES-LATN-M0-UNGEGN"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-Cyrl-t-es-latn-m0-ungegn"
+ }
+ }
+]
diff --git a/third_party/rust/icu_locid/tests/fixtures/invalid-extensions.json b/third_party/rust/icu_locid/tests/fixtures/invalid-extensions.json
new file mode 100644
index 0000000000..3aff2636b2
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/fixtures/invalid-extensions.json
@@ -0,0 +1,152 @@
+[
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-x-waytoolongkey"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-x-@A_3"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-t-h0"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-t-h0-x-foo"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-t-h0"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-n0-mixed"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u--"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u-t-latn"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "cmn-hans-cn-u-u"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "de-u-ca-"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "de-u-ca-gregory-"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "de-u-ca-gregory-u-hc-hc24"
+ },
+ "output": {
+ "error": "DuplicatedExtension",
+ "text": "Duplicated extension"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "de-l-foo-l-bar"
+ },
+ "output": {
+ "error": "DuplicatedExtension",
+ "text": "Duplicated extension"
+ }
+ }
+]
diff --git a/third_party/rust/icu_locid/tests/fixtures/invalid.json b/third_party/rust/icu_locid/tests/fixtures/invalid.json
new file mode 100644
index 0000000000..c22459e65d
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/fixtures/invalid.json
@@ -0,0 +1,134 @@
+[
+ {
+ "input": "-",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "--",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given subtag is invalid"
+ }
+ },
+ {
+ "input": "en-",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "The given subtag is invalid"
+ }
+ },
+ {
+ "input": "-en",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given subtag is invalid"
+ }
+ },
+ {
+ "input": "en-us-",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "The given subtag is invalid"
+ }
+ },
+ {
+ "input": "en--US",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "The given subtag is invalid"
+ }
+ },
+ {
+ "input": "-e-",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given subtag is invalid"
+ }
+ },
+ {
+ "input": "a1a",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "Arab-US",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "pl-DSDAFAFDF",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-$1231",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-US-$1231",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-12",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-a12",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-US-3_dd",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-Latn-US-variant-h0-hybrid"
+ },
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-variant-emodeng-emodeng"
+ },
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ }
+]
diff --git a/third_party/rust/icu_locid/tests/fixtures/langid.json b/third_party/rust/icu_locid/tests/fixtures/langid.json
new file mode 100644
index 0000000000..31740d99aa
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/fixtures/langid.json
@@ -0,0 +1,167 @@
+[
+ {
+ "input": "en",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en"
+ }
+ },
+ {
+ "input": "lij",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij"
+ }
+ },
+ {
+ "input": "en-Latn",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn"
+ }
+ },
+ {
+ "input": "lij-Arab",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab"
+ }
+ },
+ {
+ "input": "en-Latn-US",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn",
+ "region": "US"
+ }
+ },
+ {
+ "input": "lij-Arab-FA",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab",
+ "region": "FA"
+ }
+ },
+ {
+ "input": "en-Latn-US-windows",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn",
+ "region": "US",
+ "variants": ["windows"]
+ }
+ },
+ {
+ "input": "lij-Arab-FA-linux",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab",
+ "region": "FA",
+ "variants": ["linux"]
+ }
+ },
+ {
+ "input": "lij-Arab-FA-linux-nedis",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab",
+ "region": "FA",
+ "variants": ["linux", "nedis"]
+ }
+ },
+ {
+ "input": "EN-latn-us",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn",
+ "region": "US"
+ }
+ },
+ {
+ "input": "sl-nedis",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "sl",
+ "variants": ["nedis"]
+ }
+ },
+ {
+ "input": "de-CH-1996",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "de",
+ "region": "CH",
+ "variants": ["1996"]
+ }
+ },
+ {
+ "input": "sr-Latn",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "sr",
+ "script": "Latn"
+ }
+ },
+ {
+ "input": "es-419",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "es",
+ "region": "419"
+ }
+ },
+ {
+ "input": "und-Latn-US",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn",
+ "region": "US"
+ }
+ },
+ {
+ "input": "und",
+ "output": {
+ "type": "LanguageIdentifier"
+ }
+ },
+ {
+ "input": "und-Latn",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn"
+ }
+ },
+ {
+ "input": "pl-macos-Windows-nedis-aRabic",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "pl",
+ "variants": ["arabic", "macos", "nedis", "windows"]
+ }
+ },
+ {
+ "input": "und-Latn-macos",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn",
+ "variants": ["macos"]
+ }
+ },
+ {
+ "input": "und-Latn-312",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn",
+ "region": "312"
+ }
+ }
+]
diff --git a/third_party/rust/icu_locid/tests/fixtures/locale.json b/third_party/rust/icu_locid/tests/fixtures/locale.json
new file mode 100644
index 0000000000..93679a0667
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/fixtures/locale.json
@@ -0,0 +1,298 @@
+[
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-u-hc-h12"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "hc": "h12"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-hc-h23"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "hc": "h23"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "attributes": [
+ "foo"
+ ]
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-hc-h23-ca-islamic-civil-ss-true"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "hc": "h23",
+ "ca": "islamic-civil",
+ "ss": "true"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-pl-latn-de"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "transform": {
+ "tlang": "pl-Latn-DE"
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-x-private-foobar"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "private": ["private", "foobar"]
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-h0-hybrid-k0-platform-s0-true"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "transform": {
+ "tfields": {
+ "h0": "hybrid",
+ "k0": "platform",
+ "s0": "true"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-es-ar-x-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "transform": {
+ "tlang": "es-AR"
+ },
+ "private": ["foo"]
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-ca-buddhist-hc-h12-t-es-ar-h0-hybrid-x-private-foobar"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "ca": "buddhist",
+ "hc": "h12"
+ }
+ },
+ "transform": {
+ "tlang": "es-AR",
+ "tfields": {
+ "h0": "hybrid"
+ }
+ },
+ "private": ["private", "foobar"]
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "language": "es",
+ "region": "MX",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "ca": "islamic",
+ "co": "search",
+ "nu": "roman"
+ }
+ }
+ }
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "es-MX-u-ca-islamic-co-search-nu-roman"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-u-kn"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-u-kn"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-u-kn-ca-calendar"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-u-ca-calendar-kn"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-u-kn-nu-arab"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-u-kn-nu-arab"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true-n0-mixed"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true-n0-mixed"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true-c0-mixed"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-t-c0-mixed-m0-true"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u-ca-gregory-ca-buddhist"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "da-u-ca-gregory"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pt-u-attr2-attr1-ca-gregory"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "pt-u-attr1-attr2-ca-gregory"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pt-u-attr1-attr2-attr1-ca-gregory"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "pt-u-attr1-attr2-ca-gregory"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-a-not-assigned"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-a-not-assigned"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-w-bar-u-foo-a-bar-x-u-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-a-bar-u-foo-w-bar-x-u-foo"
+ }
+ }
+]
diff --git a/third_party/rust/icu_locid/tests/fixtures/mod.rs b/third_party/rust/icu_locid/tests/fixtures/mod.rs
new file mode 100644
index 0000000000..f00fd6c3b9
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/fixtures/mod.rs
@@ -0,0 +1,261 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::collections::HashMap;
+use std::convert::{TryFrom, TryInto};
+
+use icu_locid::extensions::private;
+use icu_locid::extensions::transform;
+use icu_locid::extensions::unicode;
+use icu_locid::extensions::Extensions;
+use icu_locid::{subtags, LanguageIdentifier, Locale, ParserError};
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleIdentifier { // Fixture form that carries a complete identifier string plus a type tag.
+ #[serde(rename = "type")]
+ pub field_type: String, // Read from the JSON key `type` (see the rename attribute above).
+ pub identifier: String, // Raw identifier text; parsed by the TryFrom<LocaleIdentifier> impls in this file.
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleExtensionUnicode { // Fixture description of a unicode extension: keywords plus attributes.
+ #[serde(default)]
+ keywords: HashMap<String, Option<String>>, // key -> optional value; a None value becomes an empty unicode::Value in TryFrom<LocaleExtensions>.
+ #[serde(default)]
+ attributes: Vec<String>, // Each entry is parsed as a unicode::Attribute in TryFrom<LocaleExtensions>.
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleExtensionTransform { // Fixture description of a transform extension: optional tlang plus tfields.
+ tlang: Option<String>, // When present, parsed with str::parse in TryFrom<LocaleExtensions>.
+ #[serde(default)]
+ tfields: HashMap<String, Option<String>>, // key -> value; TryFrom<LocaleExtensions> panics on a None value.
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleExtensions { // All extension sections a fixture object may specify.
+ unicode: Option<LocaleExtensionUnicode>,
+ transform: Option<LocaleExtensionTransform>,
+ #[serde(default)]
+ private: Vec<String>, // Each entry is parsed as a private::Subtag in TryFrom<LocaleExtensions>.
+ _other: Option<String>, // Not read by the conversion to Extensions in this file.
+}
+
+impl TryFrom<LocaleExtensions> for Extensions { // Builds icu_locid Extensions from the fixture description.
+ type Error = ParserError;
+
+ fn try_from(input: LocaleExtensions) -> Result<Self, Self::Error> { // As written this only ever returns Ok: every parse failure panics via expect.
+ let mut ext = Extensions::default();
+ if let Some(unicode) = input.unicode {
+ ext.unicode.keywords = unicode
+ .keywords
+ .iter()
+ .map(|(k, v)| {
+ (
+ unicode::Key::try_from_bytes(k.as_bytes()).expect("Parsing key failed."),
+ v.as_ref().map_or(
+ unicode::Value::try_from_bytes(b"").expect("Failed to parse Value"), // Fallback for a keyword without a value; built eagerly because it is a map_or argument.
+ |v| {
+ unicode::Value::try_from_bytes(v.as_bytes())
+ .expect("Parsing type failed.")
+ },
+ ),
+ )
+ })
+ .collect();
+ let v: Vec<unicode::Attribute> = unicode
+ .attributes
+ .iter()
+ .map(|v| {
+ unicode::Attribute::try_from_bytes(v.as_bytes())
+ .expect("Parsing attribute failed.")
+ })
+ .collect();
+ ext.unicode.attributes = unicode::Attributes::from_vec_unchecked(v); // NOTE(review): presumably stores the vector without sorting or deduplicating - confirm against the icu_locid API.
+ }
+ if let Some(transform) = input.transform {
+ ext.transform.fields = transform
+ .tfields
+ .iter()
+ .map(|(k, v)| {
+ (
+ transform::Key::try_from_bytes(k.as_bytes()).expect("Parsing key failed."),
+ v.as_ref()
+ .map(|v| {
+ transform::Value::try_from_bytes(v.as_bytes())
+ .expect("Parsing value failed.")
+ })
+ .expect("Value cannot be empty."), // Unlike unicode keywords, a tfield with no value is a fixture error.
+ )
+ })
+ .collect();
+
+ if let Some(tlang) = transform.tlang {
+ ext.transform.lang = Some(tlang.parse().expect("Failed to parse tlang."));
+ }
+ }
+ let v: Vec<private::Subtag> = input // Private subtags are converted even when the fixture lists none (empty Vec).
+ .private
+ .iter()
+ .map(|v| private::Subtag::try_from_bytes(v.as_bytes()).expect("Failed to add field."))
+ .collect();
+ ext.private = private::Private::from_vec_unchecked(v);
+ Ok(ext)
+ }
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleSubtags { // Fixture form that spells an identifier out subtag by subtag.
+ #[serde(rename = "type")]
+ pub field_type: String, // Read from the JSON key `type`; tests/langid.rs compares it against "Locale".
+ pub language: Option<String>, // When absent, conversion falls back to subtags::Language::default().
+ pub script: Option<String>,
+ pub region: Option<String>,
+ #[serde(default)]
+ pub variants: Vec<String>, // Empty when the JSON key is missing (serde default).
+ pub extensions: Option<LocaleExtensions>, // Only consulted by the conversion to Locale, not to LanguageIdentifier.
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleError { // Fixture form describing an expected parse failure.
+ pub error: String, // Name of a ParserError variant; mapped by From<LocaleError> for ParserError.
+ pub text: String, // Not read by the conversions in this file.
+}
+
+#[derive(Debug, Deserialize, Clone)]
+#[serde(untagged)] // No tag field: serde selects the variant from the shape of the JSON value.
+#[allow(clippy::large_enum_variant)] // test code
+pub enum LocaleInfo { // One side (input or output) of a fixture test case.
+ String(String), // A bare identifier string.
+ Error(LocaleError), // An expected parser error.
+ Identifier(LocaleIdentifier), // A typed wrapper around an identifier string.
+ Object(LocaleSubtags), // An identifier spelled out as individual subtags.
+}
+
+impl TryFrom<LocaleInfo> for LanguageIdentifier { // Dispatches on the fixture form to produce a LanguageIdentifier.
+ type Error = ParserError;
+
+ fn try_from(input: LocaleInfo) -> Result<Self, Self::Error> {
+ match input {
+ LocaleInfo::String(s) => s.parse(),
+ LocaleInfo::Error(e) => Err(e.into()), // An error fixture converts straight into the ParserError it names.
+ LocaleInfo::Identifier(ident) => ident.try_into(),
+ LocaleInfo::Object(o) => o.try_into(),
+ }
+ }
+}
+
+impl TryFrom<LocaleInfo> for Locale { // Dispatches on the fixture form to produce a Locale.
+ type Error = ParserError;
+
+ fn try_from(input: LocaleInfo) -> Result<Self, Self::Error> {
+ match input {
+ LocaleInfo::String(s) => s.parse(),
+ LocaleInfo::Error(e) => Err(e.into()), // An error fixture converts straight into the ParserError it names.
+ LocaleInfo::Identifier(ident) => ident.try_into(),
+ LocaleInfo::Object(o) => o.try_into(),
+ }
+ }
+}
+
+impl TryFrom<LocaleIdentifier> for LanguageIdentifier { // Parses the fixture's identifier string into a LanguageIdentifier.
+ type Error = ParserError;
+
+ fn try_from(input: LocaleIdentifier) -> Result<Self, Self::Error> {
+ LanguageIdentifier::try_from_locale_bytes(input.identifier.as_bytes()) // NOTE(review): presumably accepts a full locale string and drops its extensions - confirm against the icu_locid API.
+ }
+}
+
+impl TryFrom<LocaleIdentifier> for Locale { // Parses the fixture's identifier string into a Locale.
+ type Error = ParserError;
+
+ fn try_from(input: LocaleIdentifier) -> Result<Self, Self::Error> {
+ Locale::try_from_bytes(input.identifier.as_bytes()) // The field_type tag is not consulted here.
+ }
+}
+
+impl TryFrom<LocaleSubtags> for LanguageIdentifier { // Assembles a LanguageIdentifier from individually listed subtags.
+ type Error = ParserError;
+
+ fn try_from(subtags: LocaleSubtags) -> Result<Self, Self::Error> { // Only ever returns Ok as written: a malformed subtag panics via expect.
+ let language = if let Some(lang) = subtags.language {
+ lang.parse().expect("Failed to parse language subtag")
+ } else {
+ subtags::Language::default() // A missing language falls back to the default Language value.
+ };
+ let script = subtags
+ .script
+ .map(|s| s.parse().expect("Failed to parse script subtag."));
+ let region = subtags
+ .region
+ .map(|s| s.parse().expect("Failed to parse region subtag."));
+ let variants = subtags
+ .variants
+ .iter()
+ .map(|v| v.parse().expect("Failed to parse variant subtag."))
+ .collect::<Vec<_>>();
+ Ok(LanguageIdentifier { // The extensions field of the fixture is ignored for this target type.
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_vec_unchecked(variants), // Variants are passed through in fixture order.
+ })
+ }
+}
+
+impl TryFrom<LocaleSubtags> for Locale { // Assembles a Locale (identifier plus extensions) from individually listed subtags.
+ type Error = ParserError;
+
+ fn try_from(subtags: LocaleSubtags) -> Result<Self, Self::Error> { // Only ever returns Ok as written: malformed fixture data panics via expect.
+ let language = if let Some(lang) = subtags.language {
+ lang.parse().expect("Failed to parse language subtag")
+ } else {
+ subtags::Language::default() // A missing language falls back to the default Language value.
+ };
+ let script = subtags
+ .script
+ .map(|s| s.parse().expect("Failed to parse script subtag."));
+ let region = subtags
+ .region
+ .map(|s| s.parse().expect("Failed to parse region subtag."));
+ let variants = subtags
+ .variants
+ .iter()
+ .map(|v| v.parse().expect("Failed to parse variant subtag."))
+ .collect::<Vec<_>>();
+ let extensions = if let Some(e) = subtags.extensions {
+ e.try_into().expect("Failed to parse extensions.") // Uses TryFrom<LocaleExtensions> for Extensions from this file.
+ } else {
+ Extensions::default() // No extensions object in the fixture means default Extensions.
+ };
+ Ok(Locale {
+ id: LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_vec_unchecked(variants), // Variants are passed through in fixture order.
+ },
+ extensions,
+ })
+ }
+}
+
+impl From<LocaleError> for ParserError { // Maps the error name stored in a fixture onto the matching ParserError variant.
+ fn from(e: LocaleError) -> Self {
+ match e.error.as_str() { // Only the error name is inspected; the text field is ignored.
+ "InvalidLanguage" => ParserError::InvalidLanguage,
+ "InvalidSubtag" => ParserError::InvalidSubtag,
+ "InvalidExtension" => ParserError::InvalidExtension,
+ "DuplicatedExtension" => ParserError::DuplicatedExtension,
+ _ => unreachable!("Unknown error name"), // Panics if a fixture uses any other name.
+ }
+ }
+}
+
+#[derive(Debug, Deserialize)]
+pub struct LocaleTest { // One fixture test case: what to parse and what it is expected to yield.
+ pub input: LocaleInfo,
+ pub output: LocaleInfo, // May also be the Error form when parsing is expected to fail.
+}
diff --git a/third_party/rust/icu_locid/tests/helpers/mod.rs b/third_party/rust/icu_locid/tests/helpers/mod.rs
new file mode 100644
index 0000000000..d250c510c5
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/helpers/mod.rs
@@ -0,0 +1,15 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+pub fn read_fixture<T>(path: &str) -> Result<T, Error> // Opens the file at path and deserializes its JSON content into T.
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?; // An open failure is returned to the caller as std::io::Error.
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?) // The ? converts a serde_json error into the std::io::Error return type.
+}
diff --git a/third_party/rust/icu_locid/tests/langid.rs b/third_party/rust/icu_locid/tests/langid.rs
new file mode 100644
index 0000000000..ee7bb9817e
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/langid.rs
@@ -0,0 +1,158 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use std::convert::TryInto;
+use writeable::*;
+
+use icu_locid::{subtags, LanguageIdentifier, ParserError};
+
+type Result = std::result::Result<LanguageIdentifier, ParserError>;
+
+fn test_langid_fixtures(tests: Vec<fixtures::LocaleTest>) { // Shared driver: parses each case's input as a LanguageIdentifier and checks it against the expected output form.
+ for test in tests {
+ match test.output {
+ fixtures::LocaleInfo::String(s) => {
+ if let fixtures::LocaleInfo::Object(ref o) = &test.input {
+ if o.field_type == "Locale" {
+ continue; // Object inputs tagged "Locale" are skipped in this LanguageIdentifier driver.
+ }
+ }
+ let input: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ assert_writeable_eq!(input, s); // Compares the written-out form with the expected string.
+ }
+ fixtures::LocaleInfo::Error(err) => {
+ let err: ParserError = err.into();
+ let input: Result = test.input.try_into();
+ assert_eq!(input, Err(err)); // Parsing must fail with exactly the named error.
+ }
+ fixtures::LocaleInfo::Identifier(ident) => {
+ let input: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ let output: LanguageIdentifier = ident.try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ }
+ fixtures::LocaleInfo::Object(o) => {
+ let input: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ let output: LanguageIdentifier = o.try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ }
+ }
+ }
+}
+
+#[test]
+fn test_langid_parsing() { // Runs the shared fixture driver over langid.json.
+ let path = "./tests/fixtures/langid.json"; // Relative path: resolved against the process working directory.
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_invalid() { // Runs the shared fixture driver over invalid.json.
+ let path = "./tests/fixtures/invalid.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_canonicalize() { // Runs the shared fixture driver over canonicalize.json.
+ let path = "./tests/fixtures/canonicalize.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_from_locale() { // Feeds the Locale fixtures (locale.json) through the LanguageIdentifier driver.
+ let path = "./tests/fixtures/locale.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_subtag_language() { // Checks parsing, clearing and writing of a Language subtag.
+ let mut lang: subtags::Language = "en".parse().expect("Failed to parse a language.");
+ assert_eq!(lang.as_str(), "en");
+
+ lang.clear();
+ assert_eq!(lang, subtags::Language::UND); // A cleared language must equal the UND constant.
+ assert!(lang.is_empty());
+
+ assert_writeable_eq!(lang, "und"); // The cleared language is expected to write out as "und".
+}
+
+#[test]
+fn test_langid_subtag_region() { // Checks that a lowercase region input is normalized to uppercase.
+ let region: subtags::Region = "en".parse().expect("Failed to parse a region.");
+ assert_eq!(region.as_str(), "EN");
+ assert_writeable_eq!(region, "EN");
+}
+
+#[test]
+fn test_langid_subtag_script() { // Checks that a Script subtag round-trips through as_str and the writeable form.
+ let script: subtags::Script = "Latn".parse().expect("Failed to parse a script.");
+ assert_eq!(script.as_str(), "Latn");
+ assert_writeable_eq!(script, "Latn");
+}
+
+#[test]
+fn test_langid_subtag_variant() { // Checks that a Variant subtag round-trips through as_str and the writeable form.
+ let variant: subtags::Variant = "macos".parse().expect("Failed to parse a variant.");
+ assert_eq!(variant.as_str(), "macos");
+ assert_writeable_eq!(variant, "macos");
+}
+
+#[test]
+fn test_langid_subtag_variants() { // Checks element access and clearing on a Variants list.
+ let variant: subtags::Variant = "macos".parse().expect("Failed to parse a variant.");
+ let mut variants = subtags::Variants::from_vec_unchecked(vec![variant]);
+ assert_eq!(variants.get(0), Some(&variant));
+ variants.clear();
+ assert_eq!(variants.len(), 0); // Clearing must leave the list empty.
+}
+
+#[test]
+fn test_langid_normalizing_eq_str() { // Checks that every parsed langid fixture is normalizing_eq to its own written form.
+ let path = "./tests/fixtures/langid.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ for test in tests {
+ let parsed: LanguageIdentifier = test.input.try_into().expect("Parsing failed."); // Only the input side of each case is used here.
+ assert!(parsed.normalizing_eq(&parsed.write_to_string()));
+ }
+
+ // Check that trailing characters are not ignored
+ let lang: LanguageIdentifier = "en".parse().expect("Parsing failed.");
+ assert!(!lang.normalizing_eq("en-US"));
+}
+
+#[test]
+fn test_langid_strict_cmp() { // Checks strict_cmp against a byte-wise comparison of the written form, for every pair of fixture strings.
+ let path = "./tests/fixtures/langid.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ let bcp47_strings = tests
+ .iter()
+ .map(|t| match t.input {
+ fixtures::LocaleInfo::String(ref s) => s.as_str(),
+ _ => panic!("Invalid fixture"), // Every input in langid.json must be a bare string.
+ })
+ .collect::<Vec<&str>>();
+ for a in bcp47_strings.iter() {
+ for b in bcp47_strings.iter() { // All ordered pairs are compared, including a string with itself.
+ let a_langid = a
+ .parse::<LanguageIdentifier>()
+ .expect("Invalid BCP-47 in fixture");
+ let a_normalized = a_langid.write_to_string();
+ let string_cmp = a_normalized.as_bytes().cmp(b.as_bytes()); // Reference ordering: written form of a versus the raw bytes of b.
+ let test_cmp = a_langid.strict_cmp(b.as_bytes());
+ assert_eq!(string_cmp, test_cmp, "{a:?}/{b:?}");
+ }
+ }
+}
diff --git a/third_party/rust/icu_locid/tests/locale.rs b/third_party/rust/icu_locid/tests/locale.rs
new file mode 100644
index 0000000000..638db41383
--- /dev/null
+++ b/third_party/rust/icu_locid/tests/locale.rs
@@ -0,0 +1,120 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use std::convert::TryInto;
+use writeable::*;
+
+use icu_locid::{LanguageIdentifier, Locale, ParserError};
+
+type Result = std::result::Result<Locale, ParserError>;
+
+fn test_langid_fixtures(tests: Vec<fixtures::LocaleTest>) { // Shared driver: despite the langid in its name, it parses each case's input as a Locale.
+ for test in tests {
+ match test.output {
+ fixtures::LocaleInfo::String(s) => {
+ let input: Locale = test.input.try_into().expect("Parsing failed.");
+ assert_writeable_eq!(input, s); // Compares the written-out form with the expected string.
+ }
+ fixtures::LocaleInfo::Error(err) => {
+ let err: ParserError = err.into();
+ let input: Result = test.input.try_into();
+ assert_eq!(input, Err(err)); // Parsing must fail with exactly the named error.
+ }
+ fixtures::LocaleInfo::Identifier(ident) => {
+ let input: Locale = test.input.try_into().expect("Parsing failed.");
+ let output: Locale = ident.clone().try_into().expect("Parsing failed."); // Cloned because ident.identifier is still needed below.
+ assert_eq!(input, output);
+ assert_writeable_eq!(input, ident.identifier); // The expected identifier string must also be the exact written form.
+ }
+ fixtures::LocaleInfo::Object(o) => {
+ let input: Locale = test.input.try_into().expect("Parsing failed.");
+ let output: Locale = o.try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ }
+ }
+ }
+}
+
+#[test]
+fn test_locale_parsing() { // Runs the shared Locale fixture driver over locale.json.
+ let path = "./tests/fixtures/locale.json"; // Relative path: resolved against the process working directory.
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_invalid() { // Despite the langid in its name, this drives Locale parsing over invalid-extensions.json.
+ let path = "./tests/fixtures/invalid-extensions.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_locale_is_empty() { // Checks that a default Locale has no extensions and writes out as "und".
+ let locale: Locale = Locale::default();
+ assert!(locale.extensions.is_empty());
+ assert_writeable_eq!(locale, "und");
+}
+
+#[test]
+fn test_locale_conversions() { // Checks that Locale -> LanguageIdentifier -> Locale round-trips for the default locale.
+ let locale: Locale = Locale::default();
+ let langid: LanguageIdentifier = locale.clone().into(); // Cloned so the original stays available for the final comparison.
+ let locale2: Locale = langid.into();
+ assert_eq!(locale, locale2);
+}
+
+#[test]
+fn test_locale_canonicalize() { // Runs the shared Locale fixture driver over canonicalize.json.
+ let path = "./tests/fixtures/canonicalize.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_locale_normalizing_eq_str() { // Checks that every parsed locale fixture is normalizing_eq to its own written form.
+ let path = "./tests/fixtures/locale.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ for test in tests {
+ let parsed: Locale = test.input.try_into().expect("Parsing failed."); // Only the input side of each case is used here.
+ assert!(parsed.normalizing_eq(&parsed.write_to_string()));
+ }
+
+ // Check that trailing characters are not ignored
+ let locale: Locale = "en".parse().expect("Parsing failed.");
+ assert!(!locale.normalizing_eq("en-US"));
+}
+
+#[test]
+fn test_locale_strict_cmp() { // Checks strict_cmp against a byte-wise comparison of the written form, for every pair of fixture strings.
+ let path = "./tests/fixtures/locale.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ let bcp47_strings = tests
+ .iter()
+ .map(|t| match t.input { // Prefer the input identifier; fall back to the output identifier when the input has another form.
+ fixtures::LocaleInfo::Identifier(ref s) => s.identifier.as_str(),
+ _ => match t.output {
+ fixtures::LocaleInfo::Identifier(ref s) => s.identifier.as_str(),
+ _ => panic!("No string in fixture input or output: {t:?}"),
+ },
+ })
+ .collect::<Vec<&str>>();
+ for a in bcp47_strings.iter() {
+ for b in bcp47_strings.iter() { // All ordered pairs are compared, including a string with itself.
+ let a_langid = a.parse::<Locale>().expect("Invalid BCP-47 in fixture");
+ let a_normalized = a_langid.write_to_string();
+ let string_cmp = a_normalized.as_bytes().cmp(b.as_bytes()); // Reference ordering: written form of a versus the raw bytes of b.
+ let test_cmp = a_langid.strict_cmp(b.as_bytes());
+ assert_eq!(string_cmp, test_cmp, "{a:?}/{b:?}");
+ }
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/.cargo-checksum.json b/third_party/rust/icu_locid_transform/.cargo-checksum.json
new file mode 100644
index 0000000000..99e50c8266
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"48806572c95da9d4936a0f62eba886a3ee1944c81e9d644d5da88176d33452de","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"3ff3f2e2f9e5d4c5786132838576edef42a12c5529d5f080370f24aa6246bb92","benches/fixtures/locales.json":"9846601a29874baf140cac1252d4624fadc30182fec106d17f008ece886b9185","benches/fixtures/uncanonicalized-locales.json":"a866ed318b92f79d8853567e79b373c02984967023f5f39161140544e71b0c72","benches/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","benches/locale_canonicalizer.rs":"477af27aa35385d107e19b8e8a0452466a69d20e147a63631b78634c17712fed","src/canonicalizer.rs":"7770767fad20f38aaae19382430451573293ccdeac587b2d063163b870781086","src/directionality.rs":"a031a9d55ffe827c86400637b7302dc424c708dcc52ea667504a33a16db822c2","src/error.rs":"486fda8a0e9b7bb5822bbb0defb51145364d6053b8d60b88ef71e4b2bcd6699d","src/expander.rs":"d3ef487a416425ea6fb2ce7bf08b7487e180a580002e54ce30d5524cfd7514e2","src/fallback/algorithms.rs":"47625130cd5a04cf085dd0494591e117ba204a9d2eb649788b0ff96773cc6e21","src/fallback/mod.rs":"409c29edf1f0336998afffd6b765680f16609d53dbb1268ce3ebe1b959417126","src/lib.rs":"5390facdc3df7e5ec5ab842bf59d4d13383d77d93a722685231a1d271cfba944","src/provider/canonicalizer.rs":"f848dbbc906b5f3be0b6384f5a2f26178898822a5c37334a57b12db8e1af0ed9","src/provider/directionality.rs":"fc516f501254af444cfa010d3c87aeea032dd6eccf5f82301c050ed3df2e05b1","src/provider/expander.rs":"6903d16138ada8216e0341d984126dcc1f6fac21468144e8140fc217b164572e","src/provider/fallback.rs":"d567e3d49261cac9de35825b3d57204d49068558f10579121f0bf0c42090c9cc","src/provider/mod.rs":"6d764ded43ba17301a8125008454f3017b45b83d00f76e1e30721d6d9ddbe595","tests/fixtures/canonicalize.json":"3dc2f661b04e4c9ecced70fc1b98a504eb5f5a0067b38665b10e50c25174bc4a","tests/fixtures/maximize.json":"df0a72846ad0b3190daef2d41541e535e54aa35f1f685975fe9d1965d03473c4","tests/fixtures/minimize.json":"3bb6f19c5525818212388dcbf
778064e7f73d2c32a8a7e8c58d618583a77121a","tests/fixtures/mod.rs":"18a900aa4f74120b7e7e64fcb09eae38a16504d66e23f752e743dcd9b1ad6530","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/locale_canonicalizer.rs":"1ebf7320f422b65cc3cc50468abdc8f08128feba85d936f5beb456b0b052a91d"},"package":"57c17d8f6524fdca4471101dd71f0a132eb6382b5d6d7f2970441cb25f6f435a"} \ No newline at end of file
diff --git a/third_party/rust/icu_locid_transform/Cargo.toml b/third_party/rust/icu_locid_transform/Cargo.toml
new file mode 100644
index 0000000000..1245403a28
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/Cargo.toml
@@ -0,0 +1,128 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.67"
+name = "icu_locid_transform"
+version = "1.4.0"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "API for Unicode Language and Locale Identifiers canonicalization"
+homepage = "https://icu4x.unicode.org"
+readme = "README.md"
+categories = ["internationalization"]
+license-file = "LICENSE"
+repository = "https://github.com/unicode-org/icu4x"
+
+[package.metadata.cargo-all-features]
+denylist = ["bench"]
+skip_optional_dependencies = true
+
+[package.metadata.docs.rs]
+all-features = true
+
+[lib]
+bench = false
+
+[[test]]
+name = "locale_canonicalizer"
+required-features = ["serde"]
+
+[[bench]]
+name = "locale_canonicalizer"
+harness = false
+
+[dependencies.databake]
+version = "0.1.7"
+features = ["derive"]
+optional = true
+default-features = false
+
+[dependencies.displaydoc]
+version = "0.2.3"
+default-features = false
+
+[dependencies.icu_locid]
+version = "~1.4.0"
+features = ["zerovec"]
+default-features = false
+
+[dependencies.icu_locid_transform_data]
+version = "~1.4.0"
+optional = true
+default-features = false
+
+[dependencies.icu_provider]
+version = "~1.4.0"
+features = ["macros"]
+default-features = false
+
+[dependencies.serde]
+version = "1.0"
+features = [
+ "derive",
+ "alloc",
+]
+optional = true
+default-features = false
+
+[dependencies.tinystr]
+version = "0.7.4"
+features = [
+ "alloc",
+ "zerovec",
+]
+default-features = false
+
+[dependencies.zerovec]
+version = "0.10.1"
+features = ["yoke"]
+default-features = false
+
+[dev-dependencies.serde]
+version = "1.0"
+features = ["derive"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+
+[features]
+bench = ["serde"]
+compiled_data = ["dep:icu_locid_transform_data"]
+datagen = [
+ "serde",
+ "dep:databake",
+ "zerovec/databake",
+ "icu_locid/databake",
+ "tinystr/databake",
+]
+default = ["compiled_data"]
+serde = [
+ "dep:serde",
+ "icu_locid/serde",
+ "tinystr/serde",
+ "zerovec/serde",
+ "icu_provider/serde",
+]
+std = []
+
+[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion]
+version = "0.4"
diff --git a/third_party/rust/icu_locid_transform/LICENSE b/third_party/rust/icu_locid_transform/LICENSE
new file mode 100644
index 0000000000..9845aa5f48
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/LICENSE
@@ -0,0 +1,44 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2023 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/third_party/rust/icu_locid_transform/README.md b/third_party/rust/icu_locid_transform/README.md
new file mode 100644
index 0000000000..b97fea2390
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/README.md
@@ -0,0 +1,77 @@
+# icu_locid_transform [![crates.io](https://img.shields.io/crates/v/icu_locid_transform)](https://crates.io/crates/icu_locid_transform)
+
+<!-- cargo-rdme start -->
+
+Canonicalization of locale identifiers based on [`CLDR`] data.
+
+This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/))
+and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+
+It currently supports locale canonicalization based upon the canonicalization
+algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`],
+as well as the minimize and maximize likely subtags algorithms
+as described in [`UTS #35: Unicode LDML 3. Likely Subtags`].
+
+The maximize method potentially updates a passed-in locale in place
+depending upon the results of running the 'Add Likely Subtags' algorithm
+from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+
+The minimize method likewise potentially updates a passed-in locale in place, depending upon the results of running the
+'Remove Likely Subtags' algorithm from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+
+## Examples
+
+```rust
+use icu::locid::Locale;
+use icu::locid_transform::{LocaleCanonicalizer, TransformResult};
+
+let lc = LocaleCanonicalizer::new();
+
+let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc"
+ .parse()
+ .expect("parse failed");
+assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::<Locale>().unwrap());
+```
+
+```rust
+use icu::locid::locale;
+use icu::locid_transform::{LocaleExpander, TransformResult};
+
+let lc = LocaleExpander::new();
+
+let mut locale = locale!("zh-CN");
+assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+assert_eq!(locale, locale!("zh-Hans-CN"));
+
+let mut locale = locale!("zh-Hant-TW");
+assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+assert_eq!(locale, locale!("zh-Hant-TW"));
+```
+
+```rust
+use icu::locid::locale;
+use icu::locid_transform::{LocaleExpander, TransformResult};
+use writeable::assert_writeable_eq;
+
+let lc = LocaleExpander::new();
+
+let mut locale = locale!("zh-Hans-CN");
+assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+assert_eq!(locale, locale!("zh"));
+
+let mut locale = locale!("zh");
+assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+assert_eq!(locale, locale!("zh"));
+```
+
+[`ICU4X`]: ../icu/index.html
+[`CLDR`]: http://cldr.unicode.org/
+[`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags
+[`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/third_party/rust/icu_locid_transform/benches/fixtures/locales.json b/third_party/rust/icu_locid_transform/benches/fixtures/locales.json
new file mode 100644
index 0000000000..0e8ba8b798
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/benches/fixtures/locales.json
@@ -0,0 +1,41 @@
+[
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "it",
+ "zh-Hans-CN",
+ "de-AT",
+ "pl",
+ "fr-FR",
+ "de-AT",
+ "sr-Cyrl-SR",
+ "nb-NO",
+ "fr-FR",
+ "mk",
+ "uk",
+ "und-PL",
+ "und-Latn-AM",
+ "ug-Cyrl",
+ "sr-ME",
+ "mn-Mong",
+ "lif-Limb",
+ "gan",
+ "zh-Hant",
+ "yue-Hans",
+ "unr",
+ "unr-Deva",
+ "und-Thai-CN",
+ "ug-Cyrl",
+ "en-Latn-DE",
+ "pl-FR",
+ "de-CH",
+ "tuq",
+ "sr-ME",
+ "ng",
+ "klx",
+ "kk-Arab",
+ "en-Cyrl",
+ "und-Cyrl-UK",
+ "und-Arab",
+ "und-Arab-FO"
+]
diff --git a/third_party/rust/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json b/third_party/rust/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json
new file mode 100644
index 0000000000..18eadbce68
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/benches/fixtures/uncanonicalized-locales.json
@@ -0,0 +1,88 @@
+[
+ "cka",
+ "cze",
+ "gfx",
+ "sgn-BR",
+ "sgn-DD",
+ "tam",
+ "und-aaland",
+ "nob-bokmal",
+ "no-nynorsk",
+ "und-Qaai",
+ "en-554",
+ "en-084",
+ "art-lojban",
+ "zh-guoyu",
+ "zh-hakka",
+ "zh-xiang",
+ "aar-x-private",
+ "heb-x-private",
+ "ces",
+ "hy-arevela",
+ "hy-arevmda",
+ "cel-gaulish",
+ "ja-latn-hepburn-heploc",
+ "ja-Latn-fonipa-hepburn-heploc",
+ "und-Armn-SU",
+ "sh",
+ "sh-Cyrl",
+ "cnr",
+ "cnr-BA",
+ "ru-SU",
+ "ru-810",
+ "en-SU",
+ "en-810",
+ "und-SU",
+ "und-810",
+ "und-Latn-SU",
+ "und-Latn-810",
+ "hy-SU",
+ "hy-810",
+ "und-Armn-SU",
+ "und-Armn-810",
+ "sr-CS",
+ "sr-Latn-CS",
+ "sr-Cyrl-CS",
+ "az-NT",
+ "sl-t-sl-rozaj-biske-1994",
+ "DE-T-M0-DIN-K0-QWERTZ",
+ "en-t-m0-true",
+ "en-t-iw",
+ "und-u-rg-no23",
+ "und-u-rg-cn11",
+ "und-u-rg-cz10a",
+ "und-u-rg-fra",
+ "und-u-rg-frg",
+ "und-u-rg-lud",
+ "und-NO-u-rg-no23",
+ "und-CN-u-rg-cn11",
+ "und-CZ-u-rg-cz10a",
+ "und-FR-u-rg-fra",
+ "und-FR-u-rg-frg",
+ "und-u-rg-lud",
+ "und-u-sd-no23",
+ "und-u-sd-cn11",
+ "und-u-sd-cz10a",
+ "und-u-sd-fra",
+ "hy-arevela",
+ "hy-Armn-arevela",
+ "hy-AM-arevela",
+ "hy-arevela-fonipa",
+ "hy-fonipa-arevela",
+ "hy-arevmda",
+ "hy-Armn-arevmda",
+ "hy-AM-arevmda",
+ "hy-arevmda-fonipa",
+ "hy-fonipa-arevmda",
+ "ja-Latn-hepburn-heploc",
+ "ja-Latn-JP-hepburn-heploc",
+ "sv-aaland",
+ "el-polytoni",
+ "ja-Latn-alalc97-hepburn-heploc",
+ "ja-Latn-hepburn-alalc97-heploc",
+ "ja-Latn-hepburn-heploc-alalc97",
+ "ja-Latn-heploc-hepburn",
+ "ja-Latn-heploc",
+ "ja-Latn-aaland-heploc",
+ "ja-Latn-heploc-polytoni"
+]
diff --git a/third_party/rust/icu_locid_transform/benches/helpers/mod.rs b/third_party/rust/icu_locid_transform/benches/helpers/mod.rs
new file mode 100644
index 0000000000..d250c510c5
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/benches/helpers/mod.rs
@@ -0,0 +1,15 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+pub fn read_fixture<T>(path: &str) -> Result<T, Error> // Opens the file at path and deserializes its JSON content into T.
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?; // An open failure is returned to the caller as std::io::Error.
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?) // The ? converts a serde_json error into the std::io::Error return type.
+}
diff --git a/third_party/rust/icu_locid_transform/benches/locale_canonicalizer.rs b/third_party/rust/icu_locid_transform/benches/locale_canonicalizer.rs
new file mode 100644
index 0000000000..1ea8df6b39
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/benches/locale_canonicalizer.rs
@@ -0,0 +1,99 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use icu_locid::Locale;
+use icu_locid_transform::LocaleCanonicalizer;
+use icu_locid_transform::LocaleExpander;
+
+fn canonicalize_bench(c: &mut Criterion) { // Benchmarks `canonicalize` on the "uncanonicalized" fixture, next to a clone-only run.
+ let lc = LocaleCanonicalizer::new();
+
+ let mut group = c.benchmark_group("uncanonicalized");
+
+ let path = "./benches/fixtures/uncanonicalized-locales.json"; // relative path, so it depends on the working directory
+ let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture");
+ let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); // parsed once, outside the timed closures; panics on an invalid fixture entry
+
+ group.bench_function("clone", |b| { // clone only: the "canonicalize" run below also clones every locale
+ b.iter(|| {
+ for locale in &locales {
+ let _ = black_box(locale).clone();
+ }
+ })
+ });
+
+ group.bench_function("canonicalize", |b| {
+ b.iter(|| {
+ for locale in &locales {
+ let mut locale = black_box(locale).clone(); // canonicalize mutates in place, so each iteration works on a fresh copy
+ lc.canonicalize(&mut locale);
+ }
+ })
+ });
+
+ group.finish();
+}
+
+fn canonicalize_noop_bench(c: &mut Criterion) { // Benchmarks `canonicalize` on the "canonicalized" fixture, next to a clone-only run.
+ let lc = LocaleCanonicalizer::new();
+
+ let mut group = c.benchmark_group("canonicalized");
+
+ // None of these locales require canonicalization, so this measures the cost of calling
+ // the canonicalizer on locales that will not be modified.
+ let path = "./benches/fixtures/locales.json"; // relative path, so it depends on the working directory
+ let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture");
+ let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); // parsed once, outside the timed closures; panics on an invalid fixture entry
+
+ group.bench_function("clone", |b| { // clone only: the "canonicalize" run below also clones every locale
+ b.iter(|| {
+ for locale in &locales {
+ let _ = black_box(locale).clone();
+ }
+ })
+ });
+
+ group.bench_function("canonicalize", |b| {
+ b.iter(|| {
+ for locale in &locales {
+ let mut locale = black_box(locale).clone(); // canonicalize takes `&mut Locale`, so each iteration works on a fresh copy
+ lc.canonicalize(&mut locale);
+ }
+ })
+ });
+
+ group.finish();
+}
+
+fn maximize_bench(c: &mut Criterion) { // Benchmarks `LocaleExpander::maximize` over the `locales.json` fixture.
+ let lc = LocaleExpander::new();
+
+ let mut group = c.benchmark_group("likelysubtags");
+
+ let path = "./benches/fixtures/locales.json"; // relative path, so it depends on the working directory
+ let data: Vec<String> = helpers::read_fixture(path).expect("Failed to read a fixture");
+ let locales: Vec<Locale> = data.iter().map(|s| s.parse().unwrap()).collect(); // parsed once, outside the timed closure; panics on an invalid fixture entry
+
+ group.bench_function("maximize", |b| {
+ b.iter(|| {
+ for locale in &locales {
+ let mut locale = locale.clone(); // the clone is inside the timed closure, so its cost is part of the measurement
+ lc.maximize(black_box(&mut locale));
+ }
+ })
+ });
+
+ group.finish();
+}
+
+criterion_group!( // groups the three benchmark functions of this file under the name `benches`
+ benches,
+ canonicalize_bench,
+ canonicalize_noop_bench,
+ maximize_bench
+);
+criterion_main!(benches); // NOTE(review): presumably expands to the bench binary's `main` running `benches` — confirm against the criterion docs
diff --git a/third_party/rust/icu_locid_transform/src/canonicalizer.rs b/third_party/rust/icu_locid_transform/src/canonicalizer.rs
new file mode 100644
index 0000000000..5a3782638a
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/canonicalizer.rs
@@ -0,0 +1,618 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! The collection of code for locale canonicalization.
+
+use crate::provider::*;
+use crate::LocaleTransformError;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+
+use crate::LocaleExpander;
+use crate::TransformResult;
+use icu_locid::subtags::{Language, Region, Script};
+use icu_locid::{
+ extensions::unicode::key,
+ subtags::{language, Variant, Variants},
+ LanguageIdentifier, Locale,
+};
+use icu_provider::prelude::*;
+use tinystr::TinyAsciiStr;
+
+/// Implements the algorithm defined in *[UTS #35: Annex C, LocaleId Canonicalization]*.
+///
+/// # Examples
+///
+/// ```
+/// use icu_locid::Locale;
+/// use icu_locid_transform::{LocaleCanonicalizer, TransformResult};
+///
+/// let lc = LocaleCanonicalizer::new();
+///
+/// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap();
+/// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap());
+/// ```
+///
+/// [UTS #35: Annex C, LocaleId Canonicalization]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization
+#[derive(Debug)]
+pub struct LocaleCanonicalizer {
+ /// Alias tables (language, script, region, variant, subdivision, ...) consulted by `canonicalize`.
+ aliases: DataPayload<AliasesV1Marker>,
+ /// Likely-subtags expander; `canonicalize` delegates to it when resolving complex region aliases.
+ expander: LocaleExpander,
+}
+
+#[inline]
+fn uts35_rule_matches<'a, I>( // Returns whether a rule (language/script/region/variants) applies to `source`.
+ source: &Locale,
+ language: Language, // an empty language acts as a wildcard
+ script: Option<Script>, // `None` acts as a wildcard
+ region: Option<Region>, // `None` acts as a wildcard
+ raw_variants: I, // rule variants; must be sorted, and every one must occur in `source`
+) -> bool
+where
+ I: Iterator<Item = &'a str>,
+{
+ (language.is_empty() || language == source.id.language)
+ && (script.is_none() || script == source.id.script)
+ && (region.is_none() || region == source.id.region)
+ && {
+ // Checks if variants are a subset of source variants.
+ // As both iterators are sorted, this can be done linearly.
+ let mut source_variants = source.id.variants.iter();
+ 'outer: for it in raw_variants {
+ for cand in source_variants.by_ref() { // `by_ref` resumes where the previous rule variant stopped
+ match cand.strict_cmp(it.as_bytes()) {
+ Ordering::Equal => {
+ continue 'outer; // found this rule variant; move on to the next one
+ }
+ Ordering::Less => {} // source variant sorts before the rule variant: keep scanning
+ _ => {
+ return false; // scanned past where the rule variant would be: it is missing
+ }
+ }
+ }
+ return false; // source variants exhausted before the rule variant was found
+ }
+ true
+ }
+}
+
+fn uts35_replacement<'a, I>( // Applies a matched rule's `replacement` to `source` in place.
+ source: &mut Locale,
+ ruletype_has_language: bool, // the rule specified a language, so the replacement's language always wins
+ ruletype_has_script: bool, // same for script
+ ruletype_has_region: bool, // same for region
+ ruletype_variants: Option<I>, // variants named by the rule (removed from `source`); `None` leaves variants untouched
+ replacement: &LanguageIdentifier,
+) where
+ I: Iterator<Item = &'a str>,
+{
+ if ruletype_has_language || (source.id.language.is_empty() && !replacement.language.is_empty()) // also fills a language the source lacks
+ {
+ source.id.language = replacement.language;
+ }
+ if ruletype_has_script || (source.id.script.is_none() && replacement.script.is_some()) { // also fills a script the source lacks
+ source.id.script = replacement.script;
+ }
+ if ruletype_has_region || (source.id.region.is_none() && replacement.region.is_some()) { // also fills a region the source lacks
+ source.id.region = replacement.region;
+ }
+ if let Some(skips) = ruletype_variants {
+ // The rule matches if the ruletype variants are a subset of the source variants.
+ // This means ja-Latn-fonipa-hepburn-heploc matches against the rule for
+ // hepburn-heploc and is canonicalized to ja-Latn-alalc97-fonipa
+
+ // We're merging three sorted deduped iterators into a new sequence:
+ // sources - skips + replacements
+
+ let mut sources = source.id.variants.iter().copied().peekable();
+ let mut replacements = replacement.variants.iter().copied().peekable();
+ let mut skips = skips.peekable();
+
+ let mut variants: Vec<Variant> = Vec::new();
+
+ loop {
+ match (sources.peek(), skips.peek(), replacements.peek()) { // arm order matters: earlier arms take priority
+ (Some(&source), Some(skip), _)
+ if source.strict_cmp(skip.as_bytes()) == Ordering::Greater =>
+ {
+ skips.next(); // this skip sorts before every remaining source variant: discard it
+ }
+ (Some(&source), Some(skip), _)
+ if source.strict_cmp(skip.as_bytes()) == Ordering::Equal =>
+ {
+ skips.next(); // source variant is named by the rule: drop it from the output
+ sources.next();
+ }
+ (Some(&source), _, Some(&replacement))
+ if replacement.cmp(&source) == Ordering::Less =>
+ {
+ variants.push(replacement); // replacement sorts first: emit it to keep the output sorted
+ replacements.next();
+ }
+ (Some(&source), _, Some(&replacement))
+ if replacement.cmp(&source) == Ordering::Equal =>
+ {
+ variants.push(source); // present in both: emit once so the output stays deduplicated
+ sources.next();
+ replacements.next();
+ }
+ (Some(&source), _, _) => {
+ variants.push(source); // unaffected source variant: keep it
+ sources.next();
+ }
+ (None, _, Some(&replacement)) => {
+ variants.push(replacement); // sources exhausted: flush the remaining replacements
+ replacements.next();
+ }
+ (None, _, None) => {
+ break; // nothing left to emit; any leftover skips are ignored
+ }
+ }
+ }
+ source.id.variants = Variants::from_vec_unchecked(variants); // NOTE(review): presumably expects sorted, deduplicated input, which the merge above produces — confirm
+ }
+}
+
+#[inline]
+fn uts35_check_language_rules( // Applies a language alias to `locale`, if one exists; reports whether it changed.
+ locale: &mut Locale,
+ alias_data: &DataPayload<AliasesV1Marker>,
+) -> TransformResult {
+ if !locale.id.language.is_empty() {
+ let lang: TinyAsciiStr<3> = locale.id.language.into();
+ let replacement = if lang.len() == 2 { // two-letter codes are looked up in `language_len2`, all others in `language_len3`
+ alias_data
+ .get()
+ .language_len2
+ .get(&lang.resize().to_unvalidated()) // NOTE(review): `resize` presumably narrows to the map's key width — confirm
+ } else {
+ alias_data.get().language_len3.get(&lang.to_unvalidated())
+ };
+
+ if let Some(replacement) = replacement {
+ if let Ok(langid) = replacement.parse() { // a replacement that fails to parse is ignored (falls through to Unmodified)
+ uts35_replacement::<core::iter::Empty<&str>>(
+ locale, true, false, false, None, &langid, // language is always overwritten; script/region only fill gaps; variants untouched
+ );
+ return TransformResult::Modified;
+ }
+ }
+ }
+
+ TransformResult::Unmodified
+}
+
+fn is_iter_sorted<I, T>(mut iter: I) -> bool // Returns true if the items are in non-decreasing order.
+where
+ I: Iterator<Item = T>,
+ T: PartialOrd,
+{
+ if let Some(mut last) = iter.next() { // an empty iterator skips straight to `true`
+ for curr in iter {
+ if last > curr { // equal neighbours are accepted; only a strict decrease fails
+ return false;
+ }
+ last = curr;
+ }
+ }
+ true
+}
+
+#[cfg(feature = "compiled_data")]
+impl Default for LocaleCanonicalizer { // `default()` is the same as the compiled-data constructor `new()`
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl LocaleCanonicalizer {
+ /// A constructor which creates a [`LocaleCanonicalizer`] from compiled data.
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ #[cfg(feature = "compiled_data")]
+ pub const fn new() -> Self {
+ Self::new_with_expander(LocaleExpander::new_extended()) // built on the extended expander, not `LocaleExpander::new()`
+ }
+
+ // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
+ pub fn try_new_with_any_provider(
+ provider: &(impl AnyProvider + ?Sized),
+ ) -> Result<LocaleCanonicalizer, LocaleTransformError> {
+ let expander = LocaleExpander::try_new_with_any_provider(provider)?; // the expander is built from the same provider first
+ Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander) // alias data is then loaded through the downcasting adapter
+ }
+
+ // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
+ #[cfg(feature = "serde")]
+ pub fn try_new_with_buffer_provider(
+ provider: &(impl BufferProvider + ?Sized),
+ ) -> Result<LocaleCanonicalizer, LocaleTransformError> {
+ let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; // the expander is built from the same provider first
+ Self::try_new_with_expander_unstable(&provider.as_deserializing(), expander) // alias data is then loaded through the deserializing adapter
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleCanonicalizer, LocaleTransformError>
+ where
+ P: DataProvider<AliasesV1Marker> // alias data for the canonicalizer itself
+ + DataProvider<LikelySubtagsForLanguageV1Marker> // the likely-subtags bounds are needed by `LocaleExpander::try_new_unstable` below
+ + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+ + ?Sized,
+ {
+ let expander = LocaleExpander::try_new_unstable(provider)?;
+ Self::try_new_with_expander_unstable(provider, expander)
+ }
+
+ /// Creates a [`LocaleCanonicalizer`] with a custom [`LocaleExpander`] and compiled data.
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ #[cfg(feature = "compiled_data")]
+ pub const fn new_with_expander(expander: LocaleExpander) -> Self {
+ Self {
+ aliases: DataPayload::from_static_ref( // alias data comes from the crate's baked static; only the expander is caller-supplied
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_ALIASES_V1,
+ ),
+ expander,
+ }
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)]
+ pub fn try_new_with_expander_unstable<P>(
+ provider: &P,
+ expander: LocaleExpander,
+ ) -> Result<LocaleCanonicalizer, LocaleTransformError>
+ where
+ P: DataProvider<AliasesV1Marker> + ?Sized, // only alias data is loaded here; the expander arrives ready-made
+ {
+ let aliases: DataPayload<AliasesV1Marker> =
+ provider.load(Default::default())?.take_payload()?; // default request; both failure points propagate as `LocaleTransformError`
+
+ Ok(LocaleCanonicalizer { aliases, expander })
+ }
+
+ icu_provider::gen_any_buffer_data_constructors!( // NOTE(review): presumably generates the `*_with_any_provider` / `*_with_buffer_provider` variants named below — confirm against the icu_provider macro docs
+ locale: skip,
+ options: LocaleExpander,
+ error: LocaleTransformError,
+ #[cfg(skip)]
+ functions: [
+ new_with_expander,
+ try_new_with_expander_with_any_provider,
+ try_new_with_expander_with_buffer_provider,
+ try_new_with_expander_unstable,
+ Self,
+ ]
+ );
+
+ /// The canonicalize method potentially updates a passed in locale in place
+ /// depending upon the results of running the canonicalization algorithm
+ /// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>.
+ ///
+ /// Some BCP47 canonicalization data is not part of the CLDR json package. Because
+ /// of this, some canonicalizations are not performed, e.g. the canonicalization of
+ /// `und-u-ca-islamicc` to `und-u-ca-islamic-civil`. This will be fixed in a future
+ /// release once the missing data has been added to the CLDR json data. See:
+ /// <https://github.com/unicode-org/icu4x/issues/746>
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::Locale;
+ /// use icu_locid_transform::{LocaleCanonicalizer, TransformResult};
+ ///
+ /// let lc = LocaleCanonicalizer::new();
+ ///
+ /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap();
+ /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+ /// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap());
+ /// ```
+ pub fn canonicalize(&self, locale: &mut Locale) -> TransformResult {
+ let mut result = TransformResult::Unmodified; // only ever switched to Modified, never back
+
+ // This loops until we get a 'fixed point', where applying the rules does not
+ // result in any more changes.
+ 'outer: loop {
+ // These are linear searches due to the ordering imposed by the canonicalization
+ // rules, where rules with more variants should be considered first. With the
+ // current data in CLDR, we will only do this for locales which have variants,
+ // or new rules which we haven't special-cased yet (of which there are fewer
+ // than 20).
+ if !locale.id.variants.is_empty() {
+ // These language/variant combinations have around 20 rules
+ for StrStrPair(raw_lang_variants, raw_to) in self
+ .aliases
+ .get()
+ .language_variants
+ .iter()
+ .map(zerofrom::ZeroFrom::zero_from)
+ {
+ let (raw_lang, raw_variants) = { // first `-`-separated subtag is the language, the rest are variants
+ let mut subtags = raw_lang_variants.split('-');
+ (
+ // str::split can't return empty iterators
+ unsafe { subtags.next().unwrap_unchecked() }, // SAFETY: see the comment above — the first `next()` is always `Some`
+ subtags,
+ )
+ };
+ if is_iter_sorted(raw_variants.clone()) { // rules with unsorted variants are skipped; the matcher and the merge need sorted input
+ if let Ok(lang) = raw_lang.parse::<Language>() {
+ if uts35_rule_matches(locale, lang, None, None, raw_variants.clone()) {
+ if let Ok(to) = raw_to.parse() {
+ uts35_replacement(
+ locale,
+ !lang.is_empty(),
+ false,
+ false,
+ Some(raw_variants),
+ &to,
+ );
+ result = TransformResult::Modified;
+ continue 'outer; // restart from the top of the fixed-point loop after any change
+ }
+ }
+ }
+ }
+ }
+ } else { // only reached when the locale has no variants
+ // These are absolute fallbacks, and currently empty.
+ for StrStrPair(raw_from, raw_to) in self
+ .aliases
+ .get()
+ .language
+ .iter()
+ .map(zerofrom::ZeroFrom::zero_from)
+ {
+ if let Ok(from) = raw_from.parse::<LanguageIdentifier>() {
+ if uts35_rule_matches(
+ locale,
+ from.language,
+ from.script,
+ from.region,
+ from.variants.iter().map(Variant::as_str),
+ ) {
+ if let Ok(to) = raw_to.parse() {
+ uts35_replacement(
+ locale,
+ !from.language.is_empty(),
+ from.script.is_some(),
+ from.region.is_some(),
+ Some(from.variants.iter().map(Variant::as_str)),
+ &to,
+ );
+ result = TransformResult::Modified;
+ continue 'outer;
+ }
+ }
+ }
+ }
+ }
+
+ if !locale.id.language.is_empty() {
+ // If the region is specified, check sgn-region rules first
+ if let Some(region) = locale.id.region {
+ if locale.id.language == language!("sgn") {
+ if let Some(&sgn_lang) = self
+ .aliases
+ .get()
+ .sgn_region
+ .get(&region.into_tinystr().to_unvalidated())
+ {
+ uts35_replacement::<core::iter::Empty<&str>>(
+ locale,
+ true, // the rule carries a language: take it from the replacement
+ false,
+ true, // the rule carries a region: the region is overwritten with the replacement's region
+ None,
+ &sgn_lang.into(),
+ );
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+ }
+
+ if uts35_check_language_rules(locale, &self.aliases) == TransformResult::Modified {
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+
+ if let Some(script) = locale.id.script {
+ if let Some(&replacement) = self
+ .aliases
+ .get()
+ .script
+ .get(&script.into_tinystr().to_unvalidated())
+ {
+ locale.id.script = Some(replacement);
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+
+ if let Some(region) = locale.id.region {
+ let replacement = if region.is_alphabetic() { // alphabetic and numeric region codes live in separate alias tables
+ self.aliases
+ .get()
+ .region_alpha
+ .get(&region.into_tinystr().resize().to_unvalidated())
+ } else {
+ self.aliases
+ .get()
+ .region_num
+ .get(&region.into_tinystr().to_unvalidated())
+ };
+ if let Some(&replacement) = replacement {
+ locale.id.region = Some(replacement);
+ result = TransformResult::Modified;
+ continue;
+ }
+
+ if let Some(regions) = self // complex aliases: one region maps to a list of candidate regions
+ .aliases
+ .get()
+ .complex_region
+ .get(&region.into_tinystr().to_unvalidated())
+ {
+ // Skip if regions are empty
+ if let Some(default_region) = regions.get(0) { // the first candidate is the fallback choice
+ let mut maximized = LanguageIdentifier { // language + script only, so `maximize` can supply a likely region
+ language: locale.id.language,
+ script: locale.id.script,
+ region: None,
+ variants: Variants::default(),
+ };
+
+ locale.id.region = Some(
+ match (self.expander.maximize(&mut maximized), maximized.region) {
+ (TransformResult::Modified, Some(candidate))
+ if regions.iter().any(|x| x == candidate) => // use the likely region only if it is one of the candidates
+ {
+ candidate
+ }
+ _ => default_region,
+ },
+ );
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+ }
+
+ if !locale.id.variants.is_empty() {
+ let mut modified = Vec::new(); // replacements for variants that have an alias
+ let mut unmodified = Vec::new(); // variants without an alias, kept as-is
+ for &variant in locale.id.variants.iter() {
+ if let Some(&updated) = self
+ .aliases
+ .get()
+ .variant
+ .get(&variant.into_tinystr().to_unvalidated())
+ {
+ modified.push(updated);
+ } else {
+ unmodified.push(variant);
+ }
+ }
+
+ if !modified.is_empty() { // rebuild the variant list only when at least one alias applied
+ modified.extend(unmodified);
+ modified.sort(); // re-sort and dedup before the unchecked constructor below
+ modified.dedup();
+ locale.id.variants = Variants::from_vec_unchecked(modified);
+ result = TransformResult::Modified;
+ continue;
+ }
+ }
+
+ // Nothing matched in this iteration, we're done.
+ break;
+ }
+
+ // Handle Locale extensions in their own loops, because these rules do not interact
+ // with each other.
+ if let Some(lang) = &locale.extensions.transform.lang {
+ let mut tlang: Locale = lang.clone().into(); // work on a copy; written back only if something changed
+ let mut matched = false;
+ loop { // re-apply the language alias rules until they stop changing `tlang`
+ if uts35_check_language_rules(&mut tlang, &self.aliases)
+ == TransformResult::Modified
+ {
+ result = TransformResult::Modified;
+ matched = true;
+ continue;
+ }
+
+ break;
+ }
+
+ if matched {
+ locale.extensions.transform.lang = Some(tlang.id);
+ }
+ }
+
+ // The `rg` region override and `sd` regional subdivision keys may contain
+ // subdivision codes that require canonicalization (see the `subdivision` alias lookup below).
+ for key in &[key!("rg"), key!("sd")] {
+ if let Some(value) = locale.extensions.unicode.keywords.get_mut(key) {
+ if let &[only_value] = value.as_tinystr_slice() { // only values made of exactly one subtag are considered
+ if let Some(modified_value) = self
+ .aliases
+ .get()
+ .subdivision
+ .get(&only_value.resize().to_unvalidated())
+ {
+ if let Ok(modified_value) = modified_value.parse() { // a replacement that fails to parse is ignored
+ *value = modified_value;
+ result = TransformResult::Modified;
+ }
+ }
+ }
+ }
+ }
+
+ result
+ }
+}
+
+#[test]
+fn test_uts35_rule_matches() { // Table test: each row is (source locale, rule, expected match result).
+ for (source, rule, result) in [
+ ("ja", "und", true), // an `und` rule with no variants is expected to match
+ ("und-heploc-hepburn", "und-hepburn", true), // a rule naming one of the source's two variants is expected to match
+ ("ja-heploc-hepburn", "und-hepburn", true),
+ ("ja-hepburn", "und-hepburn-heploc", false), // the source lacks `heploc`, so no match is expected
+ ] {
+ let source = source.parse().unwrap();
+ let rule = rule.parse::<LanguageIdentifier>().unwrap();
+ assert_eq!(
+ uts35_rule_matches(
+ &source,
+ rule.language,
+ rule.script,
+ rule.region,
+ rule.variants.iter().map(Variant::as_str),
+ ),
+ result,
+ "{source}" // the failure message names the offending source locale
+ );
+ }
+}
+
+#[test]
+fn test_uts35_replacement() { // Table test: each row is (input locale, rule "from", rule "to", expected locale).
+ for (locale, rule_0, rule_1, result) in [
+ (
+ "ja-Latn-fonipa-hepburn-heploc",
+ "und-hepburn-heploc",
+ "und-alalc97",
+ "ja-Latn-alalc97-fonipa", // rule variants removed, replacement variant inserted
+ ),
+ ("sgn-DD", "und-DD", "und-DE", "sgn-DE"), // region-only rule: the language is left alone
+ ("sgn-DE", "sgn-DE", "gsg", "gsg"), // language+region rule: the expected result has no region
+ ] {
+ let mut locale = locale.parse().unwrap();
+ let rule_0 = rule_0.parse::<LanguageIdentifier>().unwrap();
+ let rule_1 = rule_1.parse().unwrap();
+ let result = result.parse::<Locale>().unwrap();
+ uts35_replacement(
+ &mut locale,
+ !rule_0.language.is_empty(), // the ruletype flags are derived from which fields the "from" side specifies
+ rule_0.script.is_some(),
+ rule_0.region.is_some(),
+ Some(rule_0.variants.iter().map(Variant::as_str)),
+ &rule_1,
+ );
+ assert_eq!(result, locale);
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/src/directionality.rs b/third_party/rust/icu_locid_transform/src/directionality.rs
new file mode 100644
index 0000000000..8a6c243b81
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/directionality.rs
@@ -0,0 +1,231 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::provider::*;
+use crate::{LocaleExpander, LocaleTransformError};
+use icu_locid::subtags::Script;
+use icu_locid::LanguageIdentifier;
+use icu_provider::prelude::*;
+
+/// Represents the direction of a script.
+///
+/// [`LocaleDirectionality`] can be used to get this information.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+#[non_exhaustive]
+pub enum Direction {
+ /// The script is written left-to-right.
+ LeftToRight,
+ /// The script is written right-to-left.
+ RightToLeft,
+}
+
+/// Provides methods to determine the direction of a locale.
+///
+/// # Examples
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{Direction, LocaleDirectionality};
+///
+/// let ld = LocaleDirectionality::new();
+///
+/// assert_eq!(ld.get(&locale!("en")), Some(Direction::LeftToRight));
+/// ```
+#[derive(Debug)]
+pub struct LocaleDirectionality {
+ script_direction: DataPayload<ScriptDirectionV1Marker>, // holds the `ltr` and `rtl` script lists that `get` searches
+ expander: LocaleExpander, // used to determine the likely script of a locale
+}
+
+impl LocaleDirectionality {
+ /// Creates a [`LocaleDirectionality`] from compiled data.
+ ///
+ /// This includes limited likely subtags data, see [`LocaleExpander::new()`].
+ #[cfg(feature = "compiled_data")]
+ pub const fn new() -> Self {
+ Self::new_with_expander(LocaleExpander::new()) // uses `LocaleExpander::new()`, not the extended expander
+ }
+
+ // Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
+ pub fn try_new_with_any_provider(
+ provider: &(impl AnyProvider + ?Sized),
+ ) -> Result<LocaleDirectionality, LocaleTransformError> {
+ let expander = LocaleExpander::try_new_with_any_provider(provider)?; // the expander is built from the same provider first
+ Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander) // script-direction data is then loaded through the downcasting adapter
+ }
+
+ // Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
+ #[cfg(feature = "serde")]
+ pub fn try_new_with_buffer_provider(
+ provider: &(impl BufferProvider + ?Sized),
+ ) -> Result<LocaleDirectionality, LocaleTransformError> {
+ let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; // the expander is built from the same provider first
+ Self::try_new_with_expander_unstable(&provider.as_deserializing(), expander) // script-direction data is then loaded through the deserializing adapter
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleDirectionality, LocaleTransformError>
+ where
+ P: DataProvider<ScriptDirectionV1Marker> // script-direction data for this type itself
+ + DataProvider<LikelySubtagsForLanguageV1Marker> // the likely-subtags bounds are needed by `LocaleExpander::try_new_unstable` below
+ + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+ + ?Sized,
+ {
+ let expander = LocaleExpander::try_new_unstable(provider)?;
+ Self::try_new_with_expander_unstable(provider, expander)
+ }
+
+ /// Creates a [`LocaleDirectionality`] with a custom [`LocaleExpander`] and compiled data.
+ ///
+ /// This allows using [`LocaleExpander::new_extended()`] with data for all locales.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{
+ /// Direction, LocaleDirectionality, LocaleExpander,
+ /// };
+ ///
+ /// let ld_default = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(ld_default.get(&locale!("jbn")), None);
+ ///
+ /// let expander = LocaleExpander::new_extended();
+ /// let ld_extended = LocaleDirectionality::new_with_expander(expander);
+ ///
+ /// assert_eq!(
+ /// ld_extended.get(&locale!("jbn")),
+ /// Some(Direction::RightToLeft)
+ /// );
+ /// ```
+ #[cfg(feature = "compiled_data")]
+ pub const fn new_with_expander(expander: LocaleExpander) -> Self {
+ LocaleDirectionality {
+ script_direction: DataPayload::from_static_ref( // direction data comes from the crate's baked static; only the expander is caller-supplied
+ crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1,
+ ),
+ expander,
+ }
+ }
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)]
+ pub fn try_new_with_expander_unstable<P>(
+ provider: &P,
+ expander: LocaleExpander,
+ ) -> Result<LocaleDirectionality, LocaleTransformError>
+ where
+ P: DataProvider<ScriptDirectionV1Marker> + ?Sized, // only script-direction data is loaded here; the expander arrives ready-made
+ {
+ let script_direction = provider.load(Default::default())?.take_payload()?; // default request; both failure points propagate as `LocaleTransformError`
+
+ Ok(LocaleDirectionality {
+ script_direction,
+ expander,
+ })
+ }
+
+ /// Returns the script direction of the given locale.
+ ///
+ /// Note that the direction is a property of the script of a locale, not of the language. As such,
+ /// when given a locale without an associated script tag (i.e., `locale!("en")` vs. `locale!("en-Latn")`),
+ /// this method first tries to infer the script using the language and region before returning its direction.
+ ///
+ /// If you already have a script struct and want to get its direction, you should use
+ /// `Locale::from(Some(my_script))` and call this method.
+ ///
+ /// This method will return `None` if either a locale's script cannot be determined, or there is no information
+ /// for the script.
+ ///
+ /// # Examples
+ ///
+ /// Using an existing locale:
+ ///
+ /// ```
+ /// use icu_locid::locale;
+ /// use icu_locid_transform::{Direction, LocaleDirectionality};
+ ///
+ /// let ld = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(ld.get(&locale!("en-US")), Some(Direction::LeftToRight));
+ ///
+ /// assert_eq!(ld.get(&locale!("ar")), Some(Direction::RightToLeft));
+ ///
+ /// assert_eq!(ld.get(&locale!("en-Arab")), Some(Direction::RightToLeft));
+ ///
+ /// assert_eq!(ld.get(&locale!("foo")), None);
+ /// ```
+ ///
+ /// Using a script directly:
+ ///
+ /// ```
+ /// use icu_locid::subtags::script;
+ /// use icu_locid::Locale;
+ /// use icu_locid_transform::{Direction, LocaleDirectionality};
+ ///
+ /// let ld = LocaleDirectionality::new();
+ ///
+ /// assert_eq!(
+ /// ld.get(&Locale::from(Some(script!("Latn")))),
+ /// Some(Direction::LeftToRight)
+ /// );
+ /// ```
+ pub fn get(&self, locale: impl AsRef<LanguageIdentifier>) -> Option<Direction> {
+ let script = self.expander.get_likely_script(locale.as_ref())?; // no likely script => `None`
+
+ if self.script_in_ltr(script) { // the `ltr` list is checked first
+ Some(Direction::LeftToRight)
+ } else if self.script_in_rtl(script) {
+ Some(Direction::RightToLeft)
+ } else {
+ None // script is known but appears in neither direction list
+ }
+ }
+
+ /// Returns whether the given locale is right-to-left.
+ ///
+ /// Note that if this method returns `false`, the locale is either left-to-right or
+ /// the [`LocaleDirectionality`] does not include data for the locale.
+ /// You should use [`LocaleDirectionality::get`] if you need to differentiate between these cases.
+ ///
+ /// See [`LocaleDirectionality::get`] for more information.
+ pub fn is_right_to_left(&self, locale: impl AsRef<LanguageIdentifier>) -> bool {
+ self.expander
+ .get_likely_script(locale.as_ref())
+ .map(|s| self.script_in_rtl(s))
+ .unwrap_or(false) // no likely script counts as "not right-to-left"
+ }
+
+ /// Returns whether the given locale is left-to-right.
+ ///
+ /// Note that if this method returns `false`, the locale is either right-to-left or
+ /// the [`LocaleDirectionality`] does not include data for the locale.
+ /// You should use [`LocaleDirectionality::get`] if you need to differentiate between these cases.
+ ///
+ /// See [`LocaleDirectionality::get`] for more information.
+ pub fn is_left_to_right(&self, locale: impl AsRef<LanguageIdentifier>) -> bool {
+ self.expander
+ .get_likely_script(locale.as_ref())
+ .map(|s| self.script_in_ltr(s))
+ .unwrap_or(false) // no likely script counts as "not left-to-right"
+ }
+
+ fn script_in_rtl(&self, script: Script) -> bool { // true if `script` is found in the `rtl` list
+ self.script_direction
+ .get()
+ .rtl
+ .binary_search(&script.into_tinystr().to_unvalidated()) // binary search, so the `rtl` list must be stored sorted
+ .is_ok()
+ }
+
+ fn script_in_ltr(&self, script: Script) -> bool { // true if `script` is found in the `ltr` list
+ self.script_direction
+ .get()
+ .ltr
+ .binary_search(&script.into_tinystr().to_unvalidated()) // binary search, so the `ltr` list must be stored sorted
+ .is_ok()
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/src/error.rs b/third_party/rust/icu_locid_transform/src/error.rs
new file mode 100644
index 0000000000..a59f838be3
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/error.rs
@@ -0,0 +1,27 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::fmt::Debug;
+use displaydoc::Display;
+use icu_provider::DataError;
+
+#[cfg(feature = "std")]
+impl std::error::Error for LocaleTransformError {} // empty impl: no trait methods are overridden
+
+/// A list of error outcomes for various operations in this module.
+///
+/// Re-exported as [`Error`](crate::Error).
+#[derive(Display, Debug, Copy, Clone, PartialEq)]
+#[non_exhaustive]
+pub enum LocaleTransformError {
+ /// An error originating inside of the [data provider](icu_provider).
+ #[displaydoc("{0}")] // the Display output is the wrapped `DataError`'s own message
+ Data(DataError),
+}
+
+impl From<DataError> for LocaleTransformError { // lets `?` turn a `DataError` into `LocaleTransformError`
+ fn from(e: DataError) -> Self {
+ Self::Data(e)
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/src/expander.rs b/third_party/rust/icu_locid_transform/src/expander.rs
new file mode 100644
index 0000000000..56f204c324
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/expander.rs
@@ -0,0 +1,722 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{provider::*, LocaleTransformError};
+
+use core::mem;
+use icu_locid::subtags::{Language, Region, Script};
+use icu_locid::LanguageIdentifier;
+use icu_provider::prelude::*;
+
+use crate::TransformResult;
+
+/// Implements the *Add Likely Subtags* and *Remove Likely Subtags*
+/// algorithms as defined in *[UTS #35: Likely Subtags]*.
+///
+/// # Examples
+///
+/// Add likely subtags:
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{LocaleExpander, TransformResult};
+///
+/// let lc = LocaleExpander::new();
+///
+/// let mut locale = locale!("zh-CN");
+/// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, locale!("zh-Hans-CN"));
+///
+/// let mut locale = locale!("zh-Hant-TW");
+/// assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+/// assert_eq!(locale, locale!("zh-Hant-TW"));
+/// ```
+///
+/// Remove likely subtags:
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{LocaleExpander, TransformResult};
+///
+/// let lc = LocaleExpander::new();
+///
+/// let mut locale = locale!("zh-Hans-CN");
+/// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, locale!("zh"));
+///
+/// let mut locale = locale!("zh");
+/// assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+/// assert_eq!(locale, locale!("zh"));
+/// ```
+///
+/// Normally, only CLDR locales with Basic or higher coverage are included. To include more
+/// locales for maximization, use `new_extended` (shown below) or, with a custom data provider,
+/// [`try_new_extended_unstable`](Self::try_new_extended_unstable):
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::{LocaleExpander, TransformResult};
+///
+/// let lc = LocaleExpander::new_extended();
+///
+/// let mut locale = locale!("atj");
+/// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+/// assert_eq!(locale, locale!("atj-Latn-CA"));
+/// ```
+///
+/// [UTS #35: Likely Subtags]: https://www.unicode.org/reports/tr35/#Likely_Subtags
+#[derive(Debug, Clone)]
+pub struct LocaleExpander {
+ // Data queried by language, language+script and language+region; also holds the
+ // `und` fallback triple.
+ likely_subtags_l: DataPayload<LikelySubtagsForLanguageV1Marker>,
+ // Data queried by script, script+region and region.
+ likely_subtags_sr: DataPayload<LikelySubtagsForScriptRegionV1Marker>,
+ // Extended data consulted when the two payloads above have no entry; `None` when
+ // built by the non-extended constructors.
+ likely_subtags_ext: Option<DataPayload<LikelySubtagsExtendedV1Marker>>,
+}
+
+/// Borrowed view over the data of a [`LocaleExpander`].
+///
+/// Every lookup tries the primary table first and only then the optional
+/// extended table.
+struct LocaleExpanderBorrowed<'a> {
+    likely_subtags_l: &'a LikelySubtagsForLanguageV1<'a>,
+    likely_subtags_sr: &'a LikelySubtagsForScriptRegionV1<'a>,
+    likely_subtags_ext: Option<&'a LikelySubtagsExtendedV1<'a>>,
+}
+
+impl LocaleExpanderBorrowed<'_> {
+    /// Looks up the script and region recorded for a language.
+    fn get_l(&self, l: Language) -> Option<(Script, Region)> {
+        let language = l.into_tinystr().to_unvalidated();
+        if let Some(found) = self.likely_subtags_l.language.get_copied(&language) {
+            return Some(found);
+        }
+        self.likely_subtags_ext?.language.get_copied(&language)
+    }
+
+    /// Looks up the region recorded for a language + script pair.
+    fn get_ls(&self, l: Language, s: Script) -> Option<Region> {
+        let pair = (
+            l.into_tinystr().to_unvalidated(),
+            s.into_tinystr().to_unvalidated(),
+        );
+        if let Some(found) = self.likely_subtags_l.language_script.get_copied(&pair) {
+            return Some(found);
+        }
+        self.likely_subtags_ext?.language_script.get_copied(&pair)
+    }
+
+    /// Looks up the script recorded for a language + region pair.
+    fn get_lr(&self, l: Language, r: Region) -> Option<Script> {
+        let pair = (
+            l.into_tinystr().to_unvalidated(),
+            r.into_tinystr().to_unvalidated(),
+        );
+        if let Some(found) = self.likely_subtags_l.language_region.get_copied(&pair) {
+            return Some(found);
+        }
+        self.likely_subtags_ext?.language_region.get_copied(&pair)
+    }
+
+    /// Looks up the language and region recorded for a script.
+    fn get_s(&self, s: Script) -> Option<(Language, Region)> {
+        let script = s.into_tinystr().to_unvalidated();
+        if let Some(found) = self.likely_subtags_sr.script.get_copied(&script) {
+            return Some(found);
+        }
+        self.likely_subtags_ext?.script.get_copied(&script)
+    }
+
+    /// Looks up the language recorded for a script + region pair.
+    fn get_sr(&self, s: Script, r: Region) -> Option<Language> {
+        let pair = (
+            s.into_tinystr().to_unvalidated(),
+            r.into_tinystr().to_unvalidated(),
+        );
+        if let Some(found) = self.likely_subtags_sr.script_region.get_copied(&pair) {
+            return Some(found);
+        }
+        self.likely_subtags_ext?.script_region.get_copied(&pair)
+    }
+
+    /// Looks up the language and script recorded for a region.
+    fn get_r(&self, r: Region) -> Option<(Language, Script)> {
+        let region = r.into_tinystr().to_unvalidated();
+        if let Some(found) = self.likely_subtags_sr.region.get_copied(&region) {
+            return Some(found);
+        }
+        self.likely_subtags_ext?.region.get_copied(&region)
+    }
+
+    /// Returns the `und` triple stored in the language data.
+    fn get_und(&self) -> (Language, Script, Region) {
+        self.likely_subtags_l.und
+    }
+}
+
+/// Fills in the subtags that `langid` is missing from the supplied values.
+///
+/// Subtags already present in `langid` are never overwritten. An empty
+/// `language` or a `None` script/region means "nothing to contribute".
+/// Returns [`TransformResult::Modified`] if at least one subtag was filled in.
+#[inline]
+fn update_langid(
+    language: Language,
+    script: Option<Script>,
+    region: Option<Region>,
+    langid: &mut LanguageIdentifier,
+) -> TransformResult {
+    let fill_language = langid.language.is_empty() && !language.is_empty();
+    if fill_language {
+        langid.language = language;
+    }
+
+    let fill_script = langid.script.is_none() && script.is_some();
+    if fill_script {
+        langid.script = script;
+    }
+
+    let fill_region = langid.region.is_none() && region.is_some();
+    if fill_region {
+        langid.region = region;
+    }
+
+    if fill_language || fill_script || fill_region {
+        TransformResult::Modified
+    } else {
+        TransformResult::Unmodified
+    }
+}
+
+impl LocaleExpander {
+    /// Creates a [`LocaleExpander`] backed by compiled data for commonly-used locales
+    /// (locales with *Basic* or higher [CLDR coverage]).
+    ///
+    /// Pick this constructor when limited likely subtags are enough, e.g. for
+    /// data-oriented use cases. No extended data is attached.
+    ///
+    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+    ///
+    /// [📚 Help choosing a constructor](icu_provider::constructors)
+    ///
+    /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
+    #[cfg(feature = "compiled_data")]
+    pub const fn new() -> Self {
+        Self {
+            likely_subtags_l: DataPayload::from_static_ref(
+                crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1,
+            ),
+            likely_subtags_sr: DataPayload::from_static_ref(
+                crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1,
+            ),
+            likely_subtags_ext: None,
+        }
+    }
+
+    /// Creates a [`LocaleExpander`] backed by compiled data for all locales.
+    ///
+    /// Pick this constructor when data is needed for every locale, including ones
+    /// that may not have data for other services (i.e. [CLDR coverage] below *Basic*).
+    ///
+    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+    ///
+    /// [📚 Help choosing a constructor](icu_provider::constructors)
+    ///
+    /// [CLDR coverage]: https://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels
+    #[cfg(feature = "compiled_data")]
+    pub const fn new_extended() -> Self {
+        Self {
+            likely_subtags_l: DataPayload::from_static_ref(
+                crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1,
+            ),
+            likely_subtags_sr: DataPayload::from_static_ref(
+                crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1,
+            ),
+            likely_subtags_ext: Some(DataPayload::from_static_ref(
+                crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1,
+            )),
+        }
+    }
+
+    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_extended)]
+    pub fn try_new_extended_unstable<P>(
+        provider: &P,
+    ) -> Result<LocaleExpander, LocaleTransformError>
+    where
+        P: DataProvider<LikelySubtagsForLanguageV1Marker>
+            + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+            + DataProvider<LikelySubtagsExtendedV1Marker>
+            + ?Sized,
+    {
+        // Each `load` picks its data marker from the field it initializes; the
+        // payloads are requested in the order language, script/region, extended.
+        Ok(Self {
+            likely_subtags_l: provider.load(Default::default())?.take_payload()?,
+            likely_subtags_sr: provider.load(Default::default())?.take_payload()?,
+            likely_subtags_ext: Some(provider.load(Default::default())?.take_payload()?),
+        })
+    }
+
+ // Macro invocation naming the extended constructor family (`new_extended` and the
+ // `try_new_extended_*` functions).
+ // NOTE(review): presumably `#[cfg(skip)]` keeps the macro from emitting the
+ // compiled-data constructor, since `new_extended` is hand-written above. Confirm
+ // against the `icu_provider` macro documentation.
+ icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: LocaleTransformError,
+ #[cfg(skip)]
+ functions: [
+ new_extended,
+ try_new_extended_with_any_provider,
+ try_new_extended_with_buffer_provider,
+ try_new_extended_unstable,
+ Self
+ ]);
+
+    #[doc = icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)]
+    pub fn try_new_with_any_provider(
+        provider: &(impl AnyProvider + ?Sized),
+    ) -> Result<LocaleExpander, LocaleTransformError> {
+        // Goes through the compatibility path so both key layouts are accepted.
+        let downcasting = provider.as_downcasting();
+        Self::try_new_compat(&downcasting)
+    }
+
+    #[doc = icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)]
+    #[cfg(feature = "serde")]
+    pub fn try_new_with_buffer_provider(
+        provider: &(impl BufferProvider + ?Sized),
+    ) -> Result<LocaleExpander, LocaleTransformError> {
+        // Goes through the compatibility path so both key layouts are accepted.
+        let deserializing = provider.as_deserializing();
+        Self::try_new_compat(&deserializing)
+    }
+
+    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+    pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleExpander, LocaleTransformError>
+    where
+        P: DataProvider<LikelySubtagsForLanguageV1Marker>
+            + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+            + ?Sized,
+    {
+        // Each `load` picks its data marker from the field it initializes.
+        Ok(Self {
+            likely_subtags_l: provider.load(Default::default())?.take_payload()?,
+            likely_subtags_sr: provider.load(Default::default())?.take_payload()?,
+            likely_subtags_ext: None,
+        })
+    }
+
+    /// Builds an expander from either key layout.
+    ///
+    /// The split language and script/region payloads are preferred, with the
+    /// extended payload attached when it loads. If either split payload fails to
+    /// load, the single `LikelySubtagsV1Marker` payload is loaded instead and
+    /// projected into the two split structures (without extended data). An error
+    /// is returned only when that fallback load fails as well.
+    fn try_new_compat<P>(provider: &P) -> Result<LocaleExpander, LocaleTransformError>
+    where
+        P: DataProvider<LikelySubtagsForLanguageV1Marker>
+            + DataProvider<LikelySubtagsForScriptRegionV1Marker>
+            + DataProvider<LikelySubtagsExtendedV1Marker>
+            + DataProvider<LikelySubtagsV1Marker>
+            + ?Sized,
+    {
+        let payload_l = provider
+            .load(Default::default())
+            .and_then(DataResponse::take_payload);
+        let payload_sr = provider
+            .load(Default::default())
+            .and_then(DataResponse::take_payload);
+        let payload_ext = provider
+            .load(Default::default())
+            .and_then(DataResponse::take_payload);
+
+        let (likely_subtags_l, likely_subtags_sr, likely_subtags_ext) =
+            match (payload_l, payload_sr) {
+                // Split keys are available; the extended payload is optional.
+                (Ok(l), Ok(sr)) => (l, sr, payload_ext.ok()),
+                _ => {
+                    let combined: DataPayload<LikelySubtagsV1Marker> =
+                        provider.load(Default::default())?.take_payload()?;
+                    (
+                        combined.map_project_cloned(|st, _| {
+                            LikelySubtagsForLanguageV1::clone_from_borrowed(st)
+                        }),
+                        combined.map_project(|st, _| st.into()),
+                        None,
+                    )
+                }
+            };
+
+        Ok(LocaleExpander {
+            likely_subtags_l,
+            likely_subtags_sr,
+            likely_subtags_ext,
+        })
+    }
+
+    /// Borrows the data behind each payload into a [`LocaleExpanderBorrowed`]
+    /// that lives no longer than `self`.
+    fn as_borrowed(&self) -> LocaleExpanderBorrowed<'_> {
+        LocaleExpanderBorrowed {
+            likely_subtags_l: self.likely_subtags_l.get(),
+            likely_subtags_sr: self.likely_subtags_sr.get(),
+            likely_subtags_ext: self.likely_subtags_ext.as_ref().map(|p| p.get()),
+        }
+    }
+
+    /// The maximize method potentially updates a passed in locale in place
+    /// depending on the results of running the 'Add Likely Subtags' algorithm
+    /// from <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+    ///
+    /// If the result of running the algorithm would result in a new locale, the
+    /// locale argument is updated in place to match the result, and the method
+    /// returns [`TransformResult::Modified`]. Otherwise, the method
+    /// returns [`TransformResult::Unmodified`] and the locale argument is
+    /// unchanged.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_locid::locale;
+    /// use icu_locid_transform::{LocaleExpander, TransformResult};
+    ///
+    /// let lc = LocaleExpander::new();
+    ///
+    /// let mut locale = locale!("zh-CN");
+    /// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+    /// assert_eq!(locale, locale!("zh-Hans-CN"));
+    ///
+    /// let mut locale = locale!("zh-Hant-TW");
+    /// assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+    /// assert_eq!(locale, locale!("zh-Hant-TW"));
+    /// ```
+    pub fn maximize<T: AsMut<LanguageIdentifier>>(&self, mut langid: T) -> TransformResult {
+        let langid = langid.as_mut();
+        let data = self.as_borrowed();
+
+        // Nothing to add when language, script and region are all present.
+        if !langid.language.is_empty() && langid.script.is_some() && langid.region.is_some() {
+            return TransformResult::Unmodified;
+        }
+
+        // Lookups run from the most specific key to the least specific one;
+        // the first hit supplies the missing subtags.
+        if !langid.language.is_empty() {
+            if let Some(region) = langid.region {
+                if let Some(script) = data.get_lr(langid.language, region) {
+                    return update_langid(Language::UND, Some(script), None, langid);
+                }
+            }
+            if let Some(script) = langid.script {
+                if let Some(region) = data.get_ls(langid.language, script) {
+                    return update_langid(Language::UND, None, Some(region), langid);
+                }
+            }
+            if let Some((script, region)) = data.get_l(langid.language) {
+                return update_langid(Language::UND, Some(script), Some(region), langid);
+            }
+        }
+        if let Some(script) = langid.script {
+            if let Some(region) = langid.region {
+                if let Some(language) = data.get_sr(script, region) {
+                    return update_langid(language, None, None, langid);
+                }
+            }
+            if let Some((language, region)) = data.get_s(script) {
+                return update_langid(language, None, Some(region), langid);
+            }
+        }
+        if let Some(region) = langid.region {
+            if let Some((language, script)) = data.get_r(region) {
+                return update_langid(language, Some(script), None, langid);
+            }
+        }
+
+        // No table matched: fall back to the subtags recorded for `und`.
+        let (language, script, region) = data.get_und();
+        update_langid(language, Some(script), Some(region), langid)
+    }
+
+    /// The minimize method potentially updates a passed in locale in place
+    /// depending on the results of running the 'Remove Likely Subtags'
+    /// algorithm from <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+    ///
+    /// If the result of running the algorithm would result in a new locale, the
+    /// locale argument is updated in place to match the result, and the method
+    /// returns [`TransformResult::Modified`]. Otherwise, the method
+    /// returns [`TransformResult::Unmodified`] and the locale argument is
+    /// unchanged.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_locid::locale;
+    /// use icu_locid_transform::{LocaleExpander, TransformResult};
+    ///
+    /// let lc = LocaleExpander::new();
+    ///
+    /// let mut locale = locale!("zh-Hans-CN");
+    /// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+    /// assert_eq!(locale, locale!("zh"));
+    ///
+    /// let mut locale = locale!("zh");
+    /// assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+    /// assert_eq!(locale, locale!("zh"));
+    /// ```
+    pub fn minimize<T: AsMut<LanguageIdentifier>>(&self, mut langid: T) -> TransformResult {
+        let langid = langid.as_mut();
+
+        // Variants take no part in likely-subtag resolution. Set them aside so the
+        // working copies below are cheap to clone and compare on language, script
+        // and region only; they are put back untouched before returning.
+        let variants = mem::take(&mut langid.variants);
+
+        let mut max = langid.clone();
+        self.maximize(&mut max);
+
+        // Candidate (script, region) pairs, from most to least minimal. The first
+        // candidate that maximizes back to `max` wins; if none does, the fully
+        // maximized script and region are kept.
+        let mut minimal = (max.script, max.region);
+        let mut trial = max.clone();
+        for (script, region) in [(None, None), (None, max.region), (max.script, None)] {
+            trial.script = script;
+            trial.region = region;
+            self.maximize(&mut trial);
+            if trial == max {
+                minimal = (script, region);
+                break;
+            }
+        }
+        let (script, region) = minimal;
+
+        let unchanged =
+            langid.language == max.language && langid.script == script && langid.region == region;
+        let result = if unchanged {
+            TransformResult::Unmodified
+        } else {
+            langid.language = max.language;
+            langid.script = script;
+            langid.region = region;
+            TransformResult::Modified
+        };
+
+        langid.variants = variants;
+        result
+    }
+
+    // TODO(3492): consider turning this and a future get_likely_region/get_likely_language public
+    /// Returns the script of `langid` if it has one, and otherwise the script
+    /// inferred from its language and region (if any can be inferred).
+    #[inline]
+    pub(crate) fn get_likely_script<T: AsRef<LanguageIdentifier>>(
+        &self,
+        langid: T,
+    ) -> Option<Script> {
+        let langid = langid.as_ref();
+        match langid.script {
+            Some(script) => Some(script),
+            None => self.infer_likely_script(langid.language, langid.region),
+        }
+    }
+
+    /// Infers a script from a language and an optional region.
+    ///
+    /// Lookups proceed through _all possible cases_ in order of specificity
+    /// (borrowed from `LocaleExpander::maximize`):
+    /// 1. language + region
+    /// 2. language
+    /// 3. region
+    ///
+    /// All cases have to be checked, because e.g. for "en-US" the default script
+    /// is associated with "en" but not "en-US". Returns `None` when no case matches.
+    fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> {
+        let data = self.as_borrowed();
+
+        // Cases 1 and 2 only apply when the language is known.
+        let from_language = || {
+            if language == Language::UND {
+                return None;
+            }
+            region
+                .and_then(|region| data.get_lr(language, region))
+                .or_else(|| data.get_l(language).map(|(script, _)| script))
+        };
+
+        // Case 3: the language is unknown, or knowing it did not help.
+        let from_region = || {
+            region
+                .and_then(|region| data.get_r(region))
+                .map(|(_, script)| script)
+        };
+
+        from_language().or_else(from_region)
+    }
+}
+
+#[cfg(feature = "serde")]
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use icu_locid::locale;
+
+    /// Test provider that serves the baked likely-subtags data under both the
+    /// combined key and the split keys, except for the keys listed in `keys`,
+    /// which are rejected with `MissingDataKey`.
+    struct RejectByKeyProvider {
+        keys: Vec<DataKey>,
+    }
+
+    impl AnyProvider for RejectByKeyProvider {
+        fn load_any(&self, key: DataKey, _: DataRequest) -> Result<AnyResponse, DataError> {
+            if self.keys.contains(&key) {
+                return Err(DataErrorKind::MissingDataKey.with_str_context("rejected"));
+            }
+
+            let l = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1;
+            let ext = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1;
+            let sr = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1;
+
+            let payload = if key.hashed() == LikelySubtagsV1Marker::KEY.hashed() {
+                // Assemble the combined structure from the baked split data.
+                DataPayload::<LikelySubtagsV1Marker>::from_owned(LikelySubtagsV1 {
+                    language_script: l
+                        .language_script
+                        .iter_copied()
+                        .chain(ext.language_script.iter_copied())
+                        .collect(),
+                    language_region: l
+                        .language_region
+                        .iter_copied()
+                        .chain(ext.language_region.iter_copied())
+                        .collect(),
+                    language: l
+                        .language
+                        .iter_copied()
+                        .chain(ext.language.iter_copied())
+                        .collect(),
+                    script_region: ext.script_region.clone(),
+                    script: ext.script.clone(),
+                    region: ext.region.clone(),
+                    und: l.und,
+                })
+                .wrap_into_any_payload()
+            } else if key.hashed() == LikelySubtagsForLanguageV1Marker::KEY.hashed() {
+                DataPayload::<LikelySubtagsForLanguageV1Marker>::from_static_ref(l)
+                    .wrap_into_any_payload()
+            } else if key.hashed() == LikelySubtagsExtendedV1Marker::KEY.hashed() {
+                DataPayload::<LikelySubtagsExtendedV1Marker>::from_static_ref(ext)
+                    .wrap_into_any_payload()
+            } else if key.hashed() == LikelySubtagsForScriptRegionV1Marker::KEY.hashed() {
+                DataPayload::<LikelySubtagsForScriptRegionV1Marker>::from_static_ref(sr)
+                    .wrap_into_any_payload()
+            } else {
+                return Err(DataErrorKind::MissingDataKey.into_error());
+            };
+
+            Ok(AnyResponse {
+                payload: Some(payload),
+                metadata: Default::default(),
+            })
+        }
+    }
+
+    /// Asserts that `lc` maximizes `zh-CN` to `zh-Hans-CN`.
+    fn assert_maximizes_zh_cn(lc: &LocaleExpander) {
+        let mut locale = locale!("zh-CN");
+        assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+        assert_eq!(locale, locale!("zh-Hans-CN"));
+    }
+
+    #[test]
+    fn test_old_keys() {
+        let provider = RejectByKeyProvider {
+            keys: vec![
+                LikelySubtagsForLanguageV1Marker::KEY,
+                LikelySubtagsForScriptRegionV1Marker::KEY,
+                LikelySubtagsExtendedV1Marker::KEY,
+            ],
+        };
+        let lc = LocaleExpander::try_new_with_any_provider(&provider)
+            .expect("should create with old keys");
+        assert_maximizes_zh_cn(&lc);
+    }
+
+    #[test]
+    fn test_new_keys() {
+        let provider = RejectByKeyProvider {
+            keys: vec![LikelySubtagsV1Marker::KEY],
+        };
+        let lc = LocaleExpander::try_new_with_any_provider(&provider)
+            .expect("should create with new keys");
+        assert_maximizes_zh_cn(&lc);
+    }
+
+    #[test]
+    fn test_mixed_keys() {
+        // Include the old key and one of the new keys but not both new keys.
+        // Not sure if this is a useful test.
+        let provider = RejectByKeyProvider {
+            keys: vec![LikelySubtagsForScriptRegionV1Marker::KEY],
+        };
+        let lc = LocaleExpander::try_new_with_any_provider(&provider)
+            .expect("should create with mixed keys");
+        assert_maximizes_zh_cn(&lc);
+    }
+
+    #[test]
+    fn test_no_keys() {
+        let provider = RejectByKeyProvider {
+            keys: vec![
+                LikelySubtagsForLanguageV1Marker::KEY,
+                LikelySubtagsForScriptRegionV1Marker::KEY,
+                LikelySubtagsV1Marker::KEY,
+            ],
+        };
+        assert!(
+            LocaleExpander::try_new_with_any_provider(&provider).is_err(),
+            "should not create: no data present"
+        );
+    }
+
+    #[test]
+    fn test_new_small_keys() {
+        // Include the new small keys but not the extended key
+        let provider = RejectByKeyProvider {
+            keys: vec![
+                LikelySubtagsExtendedV1Marker::KEY,
+                LikelySubtagsV1Marker::KEY,
+            ],
+        };
+        let lc = LocaleExpander::try_new_with_any_provider(&provider)
+            .expect("should create with new small keys");
+        assert_maximizes_zh_cn(&lc);
+    }
+}
diff --git a/third_party/rust/icu_locid_transform/src/fallback/algorithms.rs b/third_party/rust/icu_locid_transform/src/fallback/algorithms.rs
new file mode 100644
index 0000000000..c3a3d08cab
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/fallback/algorithms.rs
@@ -0,0 +1,487 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use icu_locid::extensions::unicode::{key, Key};
+use icu_locid::subtags::Language;
+use icu_locid::LanguageIdentifier;
+use icu_provider::FallbackPriority;
+
+use super::*;
+
+/// Unicode extension key of the subdivision keyword (`-u-sd`), which normalization
+/// always retains and which fallback removes in its own step.
+const SUBDIVISION_KEY: Key = key!("sd");
+
+impl<'a> LocaleFallbackerWithConfig<'a> {
+    /// Normalizes `locale` in place before fallback starts.
+    ///
+    /// The region is populated for region-priority fallback, a script implied by
+    /// the other subtags is removed, and Unicode extension keywords other than
+    /// `-u-sd` and the configured extension key are dropped.
+    pub(crate) fn normalize(&self, locale: &mut DataLocale) {
+        let language = locale.language();
+        let language_key = language.into_tinystr().to_unvalidated();
+        let likely_subtags = &self.likely_subtags;
+
+        // 1. Populate the region (required for region fallback only)
+        if self.config.priority == FallbackPriority::Region && locale.region().is_none() {
+            // 1a. First look for region based on language+script
+            if let Some(script) = locale.script() {
+                let region = likely_subtags
+                    .ls2r
+                    .get_2d(&language_key, &script.into_tinystr().to_unvalidated())
+                    .copied();
+                locale.set_region(region);
+            }
+            // 1b. If that fails, try language only
+            if locale.region().is_none() {
+                let region = likely_subtags.l2r.get(&language_key).copied();
+                locale.set_region(region);
+            }
+        }
+
+        // 2. Remove the script if it is implied by the other subtags
+        if let Some(script) = locale.script() {
+            let default_script = likely_subtags
+                .l2s
+                .get_copied(&language_key)
+                .unwrap_or(DEFAULT_SCRIPT);
+            let implied_script = match locale.region() {
+                Some(region) => likely_subtags
+                    .lr2s
+                    .get_copied_2d(&language_key, &region.into_tinystr().to_unvalidated())
+                    .unwrap_or(default_script),
+                None => default_script,
+            };
+            if script == implied_script {
+                locale.set_script(None);
+            }
+        }
+
+        // 3. Remove irrelevant extension subtags: always retain -u-sd and the
+        //    query-specific keyword, drop all others
+        locale.retain_unicode_ext(|key| {
+            *key == SUBDIVISION_KEY || Some(*key) == self.config.extension_key
+        });
+
+        // 4. If there is an invalid "sd" subtag, drop it
+        // For now, ignore it, and let fallback do it for us
+    }
+}
+
+impl<'a> LocaleFallbackIteratorInner<'a> {
+    /// Advances `locale` by one fallback step according to the configured priority.
+    pub fn step(&mut self, locale: &mut DataLocale) {
+        match self.config.priority {
+            // TODO(#1964): Change the collation fallback rules to be different
+            // from the language fallback rules.
+            FallbackPriority::Language | FallbackPriority::Collation => {
+                self.step_language(locale)
+            }
+            FallbackPriority::Region => self.step_region(locale),
+            // This case should not normally happen, but `FallbackPriority` is non_exhaustive.
+            // Make it go directly to `und`.
+            _ => {
+                debug_assert!(
+                    false,
+                    "Unknown FallbackPriority: {:?}",
+                    self.config.priority
+                );
+                *locale = Default::default()
+            }
+        }
+    }
+
+ /// Performs one step of language-priority fallback on `locale`.
+ ///
+ /// The numbered rules below are tried in order; the first one that applies
+ /// mutates `locale` and returns, so each call makes exactly one change.
+ /// Removed extension keywords and variants are stashed in the `backup_*`
+ /// fields and re-applied by `restore_extensions_variants` after the
+ /// language identifier itself changes (rules 5-7).
+ fn step_language(&mut self, locale: &mut DataLocale) {
+ // 1. Remove the extension fallback keyword
+ if let Some(extension_key) = self.config.extension_key {
+ if let Some(value) = locale.remove_unicode_ext(&extension_key) {
+ self.backup_extension = Some(value);
+ return;
+ }
+ }
+ // 2. Remove the subdivision keyword
+ if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) {
+ self.backup_subdivision = Some(value);
+ return;
+ }
+ // 3. Assert that the locale is a language identifier
+ debug_assert!(!locale.has_unicode_ext());
+ // 4. Remove variants
+ if locale.has_variants() {
+ self.backup_variants = Some(locale.clear_variants());
+ return;
+ }
+ // 5. Check for parent override
+ if let Some(parent) = self.get_explicit_parent(locale) {
+ locale.set_langid(parent);
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ // 6. Add the script subtag if necessary
+ if locale.script().is_none() {
+ if let Some(region) = locale.region() {
+ let language = locale.language();
+ if let Some(script) = self.likely_subtags.lr2s.get_copied_2d(
+ &language.into_tinystr().to_unvalidated(),
+ &region.into_tinystr().to_unvalidated(),
+ ) {
+ locale.set_script(Some(script));
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ }
+ }
+ // 7. Remove region
+ if locale.region().is_some() {
+ locale.set_region(None);
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ // 8. Remove language+script
+ debug_assert!(!locale.language().is_empty()); // don't call .step() on und
+ locale.set_script(None);
+ locale.set_language(Language::UND);
+ }
+
+ /// Performs one step of region-priority fallback on `locale`.
+ ///
+ /// The numbered rules below are tried in order; the first one that applies
+ /// mutates `locale` and returns. Unlike language-priority fallback, the
+ /// language and script are removed before the region, and the stashed
+ /// extension keywords and variants are restored once at that point (rule 5).
+ fn step_region(&mut self, locale: &mut DataLocale) {
+ // 1. Remove the extension fallback keyword
+ if let Some(extension_key) = self.config.extension_key {
+ if let Some(value) = locale.remove_unicode_ext(&extension_key) {
+ self.backup_extension = Some(value);
+ return;
+ }
+ }
+ // 2. Remove the subdivision keyword
+ if let Some(value) = locale.remove_unicode_ext(&SUBDIVISION_KEY) {
+ self.backup_subdivision = Some(value);
+ return;
+ }
+ // 3. Assert that the locale is a language identifier
+ debug_assert!(!locale.has_unicode_ext());
+ // 4. Remove variants
+ if locale.has_variants() {
+ self.backup_variants = Some(locale.clear_variants());
+ return;
+ }
+ // 5. Remove language+script
+ if !locale.language().is_empty() || locale.script().is_some() {
+ locale.set_script(None);
+ locale.set_language(Language::UND);
+ self.restore_extensions_variants(locale);
+ return;
+ }
+ // 6. Remove region
+ debug_assert!(locale.region().is_some()); // don't call .step() on und
+ locale.set_region(None);
+ }
+
+ /// Re-applies to `locale` whatever earlier steps stashed in the `backup_*`
+ /// fields (extension keyword, subdivision keyword, variants).
+ ///
+ /// Each backup is taken out of its field, so it is restored at most once.
+ fn restore_extensions_variants(&mut self, locale: &mut DataLocale) {
+ if let Some(value) = self.backup_extension.take() {
+ #[allow(clippy::unwrap_used)] // not reachable unless extension_key is present
+ locale.set_unicode_ext(self.config.extension_key.unwrap(), value);
+ }
+ if let Some(value) = self.backup_subdivision.take() {
+ locale.set_unicode_ext(SUBDIVISION_KEY, value);
+ }
+ if let Some(variants) = self.backup_variants.take() {
+ locale.set_variants(variants);
+ }
+ }
+
+    /// Looks up an explicit parent for `locale`.
+    ///
+    /// The supplement's parent table (when a supplement is configured) takes
+    /// precedence; the regular parent table is consulted only if it has no entry.
+    fn get_explicit_parent(&self, locale: &DataLocale) -> Option<LanguageIdentifier> {
+        let from_supplement = match self.supplement {
+            Some(supplement) => supplement
+                .parents
+                .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()),
+            None => None,
+        };
+        let parent = match from_supplement {
+            Some(found) => Some(found),
+            None => self
+                .parents
+                .parents
+                .get_copied_by(|uvstr| locale.strict_cmp(uvstr).reverse()),
+        };
+        parent.map(LanguageIdentifier::from)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use icu_locid::Locale;
+ use std::str::FromStr;
+ use writeable::Writeable;
+
+ struct TestCase {
+ input: &'static str,
+ requires_data: bool,
+ extension_key: Option<Key>,
+ fallback_supplement: Option<LocaleFallbackSupplement>,
+ // Note: The first entry in the chain is the normalized locale
+ expected_language_chain: &'static [&'static str],
+ expected_region_chain: &'static [&'static str],
+ }
+
+ // TODO: Consider loading these from a JSON file
+ const TEST_CASES: &[TestCase] = &[
+ TestCase {
+ input: "en-u-hc-h12-sd-usca",
+ requires_data: false,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-u-sd-usca", "en"],
+ expected_region_chain: &["en-u-sd-usca", "en", "und-u-sd-usca"],
+ },
+ TestCase {
+ input: "en-US-u-hc-h12-sd-usca",
+ requires_data: false,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ input: "en-US-fonipa-u-hc-h12-sd-usca",
+ requires_data: false,
+ extension_key: Some(key!("hc")),
+ fallback_supplement: None,
+ expected_language_chain: &[
+ "en-US-fonipa-u-hc-h12-sd-usca",
+ "en-US-fonipa-u-sd-usca",
+ "en-US-fonipa",
+ "en-US",
+ "en-fonipa-u-hc-h12-sd-usca",
+ "en-fonipa-u-sd-usca",
+ "en-fonipa",
+ "en",
+ ],
+ expected_region_chain: &[
+ "en-US-fonipa-u-hc-h12-sd-usca",
+ "en-US-fonipa-u-sd-usca",
+ "en-US-fonipa",
+ "en-US",
+ "und-US-fonipa-u-hc-h12-sd-usca",
+ "und-US-fonipa-u-sd-usca",
+ "und-US-fonipa",
+ "und-US",
+ ],
+ },
+ TestCase {
+ input: "en-u-hc-h12-sd-usca",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ input: "en-Latn-u-sd-usca",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ input: "en-Latn-US-u-sd-usca",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en-US-u-sd-usca", "en-US", "en-u-sd-usca", "en"],
+ expected_region_chain: &["en-US-u-sd-usca", "en-US", "und-US-u-sd-usca", "und-US"],
+ },
+ TestCase {
+ // NOTE: -u-rg is not yet supported; when it is, this test should be updated
+ input: "en-u-rg-gbxxxx",
+ requires_data: false,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["en"],
+ expected_region_chain: &["en"],
+ },
+ TestCase {
+ input: "sr-ME",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"],
+ expected_region_chain: &["sr-ME", "und-ME"],
+ },
+ TestCase {
+ input: "sr-Latn-ME",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-ME", "sr-Latn-ME", "sr-Latn"],
+ expected_region_chain: &["sr-ME", "und-ME"],
+ },
+ TestCase {
+ input: "sr-ME-fonipa",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &[
+ "sr-ME-fonipa",
+ "sr-ME",
+ "sr-Latn-ME-fonipa",
+ "sr-Latn-ME",
+ "sr-Latn-fonipa",
+ "sr-Latn",
+ ],
+ expected_region_chain: &["sr-ME-fonipa", "sr-ME", "und-ME-fonipa", "und-ME"],
+ },
+ TestCase {
+ input: "sr-RS",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-RS", "sr"],
+ expected_region_chain: &["sr-RS", "und-RS"],
+ },
+ TestCase {
+ input: "sr-Cyrl-RS",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-RS", "sr"],
+ expected_region_chain: &["sr-RS", "und-RS"],
+ },
+ TestCase {
+ input: "sr-Latn-RS",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["sr-Latn-RS", "sr-Latn"],
+ expected_region_chain: &["sr-Latn-RS", "und-RS"],
+ },
+ TestCase {
+ input: "de-Latn-LI",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["de-LI", "de"],
+ expected_region_chain: &["de-LI", "und-LI"],
+ },
+ TestCase {
+ input: "ca-ES-valencia",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["ca-ES-valencia", "ca-ES", "ca-valencia", "ca"],
+ expected_region_chain: &["ca-ES-valencia", "ca-ES", "und-ES-valencia", "und-ES"],
+ },
+ TestCase {
+ input: "es-AR",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["es-AR", "es-419", "es"],
+ expected_region_chain: &["es-AR", "und-AR"],
+ },
+ TestCase {
+ input: "hi-IN",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["hi-IN", "hi"],
+ expected_region_chain: &["hi-IN", "und-IN"],
+ },
+ TestCase {
+ input: "hi-Latn-IN",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["hi-Latn-IN", "hi-Latn", "en-IN", "en-001", "en"],
+ expected_region_chain: &["hi-Latn-IN", "und-IN"],
+ },
+ TestCase {
+ input: "zh-CN",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ // Note: "zh-Hans" is not reachable because it is the default script for "zh".
+ // The fallback algorithm does not visit the language-script bundle when the
+ // script is the default for the language
+ expected_language_chain: &["zh-CN", "zh"],
+ expected_region_chain: &["zh-CN", "und-CN"],
+ },
+ TestCase {
+ input: "zh-TW",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["zh-TW", "zh-Hant-TW", "zh-Hant"],
+ expected_region_chain: &["zh-TW", "und-TW"],
+ },
+ TestCase {
+ input: "yue-HK",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: None,
+ expected_language_chain: &["yue-HK", "yue"],
+ expected_region_chain: &["yue-HK", "und-HK"],
+ },
+ TestCase {
+ input: "yue-HK",
+ requires_data: true,
+ extension_key: None,
+ fallback_supplement: Some(LocaleFallbackSupplement::Collation),
+ // TODO(#1964): add "zh" as a target.
+ expected_language_chain: &["yue-HK", "yue", "zh-Hant"],
+ expected_region_chain: &["yue-HK", "und-HK"],
+ },
+ ];
+
+ #[test]
+ fn test_fallback() {
+ let fallbacker_no_data = LocaleFallbacker::new_without_data();
+ let fallbacker_no_data = fallbacker_no_data.as_borrowed(); // borrowed form, same type as `fallbacker_with_data`
+ let fallbacker_with_data = LocaleFallbacker::new();
+ for cas in TEST_CASES {
+ for (priority, expected_chain) in [ // each case is run once per priority, against that priority's chain
+ (
+ LocaleFallbackPriority::Language,
+ cas.expected_language_chain,
+ ),
+ (LocaleFallbackPriority::Region, cas.expected_region_chain),
+ ] {
+ let mut config = LocaleFallbackConfig::default();
+ config.priority = priority;
+ config.extension_key = cas.extension_key;
+ config.fallback_supplement = cas.fallback_supplement;
+ let fallbacker = if cas.requires_data { // the case declares which fallbacker it expects to run against
+ fallbacker_with_data
+ } else {
+ fallbacker_no_data
+ };
+ let mut it = fallbacker
+ .for_config(config)
+ .fallback_for(Locale::from_str(cas.input).unwrap().into());
+ for &expected in expected_chain { // walk the chain: compare the current locale, then advance one step
+ assert_eq!(
+ expected,
+ &*it.get().write_to_string(),
+ "{:?} ({:?})",
+ cas.input,
+ priority
+ );
+ it.step();
+ }
+ assert_eq!( // once the expected chain is exhausted, the iterator must be at the root locale "und"
+ "und",
+ &*it.get().write_to_string(),
+ "{:?} ({:?})",
+ cas.input,
+ priority
+ );
+ }
+ }
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/src/fallback/mod.rs b/third_party/rust/icu_locid_transform/src/fallback/mod.rs
new file mode 100644
index 0000000000..9dd835419c
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/fallback/mod.rs
@@ -0,0 +1,304 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Tools for locale fallback, enabling arbitrary input locales to be mapped into the nearest
+//! locale with data.
+//!
+//! The algorithm implemented in this module is called [Flexible Vertical Fallback](
+//! https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit).
+//! Watch [#2243](https://github.com/unicode-org/icu4x/issues/2243) to track improvements to
+//! this algorithm and steps to enshrine the algorithm in CLDR.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu_locid::locale;
+//! use icu_locid_transform::LocaleFallbacker;
+//!
+//! // Set up a LocaleFallbacker with data.
+//! let fallbacker = LocaleFallbacker::new();
+//!
+//! // Create a LocaleFallbackerIterator with a default configuration.
+//! // By default, uses language priority with no additional extension keywords.
+//! let mut fallback_iterator = fallbacker
+//! .for_config(Default::default())
+//! .fallback_for(locale!("hi-Latn-IN").into());
+//!
+//! // Run the algorithm and check the results.
+//! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("en-IN").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("en-001").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("en").into());
+//! fallback_iterator.step();
+//! assert_eq!(fallback_iterator.get(), &locale!("und").into());
+//! ```
+
+use crate::provider::*;
+use icu_locid::extensions::unicode::Value;
+use icu_locid::subtags::Variants;
+use icu_provider::prelude::*;
+
+#[doc(inline)]
+pub use icu_provider::fallback::*;
+
+mod algorithms;
+
+/// Implements the algorithm defined in *[UTS #35: Locale Inheritance and Matching]*.
+///
+/// Note that this implementation performs some additional steps compared to the *UTS #35*
+/// algorithm, see *[the design doc]* for a detailed description, and [#2243](
+ /// https://github.com/unicode-org/icu4x/issues/2243) to track alignment with *UTS #35*.
+///
+/// # Examples
+///
+/// ```
+/// use icu_locid::locale;
+/// use icu_locid_transform::fallback::LocaleFallbacker;
+///
+/// // Set up a LocaleFallbacker with data.
+/// let fallbacker = LocaleFallbacker::new();
+///
+/// // Create a LocaleFallbackerIterator with a default configuration.
+/// // By default, uses language priority with no additional extension keywords.
+/// let mut fallback_iterator = fallbacker
+/// .for_config(Default::default())
+/// .fallback_for(locale!("hi-Latn-IN").into());
+///
+/// // Run the algorithm and check the results.
+/// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN").into());
+/// fallback_iterator.step();
+/// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn").into());
+/// fallback_iterator.step();
+/// assert_eq!(fallback_iterator.get(), &locale!("en-IN").into());
+/// fallback_iterator.step();
+/// assert_eq!(fallback_iterator.get(), &locale!("en-001").into());
+/// fallback_iterator.step();
+/// assert_eq!(fallback_iterator.get(), &locale!("en").into());
+/// fallback_iterator.step();
+/// assert_eq!(fallback_iterator.get(), &locale!("und").into());
+/// ```
+///
+/// [UTS #35: Locale Inheritance and Matching]: https://www.unicode.org/reports/tr35/#Locale_Inheritance
+/// [the design doc]: https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit
+#[doc(hidden)]
+#[derive(Debug, Clone, PartialEq)]
+pub struct LocaleFallbacker {
+ likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>,
+ parents: DataPayload<LocaleFallbackParentsV1Marker>,
+ collation_supplement: Option<DataPayload<CollationFallbackSupplementV1Marker>>,
+}
+
+/// Borrowed version of [`LocaleFallbacker`].
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct LocaleFallbackerBorrowed<'a> {
+ likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
+ parents: &'a LocaleFallbackParentsV1<'a>,
+ collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
+}
+
+/// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`].
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct LocaleFallbackerWithConfig<'a> {
+ likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
+ parents: &'a LocaleFallbackParentsV1<'a>,
+ supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
+ config: LocaleFallbackConfig,
+}
+
+/// Inner iteration type. Does not own the item under fallback.
+#[derive(Debug)]
+struct LocaleFallbackIteratorInner<'a> {
+ likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>,
+ parents: &'a LocaleFallbackParentsV1<'a>,
+ supplement: Option<&'a LocaleFallbackSupplementV1<'a>>,
+ config: LocaleFallbackConfig,
+ backup_extension: Option<Value>,
+ backup_subdivision: Option<Value>,
+ backup_variants: Option<Variants>,
+}
+
+ /// Iteration type for locale fallback operations.
+ ///
+ /// Because the `Iterator` trait does not allow items to borrow from the iterator, this type does
+ /// not implement that trait. Instead, use `.step()` and `.get()`.
+ #[derive(Debug)]
+ pub struct LocaleFallbackIterator<'a, 'b> {
+ current: DataLocale,
+ inner: LocaleFallbackIteratorInner<'a>,
+ phantom: core::marker::PhantomData<&'b ()>,
+ }
+
+impl LocaleFallbacker {
+ /// Creates a [`LocaleFallbackerBorrowed`] over compiled fallback data (likely subtags, parent locales, and the collation supplement).
+ ///
+ /// ✨ *Enabled with the `compiled_data` Cargo feature.*
+ ///
+ /// [📚 Help choosing a constructor](icu_provider::constructors)
+ #[cfg(feature = "compiled_data")]
+ #[allow(clippy::new_ret_no_self)] // keeping constructors together
+ pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> {
+ let tickstatic = LocaleFallbackerBorrowed {
+ likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1,
+ parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1,
+ collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1),
+ };
+ // SAFETY: we only shorten the lifetime, from LocaleFallbackerBorrowed<'static> to LocaleFallbackerBorrowed<'a>.
+ // ZeroMaps use associated types in a way that confuses the compiler, which gives up and marks them
+ // as invariant. However, they are covariant, and in non-const code this covariance can be safely triggered
+ // using Yokeable::transform. In const code we must transmute. In the long run we should
+ // be able to `transform()` in const code, and hopefully map polymorphism will also have improved (#3128).
+ unsafe { core::mem::transmute(tickstatic) }
+ }
+
+ icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError,
+ #[cfg(skip)]
+ functions: [
+ new,
+ try_new_with_any_provider,
+ try_new_with_buffer_provider,
+ try_new_unstable,
+ Self
+ ]);
+
+ #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
+ pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
+ where
+ P: DataProvider<LocaleFallbackLikelySubtagsV1Marker>
+ + DataProvider<LocaleFallbackParentsV1Marker>
+ + DataProvider<CollationFallbackSupplementV1Marker>
+ + ?Sized,
+ {
+ let likely_subtags = provider.load(Default::default())?.take_payload()?; // marker inferred from the field it is stored in
+ let parents = provider.load(Default::default())?.take_payload()?; // likewise inferred from the `parents` field
+ let collation_supplement = match DataProvider::<CollationFallbackSupplementV1Marker>::load(
+ provider,
+ Default::default(),
+ ) {
+ Ok(response) => Some(response.take_payload()?),
+ // The collation supplement is optional: a provider that lacks this key yields `None`
+ Err(DataError {
+ kind: DataErrorKind::MissingDataKey,
+ ..
+ }) => None,
+ Err(e) => return Err(e), // any other load failure is propagated to the caller
+ };
+ Ok(LocaleFallbacker {
+ likely_subtags,
+ parents,
+ collation_supplement,
+ })
+ }
+
+ /// Creates a [`LocaleFallbacker`] without fallback data. Using this constructor may result in
+ /// surprising behavior, especially in multi-script languages.
+ pub fn new_without_data() -> Self {
+ LocaleFallbacker {
+ likely_subtags: DataPayload::from_owned(Default::default()),
+ parents: DataPayload::from_owned(Default::default()),
+ collation_supplement: None,
+ }
+ }
+
+ /// Associates a configuration with this fallbacker.
+ #[inline]
+ pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig {
+ self.as_borrowed().for_config(config)
+ }
+
+ /// Derives a configuration from a [`DataKey`] and associates it
+ /// with this fallbacker.
+ #[inline]
+ #[doc(hidden)] // will be removed in 2.0
+ pub fn for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig {
+ self.for_config(data_key.fallback_config())
+ }
+
+ /// Creates a borrowed version of this fallbacker for performance.
+ pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed {
+ LocaleFallbackerBorrowed {
+ likely_subtags: self.likely_subtags.get(),
+ parents: self.parents.get(),
+ collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()),
+ }
+ }
+}
+
+ impl<'a> LocaleFallbackerBorrowed<'a> {
+ /// Associates a configuration with this fallbacker.
+ #[inline]
+ pub const fn for_config(self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig<'a> {
+ LocaleFallbackerWithConfig {
+ likely_subtags: self.likely_subtags,
+ parents: self.parents,
+ supplement: match config.fallback_supplement { // resolve the requested supplement to its data, if any
+ Some(LocaleFallbackSupplement::Collation) => self.collation_supplement,
+ _ => None, // any other setting, including no supplement at all, selects no supplemental data
+ },
+ config,
+ }
+ }
+ }
+
+ impl LocaleFallbackerBorrowed<'static> {
+ /// Cheaply converts a `LocaleFallbackerBorrowed<'static>` into a `LocaleFallbacker`.
+ pub const fn static_to_owned(self) -> LocaleFallbacker {
+ LocaleFallbacker {
+ likely_subtags: DataPayload::from_static_ref(self.likely_subtags),
+ parents: DataPayload::from_static_ref(self.parents),
+ collation_supplement: match self.collation_supplement { // NOTE(review): explicit match, presumably because `Option::map` is not callable in a `const fn`
+ None => None,
+ Some(x) => Some(DataPayload::from_static_ref(x)),
+ },
+ }
+ }
+ }
+
+impl<'a> LocaleFallbackerWithConfig<'a> {
+ /// Creates an iterator based on a [`DataLocale`].
+ ///
+ /// If you have a [`Locale`](icu_locid::Locale), call `.into()` to get a [`DataLocale`].
+ ///
+ /// When first initialized, the locale is normalized according to the fallback algorithm.
+ pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> {
+ self.normalize(&mut locale);
+ LocaleFallbackIterator {
+ current: locale,
+ inner: LocaleFallbackIteratorInner {
+ likely_subtags: self.likely_subtags,
+ parents: self.parents,
+ supplement: self.supplement,
+ config: self.config,
+ backup_extension: None,
+ backup_subdivision: None,
+ backup_variants: None,
+ },
+ phantom: core::marker::PhantomData,
+ }
+ }
+}
+
+impl LocaleFallbackIterator<'_, '_> {
+ /// Borrows the current [`DataLocale`] under fallback.
+ pub fn get(&self) -> &DataLocale {
+ &self.current
+ }
+
+ /// Takes the current [`DataLocale`] under fallback.
+ pub fn take(self) -> DataLocale {
+ self.current
+ }
+
+ /// Performs one step of the locale fallback algorithm.
+ ///
+ /// The fallback is completed once the inner [`DataLocale`] becomes `und`.
+ pub fn step(&mut self) -> &mut Self {
+ self.inner.step(&mut self.current);
+ self
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/src/lib.rs b/third_party/rust/icu_locid_transform/src/lib.rs
new file mode 100644
index 0000000000..4c4e34aeae
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/lib.rs
@@ -0,0 +1,116 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Canonicalization of locale identifiers based on [`CLDR`] data.
+//!
+//! This module is published as its own crate ([`icu_locid_transform`](https://docs.rs/icu_locid_transform/latest/icu_locid_transform/))
+//! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+//!
+//! It currently supports locale canonicalization based upon the canonicalization
+//! algorithm from [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`],
+//! as well as the minimize and maximize likely subtags algorithms
+//! as described in [`UTS #35: Unicode LDML 3. Likely Subtags`].
+//!
+//! The maximize method potentially updates a passed in locale in place
+ //! depending upon the results of running the 'Add Likely Subtags' algorithm
+//! from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+//!
+ //! The minimize method likewise potentially updates a passed in locale in place, depending upon the
+ //! results of running the 'Remove Likely Subtags' algorithm from [`UTS #35: Unicode LDML 3. Likely Subtags`].
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::Locale;
+//! use icu::locid_transform::{LocaleCanonicalizer, TransformResult};
+//!
+//! let lc = LocaleCanonicalizer::new();
+//!
+//! let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc"
+//! .parse()
+//! .expect("parse failed");
+//! assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
+//! assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::<Locale>().unwrap());
+//! ```
+//!
+//! ```
+//! use icu::locid::locale;
+//! use icu::locid_transform::{LocaleExpander, TransformResult};
+//!
+//! let lc = LocaleExpander::new();
+//!
+//! let mut locale = locale!("zh-CN");
+//! assert_eq!(lc.maximize(&mut locale), TransformResult::Modified);
+//! assert_eq!(locale, locale!("zh-Hans-CN"));
+//!
+//! let mut locale = locale!("zh-Hant-TW");
+//! assert_eq!(lc.maximize(&mut locale), TransformResult::Unmodified);
+//! assert_eq!(locale, locale!("zh-Hant-TW"));
+//! ```
+//!
+//! ```
+//! use icu::locid::locale;
+//! use icu::locid_transform::{LocaleExpander, TransformResult};
+//! use writeable::assert_writeable_eq;
+//!
+//! let lc = LocaleExpander::new();
+//!
+//! let mut locale = locale!("zh-Hans-CN");
+//! assert_eq!(lc.minimize(&mut locale), TransformResult::Modified);
+//! assert_eq!(locale, locale!("zh"));
+//!
+//! let mut locale = locale!("zh");
+//! assert_eq!(lc.minimize(&mut locale), TransformResult::Unmodified);
+//! assert_eq!(locale, locale!("zh"));
+//! ```
+//!
+//! [`ICU4X`]: ../icu/index.html
+//! [`CLDR`]: http://cldr.unicode.org/
+ //! [`UTS #35: Unicode LDML 3. Likely Subtags`]: https://www.unicode.org/reports/tr35/#Likely_Subtags
+ //! [`UTS #35: Unicode LDML 3. LocaleId Canonicalization`]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization
+
+// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, feature = "std")), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::indexing_slicing,
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums,
+ missing_debug_implementations,
+ )
+)]
+#![warn(missing_docs)]
+
+extern crate alloc;
+
+mod canonicalizer;
+mod directionality;
+mod error;
+mod expander;
+pub mod fallback;
+pub mod provider;
+
+pub use canonicalizer::LocaleCanonicalizer;
+pub use directionality::{Direction, LocaleDirectionality};
+pub use error::LocaleTransformError;
+pub use expander::LocaleExpander;
+#[doc(inline)]
+pub use fallback::LocaleFallbacker;
+
+ /// Used to track the result of a transformation operation that potentially modifies its argument in place.
+ #[derive(Debug, PartialEq)]
+ #[allow(clippy::exhaustive_enums)] // this enum is stable
+ pub enum TransformResult {
+ /// The transformation operation modified the locale.
+ Modified,
+ /// The transformation operation did not modify the locale.
+ Unmodified,
+ }
+
+#[doc(no_inline)]
+pub use LocaleTransformError as Error;
diff --git a/third_party/rust/icu_locid_transform/src/provider/canonicalizer.rs b/third_party/rust/icu_locid_transform/src/provider/canonicalizer.rs
new file mode 100644
index 0000000000..7638bba1f8
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/provider/canonicalizer.rs
@@ -0,0 +1,81 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_locid::subtags::{Language, Region, Script, Variant};
+use icu_provider::prelude::*;
+use tinystr::UnvalidatedTinyAsciiStr;
+use zerovec::{VarZeroVec, ZeroMap, ZeroSlice};
+
+#[icu_provider::data_struct(marker(AliasesV1Marker, "locid_transform/aliases@1", singleton))]
+#[derive(PartialEq, Clone, Default)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+/// This alias data is used for locale canonicalization. Each field defines a
+/// mapping from an old identifier to a new identifier, based upon the rules in
+ /// <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data
+/// is stored in sorted order, allowing for binary search to identify rules to
+/// apply. It is broken down into smaller vectors based upon some characteristic
+/// of the data, to help avoid unnecessary searches. For example, the `sgn_region`
+/// field contains aliases for sign language and region, so that it is not
+/// necessary to search the data unless the input is a sign language.
+///
+/// The algorithm in tr35 is not guaranteed to terminate on data other than what
+/// is currently in CLDR. For this reason, it is not a good idea to attempt to add
+/// or modify aliases for use in this structure.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+// TODO: Use validated types as value types
+#[derive(Debug)]
+pub struct AliasesV1<'data> {
+ /// `[language(-variant)+] -> [langid]`
+ /// This is not a map as it's searched linearly according to the canonicalization rules.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
+ /// `sgn-[region] -> [language]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>,
+ /// `[language{2}] -> [langid]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>,
+ /// `[language{3}] -> [langid]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>,
+ /// `[langid] -> [langid]`
+ /// This is not a map as it's searched linearly according to the canonicalization rules.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
+
+ /// `[script] -> [script]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, Script>,
+
+ /// `[region{2}] -> [region]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>,
+ /// `[region{3}] -> [region]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>,
+
+ /// `[region] -> [region]+`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>,
+
+ /// `[variant] -> [variant]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>,
+
+ /// `[value{7}] -> [value{7}]`
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
+}
diff --git a/third_party/rust/icu_locid_transform/src/provider/directionality.rs b/third_party/rust/icu_locid_transform/src/provider/directionality.rs
new file mode 100644
index 0000000000..568248180d
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/provider/directionality.rs
@@ -0,0 +1,36 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_provider::prelude::*;
+use zerovec::ZeroVec;
+
+#[icu_provider::data_struct(marker(
+ ScriptDirectionV1Marker,
+ "locid_transform/script_dir@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This directionality data is used to determine the script directionality of a locale.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct ScriptDirectionV1<'data> {
+ /// Scripts in right-to-left direction.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub rtl: ZeroVec<'data, UnvalidatedScript>,
+ /// Scripts in left-to-right direction.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub ltr: ZeroVec<'data, UnvalidatedScript>,
+}
diff --git a/third_party/rust/icu_locid_transform/src/provider/expander.rs b/third_party/rust/icu_locid_transform/src/provider/expander.rs
new file mode 100644
index 0000000000..2f624b40a8
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/provider/expander.rs
@@ -0,0 +1,243 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_locid::subtags::{Language, Region, Script};
+use icu_provider::prelude::*;
+use zerovec::ZeroMap;
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsV1Marker,
+ "locid_transform/likelysubtags@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for the minimize and maximize operations.
+/// Each field defines a mapping from an old identifier to a new identifier,
+/// based upon the rules in
+/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+///
+ /// The data is broken down into smaller vectors based upon the rules
+/// defined for the likely subtags maximize algorithm.
+///
+/// For efficiency, only the relevant part of the LanguageIdentifier is stored
+/// for searching and replacing. E.g., the `language_script` field is used to store
+/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
+/// region.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsV1<'data> {
+ /// Language and script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
+ /// Language and region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
+ /// Just language.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
+ /// Script and region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
+ /// Just script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
+ /// Just region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
+ /// Undefined.
+ pub und: (Language, Script, Region),
+}
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsForLanguageV1Marker,
+ "locid_transform/likelysubtags_l@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for the minimize and maximize operations.
+/// Each field defines a mapping from an old identifier to a new identifier,
+/// based upon the rules in
+/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+///
+ /// The data is broken down into smaller vectors based upon the rules
+/// defined for the likely subtags maximize algorithm.
+///
+/// For efficiency, only the relevant part of the LanguageIdentifier is stored
+/// for searching and replacing. E.g., the `language_script` field is used to store
+/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
+/// region.
+///
+/// This struct contains mappings when the input contains a language subtag.
+/// Also see [`LikelySubtagsForScriptRegionV1`].
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsForLanguageV1<'data> {
+ /// Language and script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
+ /// Language and region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
+ /// Just language.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
+ /// Undefined.
+ pub und: (Language, Script, Region),
+}
+
+ impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForLanguageV1<'data> {
+ fn from(other: LikelySubtagsV1<'data>) -> Self { // keeps the language-keyed maps and `und`; the other maps are dropped
+ Self {
+ language_script: other.language_script,
+ language_region: other.language_region,
+ language: other.language,
+ und: other.und,
+ }
+ }
+ }
+
+ impl<'data> LikelySubtagsForLanguageV1<'data> {
+ pub(crate) fn clone_from_borrowed(other: &LikelySubtagsV1<'data>) -> Self { // same fields as the `From` impl, cloned from a borrow
+ Self {
+ language_script: other.language_script.clone(),
+ language_region: other.language_region.clone(),
+ language: other.language.clone(),
+ und: other.und,
+ }
+ }
+ }
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsForScriptRegionV1Marker,
+ "locid_transform/likelysubtags_sr@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for the minimize and maximize operations.
+/// Each field defines a mapping from an old identifier to a new identifier,
+/// based upon the rules in
+/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
+///
+/// The data is broken down into smaller vectors based upon the rules
+/// defined for the likely subtags maximize algorithm.
+///
+/// For efficiency, only the relevant part of the LanguageIdentifier is stored
+/// for searching and replacing. E.g., the `script_region` field is used to store
+/// rules for `LanguageIdentifier`s that contain a script and a region, but not a
+/// language.
+///
+/// This struct contains mappings when the input does not contain a language subtag.
+/// Also see [`LikelySubtagsForLanguageV1`].
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsForScriptRegionV1<'data> {
+ /// Script and region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
+ /// Just script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
+ /// Just region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
+}
+
+impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForScriptRegionV1<'data> {
+ fn from(other: LikelySubtagsV1<'data>) -> Self {
+ Self {
+ script_region: other.script_region,
+ script: other.script,
+ region: other.region,
+ }
+ }
+}
+
+#[icu_provider::data_struct(marker(
+ LikelySubtagsExtendedV1Marker,
+ "locid_transform/likelysubtags_ext@1",
+ singleton
+))]
+#[derive(Debug, PartialEq, Clone)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+/// This likely subtags data is used for full coverage of locales, including ones that
+/// don't otherwise have data in the Common Locale Data Repository (CLDR).
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[yoke(prove_covariance_manually)]
+pub struct LikelySubtagsExtendedV1<'data> {
+ /// Language and script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
+ /// Language and region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
+ /// Just language.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
+ /// Script and region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
+ /// Just script.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
+ /// Just region.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
+}
+
+impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsExtendedV1<'data> {
+ fn from(other: LikelySubtagsV1<'data>) -> Self {
+ Self {
+ language_script: other.language_script,
+ language_region: other.language_region,
+ language: other.language,
+ script_region: other.script_region,
+ script: other.script,
+ region: other.region,
+ }
+ }
+}
diff --git a/third_party/rust/icu_locid_transform/src/provider/fallback.rs b/third_party/rust/icu_locid_transform/src/provider/fallback.rs
new file mode 100644
index 0000000000..c0635afe7f
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/provider/fallback.rs
@@ -0,0 +1,102 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use icu_locid::extensions::unicode::Key;
+use icu_locid::subtags::{region, script, Language, Region, Script};
+use icu_provider::prelude::*;
+use zerovec::ule::UnvalidatedStr;
+use zerovec::ZeroMap;
+use zerovec::ZeroMap2d;
+
+/// Locale fallback rules derived from likely subtags data.
+#[icu_provider::data_struct(marker(
+ LocaleFallbackLikelySubtagsV1Marker,
+ "fallback/likelysubtags@1",
+ singleton
+))]
+#[derive(Default, Clone, PartialEq, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+pub struct LocaleFallbackLikelySubtagsV1<'data> {
+ /// Map from language to the default script in that language. Languages whose default script
+ /// is `Latn` are not included in the map for data size savings.
+ ///
+ /// Example: "zh" defaults to "Hans", which is in this map.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub l2s: ZeroMap<'data, UnvalidatedLanguage, Script>,
+ /// Map from language-region pairs to a script. Only populated if the script is different
+ /// from the one in `l2s` for that language.
+ ///
+ /// Example: "zh-TW" defaults to "Hant", which is in this map.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub lr2s: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedRegion, Script>,
+ /// Map from language to the default region in that language. Languages whose default region
+ /// is `ZZ` are not included in the map for data size savings.
+ ///
+ /// Example: "zh" defaults to "CN".
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub l2r: ZeroMap<'data, UnvalidatedLanguage, Region>,
+ /// Map from language-script pairs to a region. Only populated if the region is different
+ /// from the one in `l2r` for that language.
+ ///
+ /// Example: "zh-Hant" defaults to "TW".
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub ls2r: ZeroMap2d<'data, UnvalidatedLanguage, UnvalidatedScript, Region>,
+}
+
+/// `Latn` is the most common script, so it is defaulted for data size savings.
+pub const DEFAULT_SCRIPT: Script = script!("Latn");
+
+/// `ZZ` is the most common region, so it is defaulted for data size savings.
+pub const DEFAULT_REGION: Region = region!("ZZ");
+
+/// Locale fallback rules derived from CLDR parent locales data.
+#[icu_provider::data_struct(marker(
+ LocaleFallbackParentsV1Marker,
+ "fallback/parents@1",
+ singleton
+))]
+#[derive(Default, Clone, PartialEq, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+pub struct LocaleFallbackParentsV1<'data> {
+ /// Map from language identifier to language identifier, indicating that the language on the
+ /// left should inherit from the language on the right.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>,
+}
+
+/// Key-specific supplemental fallback data.
+#[icu_provider::data_struct(marker(
+ CollationFallbackSupplementV1Marker,
+ "fallback/supplement/co@1",
+ singleton,
+))]
+#[derive(Default, Clone, PartialEq, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_locid_transform::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+#[yoke(prove_covariance_manually)]
+pub struct LocaleFallbackSupplementV1<'data> {
+ /// Additional parent locales to supplement the common ones.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub parents: ZeroMap<'data, UnvalidatedStr, (Language, Option<Script>, Option<Region>)>,
+ /// Default values for Unicode extension keywords.
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub unicode_extension_defaults: ZeroMap2d<'data, Key, UnvalidatedStr, UnvalidatedStr>,
+}
diff --git a/third_party/rust/icu_locid_transform/src/provider/mod.rs b/third_party/rust/icu_locid_transform/src/provider/mod.rs
new file mode 100644
index 0000000000..62ac5d240c
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/src/provider/mod.rs
@@ -0,0 +1,114 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// Provider structs must be stable
+#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
+
+//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
+//!
+//! <div class="stab unstable">
+//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
+//! to be stable, their Rust representation might not be. Use with caution.
+//! </div>
+//!
+//! Read more about data providers: [`icu_provider`]
+
+mod canonicalizer;
+pub use canonicalizer::*;
+mod directionality;
+pub use directionality::*;
+mod expander;
+pub use expander::*;
+mod fallback;
+pub use fallback::*;
+
+#[cfg(feature = "compiled_data")]
+#[derive(Debug)]
+/// Baked data
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
+/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
+/// </div>
+pub struct Baked;
+
+#[cfg(feature = "compiled_data")]
+const _: () = {
+ pub mod icu {
+ pub use crate as locid_transform;
+ pub use icu_locid as locid;
+ }
+ icu_locid_transform_data::make_provider!(Baked);
+ icu_locid_transform_data::impl_fallback_likelysubtags_v1!(Baked);
+ icu_locid_transform_data::impl_fallback_parents_v1!(Baked);
+ icu_locid_transform_data::impl_fallback_supplement_co_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_aliases_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_likelysubtags_ext_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_likelysubtags_l_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_likelysubtags_sr_v1!(Baked);
+ icu_locid_transform_data::impl_locid_transform_script_dir_v1!(Baked);
+};
+
+#[cfg(feature = "datagen")]
+use icu_provider::prelude::*;
+
+#[cfg(feature = "datagen")]
+/// The latest minimum set of keys required by this component.
+pub const KEYS: &[DataKey] = &[
+ AliasesV1Marker::KEY,
+ CollationFallbackSupplementV1Marker::KEY,
+ LikelySubtagsExtendedV1Marker::KEY,
+ LikelySubtagsForLanguageV1Marker::KEY,
+ LikelySubtagsForScriptRegionV1Marker::KEY,
+ LocaleFallbackLikelySubtagsV1Marker::KEY,
+ LocaleFallbackParentsV1Marker::KEY,
+ ScriptDirectionV1Marker::KEY,
+];
+
+use alloc::borrow::Cow;
+use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
+
+// We use raw TinyAsciiStrs for map keys, as we then don't have to
+// validate them as subtags on deserialization. Map lookup can be
+// done even if they are not valid tags (an invalid key will just
+// become inaccessible).
+type UnvalidatedLanguage = UnvalidatedTinyAsciiStr<3>;
+type UnvalidatedScript = UnvalidatedTinyAsciiStr<4>;
+type UnvalidatedRegion = UnvalidatedTinyAsciiStr<3>;
+type UnvalidatedVariant = UnvalidatedTinyAsciiStr<8>;
+type UnvalidatedSubdivision = UnvalidatedTinyAsciiStr<7>;
+type SemivalidatedSubdivision = TinyAsciiStr<7>;
+
+// LanguageIdentifier doesn't have an AsULE implementation, so we have
+// to store strs and parse when needed.
+type UnvalidatedLanguageIdentifier = str;
+type UnvalidatedLanguageIdentifierPair = StrStrPairVarULE;
+
+#[zerovec::make_varule(StrStrPairVarULE)]
+#[zerovec::derive(Debug)]
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
+#[cfg_attr(
+ feature = "serde",
+ derive(serde::Deserialize),
+ zerovec::derive(Deserialize)
+)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ zerovec::derive(Serialize),
+ databake(path = icu_locid_transform::provider),
+)]
+/// A pair of strings with an `EncodeAsVarULE` implementation.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+pub struct StrStrPair<'a>(
+ #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
+ #[cfg_attr(feature = "serde", serde(borrow))] pub Cow<'a, str>,
+);
diff --git a/third_party/rust/icu_locid_transform/tests/fixtures/canonicalize.json b/third_party/rust/icu_locid_transform/tests/fixtures/canonicalize.json
new file mode 100644
index 0000000000..89e6cec052
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/tests/fixtures/canonicalize.json
@@ -0,0 +1,444 @@
+[
+ {
+ "input": "es",
+ "output": "es"
+ },
+ {
+ "input": "en-CA",
+ "output": "en-CA"
+ },
+ {
+ "input": "cka",
+ "output": "cmr"
+ },
+ {
+ "input": "cze",
+ "output": "cs"
+ },
+ {
+ "input": "gfx",
+ "output": "vaj"
+ },
+ {
+ "input": "sgn-BR",
+ "output": "bzs"
+ },
+ {
+ "input": "sgn-DD",
+ "output": "gsg"
+ },
+ {
+ "input": "tam",
+ "output": "ta"
+ },
+ {
+ "input": "und-aaland",
+ "output": "und-AX"
+ },
+ {
+ "input": "nob-bokmal",
+ "output": "nb"
+ },
+ {
+ "input": "no-nynorsk",
+ "output": "nn"
+ },
+ {
+ "input": "und-Qaai",
+ "output": "und-Zinh"
+ },
+ {
+ "input": "en-554",
+ "output": "en-NZ"
+ },
+ {
+ "input": "en-084",
+ "output": "en-BZ"
+ },
+ {
+ "input": "art-lojban",
+ "output": "jbo"
+ },
+ {
+ "input": "zh-guoyu",
+ "output": "zh"
+ },
+ {
+ "input": "zh-hakka",
+ "output": "hak"
+ },
+ {
+ "input": "zh-xiang",
+ "output": "hsn"
+ },
+ {
+ "input": "aar-x-private",
+ "output": "aa-x-private"
+ },
+ {
+ "input": "heb-x-private",
+ "output": "he-x-private"
+ },
+ {
+ "input": "ces",
+ "output": "cs"
+ },
+ {
+ "input": "hy-arevela",
+ "output": "hy"
+ },
+ {
+ "input": "hy-arevmda",
+ "output": "hyw"
+ },
+ {
+ "input": "cel-gaulish",
+ "output": "xtg"
+ },
+ {
+ "input": "ja-latn-hepburn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-fonipa-hepburn-heploc",
+ "output": "ja-Latn-alalc97-fonipa"
+ },
+ {
+ "input": "und-Armn-SU",
+ "output": "und-Armn-AM"
+ },
+ {
+ "input": "sh",
+ "output": "sr-Latn"
+ },
+ {
+ "input": "sh-Cyrl",
+ "output": "sr-Cyrl"
+ },
+ {
+ "input": "cnr",
+ "output": "sr-ME"
+ },
+ {
+ "input": "cnr-BA",
+ "output": "sr-BA"
+ },
+ {
+ "input": "ru-SU",
+ "output": "ru-RU"
+ },
+ {
+ "input": "ru-810",
+ "output": "ru-RU"
+ },
+ {
+ "input": "en-SU",
+ "output": "en-RU"
+ },
+ {
+ "input": "en-810",
+ "output": "en-RU"
+ },
+ {
+ "input": "und-SU",
+ "output": "und-RU"
+ },
+ {
+ "input": "und-810",
+ "output": "und-RU"
+ },
+ {
+ "input": "und-Latn-SU",
+ "output": "und-Latn-RU"
+ },
+ {
+ "input": "und-Latn-810",
+ "output": "und-Latn-RU"
+ },
+ {
+ "input": "hy-SU",
+ "output": "hy-AM"
+ },
+ {
+ "input": "hy-810",
+ "output": "hy-AM"
+ },
+ {
+ "input": "und-Armn-SU",
+ "output": "und-Armn-AM"
+ },
+ {
+ "input": "und-Armn-810",
+ "output": "und-Armn-AM"
+ },
+ {
+ "input": "sr-CS",
+ "output": "sr-RS"
+ },
+ {
+ "input": "sr-Latn-CS",
+ "output": "sr-Latn-RS"
+ },
+ {
+ "input": "sr-Cyrl-CS",
+ "output": "sr-Cyrl-RS"
+ },
+ {
+ "input": "az-NT",
+ "output": "az-SA"
+ },
+ {
+ "input": "sl-t-sl-rozaj-biske-1994",
+ "output": "sl-t-sl-1994-biske-rozaj"
+ },
+ {
+ "input": "DE-T-M0-DIN-K0-QWERTZ",
+ "output": "de-t-k0-qwertz-m0-din"
+ },
+ {
+ "input": "en-t-m0-true",
+ "output": "en-t-m0-true"
+ },
+ {
+ "input": "en-t-iw",
+ "output": "en-t-he"
+ },
+ {
+ "input": "und-u-rg-no23",
+ "output": "und-u-rg-no50"
+ },
+ {
+ "input": "und-u-rg-cn11",
+ "output": "und-u-rg-cnbj"
+ },
+ {
+ "input": "und-u-rg-cz10a",
+ "output": "und-u-rg-cz110"
+ },
+ {
+ "input": "und-u-rg-fra",
+ "output": "und-u-rg-frges"
+ },
+ {
+ "input": "und-u-rg-frg",
+ "output": "und-u-rg-frges"
+ },
+ {
+ "input": "und-u-rg-lud",
+ "output": "und-u-rg-lucl"
+ },
+ {
+ "input": "und-NO-u-rg-no23",
+ "output": "und-NO-u-rg-no50"
+ },
+ {
+ "input": "und-CN-u-rg-cn11",
+ "output": "und-CN-u-rg-cnbj"
+ },
+ {
+ "input": "und-CZ-u-rg-cz10a",
+ "output": "und-CZ-u-rg-cz110"
+ },
+ {
+ "input": "und-FR-u-rg-fra",
+ "output": "und-FR-u-rg-frges"
+ },
+ {
+ "input": "und-FR-u-rg-frg",
+ "output": "und-FR-u-rg-frges"
+ },
+ {
+ "input": "und-u-rg-lud",
+ "output": "und-u-rg-lucl"
+ },
+ {
+ "input": "und-u-sd-no23",
+ "output": "und-u-sd-no50"
+ },
+ {
+ "input": "und-u-sd-cn11",
+ "output": "und-u-sd-cnbj"
+ },
+ {
+ "input": "und-u-sd-cz10a",
+ "output": "und-u-sd-cz110"
+ },
+ {
+ "input": "und-u-sd-fra",
+ "output": "und-u-sd-frges"
+ },
+ {
+ "input": "hy-arevela",
+ "output": "hy"
+ },
+ {
+ "input": "hy-Armn-arevela",
+ "output": "hy-Armn"
+ },
+ {
+ "input": "hy-AM-arevela",
+ "output": "hy-AM"
+ },
+ {
+ "input": "hy-arevela-fonipa",
+ "output": "hy-fonipa"
+ },
+ {
+ "input": "hy-fonipa-arevela",
+ "output": "hy-fonipa"
+ },
+ {
+ "input": "hy-arevmda",
+ "output": "hyw"
+ },
+ {
+ "input": "hy-Armn-arevmda",
+ "output": "hyw-Armn"
+ },
+ {
+ "input": "hy-AM-arevmda",
+ "output": "hyw-AM"
+ },
+ {
+ "input": "hy-arevmda-fonipa",
+ "output": "hyw-fonipa"
+ },
+ {
+ "input": "hy-fonipa-arevmda",
+ "output": "hyw-fonipa"
+ },
+ {
+ "input": "ja-Latn-hepburn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-JP-hepburn-heploc",
+ "output": "ja-Latn-JP-alalc97"
+ },
+ {
+ "input": "sv-aaland",
+ "output": "sv-AX"
+ },
+ {
+ "input": "el-polytoni",
+ "output": "el-polyton"
+ },
+ {
+ "input": "ja-Latn-alalc97-hepburn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-hepburn-alalc97-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-hepburn-heploc-alalc97",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-heploc-hepburn",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-heploc",
+ "output": "ja-Latn-alalc97"
+ },
+ {
+ "input": "ja-Latn-aaland-heploc",
+ "output": "ja-Latn-AX-alalc97"
+ },
+ {
+ "input": "ja-Latn-heploc-polytoni",
+ "output": "ja-Latn-alalc97-polyton"
+ },
+ {
+ "input": "und-Latn-t-und-hani-m0-names",
+ "output": "und-Latn-t-und-hani-m0-prprname",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ca-ethiopian-amete-alem",
+ "output": "und-u-ca-ethioaa",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ca-islamicc",
+ "output": "und-u-ca-islamic-civil",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ks-primary",
+ "output": "und-u-ks-level1",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ks-tertiary",
+ "output": "und-u-ks-level3",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ms-imperial",
+ "output": "und-u-ms-uksystem",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kb-yes",
+ "output": "und-u-kb",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kc-yes",
+ "output": "und-u-kc",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kh-yes",
+ "output": "und-u-kh",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kk-yes",
+ "output": "und-u-kk",
+ "disabled": true
+ },
+ {
+ "input": "und-u-kn-yes",
+ "output": "und-u-kn",
+ "disabled": true
+ },
+ {
+ "input": "und-u-ka-yes",
+ "output": "und-u-ka-yes",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-cnckg",
+ "output": "und-u-tz-cnsha",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-eire",
+ "output": "und-u-tz-iedub",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-est",
+ "output": "und-u-tz-utcw05",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-gmt0",
+ "output": "und-u-tz-gmt",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-uct",
+ "output": "und-u-tz-utc",
+ "disabled": true
+ },
+ {
+ "input": "und-u-tz-zulu",
+ "output": "und-u-tz-utc",
+ "disabled": true
+ }
+] \ No newline at end of file
diff --git a/third_party/rust/icu_locid_transform/tests/fixtures/maximize.json b/third_party/rust/icu_locid_transform/tests/fixtures/maximize.json
new file mode 100644
index 0000000000..7c241da0fe
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/tests/fixtures/maximize.json
@@ -0,0 +1,182 @@
+[
+ {
+ "input": "en-US",
+ "output": "en-Latn-US"
+ },
+ {
+ "input": "en-GB",
+ "output": "en-Latn-GB"
+ },
+ {
+ "input": "es-AR",
+ "output": "es-Latn-AR"
+ },
+ {
+ "input": "it",
+ "output": "it-Latn-IT"
+ },
+ {
+ "input": "zh-Hans-CN",
+ "output": "zh-Hans-CN"
+ },
+ {
+ "input": "de-AT",
+ "output": "de-Latn-AT"
+ },
+ {
+ "input": "pl",
+ "output": "pl-Latn-PL"
+ },
+ {
+ "input": "fr-FR",
+ "output": "fr-Latn-FR"
+ },
+ {
+ "input": "de-AT",
+ "output": "de-Latn-AT"
+ },
+ {
+ "input": "sr-Cyrl-SR",
+ "output": "sr-Cyrl-SR"
+ },
+ {
+ "input": "nb-NO",
+ "output": "nb-Latn-NO"
+ },
+ {
+ "input": "fr-FR",
+ "output": "fr-Latn-FR"
+ },
+ {
+ "input": "mk",
+ "output": "mk-Cyrl-MK"
+ },
+ {
+ "input": "uk",
+ "output": "uk-Cyrl-UA"
+ },
+ {
+ "input": "und-PL",
+ "output": "pl-Latn-PL"
+ },
+ {
+ "input": "und-Latn-AM",
+ "output": "ku-Latn-AM"
+ },
+ {
+ "input": "ug-Cyrl",
+ "output": "ug-Cyrl-KZ"
+ },
+ {
+ "input": "sr-ME",
+ "output": "sr-Latn-ME"
+ },
+ {
+ "input": "mn-Mong",
+ "output": "mn-Mong-CN"
+ },
+ {
+ "input": "lif-Limb",
+ "output": "lif-Limb-IN"
+ },
+ {
+ "input": "gan",
+ "output": "gan-Hans-CN"
+ },
+ {
+ "input": "zh-Hant",
+ "output": "zh-Hant-TW"
+ },
+ {
+ "input": "yue-Hans",
+ "output": "yue-Hans-CN"
+ },
+ {
+ "input": "unr",
+ "output": "unr-Beng-IN"
+ },
+ {
+ "input": "unr-Deva",
+ "output": "unr-Deva-NP"
+ },
+ {
+ "input": "und-Thai-CN",
+ "output": "lcp-Thai-CN"
+ },
+ {
+ "input": "ug-Cyrl",
+ "output": "ug-Cyrl-KZ"
+ },
+ {
+ "input": "en-Latn-DE",
+ "output": "en-Latn-DE"
+ },
+ {
+ "input": "pl-FR",
+ "output": "pl-Latn-FR"
+ },
+ {
+ "input": "de-CH",
+ "output": "de-Latn-CH"
+ },
+ {
+ "input": "tuq",
+ "output": "tuq-Latn-TD"
+ },
+ {
+ "input": "sr-ME",
+ "output": "sr-Latn-ME"
+ },
+ {
+ "input": "ng",
+ "output": "ng-Latn-NA"
+ },
+ {
+ "input": "klx",
+ "output": "klx-Latn-PG"
+ },
+ {
+ "input": "kk-Arab",
+ "output": "kk-Arab-CN"
+ },
+ {
+ "input": "en-Cyrl",
+ "output": "en-Cyrl-US"
+ },
+ {
+ "input": "und-Cyrl-UK",
+ "output": "ru-Cyrl-UK"
+ },
+ {
+ "input": "und-Arab",
+ "output": "ar-Arab-EG"
+ },
+ {
+ "input": "und-Arab-FO",
+ "output": "ar-Arab-FO"
+ },
+ {
+ "input": "zh-TW",
+ "output": "zh-Hant-TW"
+ },
+ {
+ "input": "und",
+ "output": "en-Latn-US"
+ },
+ {
+ "input": "zh-SG",
+ "output": "zh-Hans-SG"
+ },
+ {
+ "input": "und-TW",
+ "output": "zh-Hant-TW"
+ },
+ {
+ "input": "zh-hant-u-nu-Chinese-hc-h24",
+ "output": "zh-Hant-TW-u-hc-h24-nu-chinese"
+ },
+ {
+ "input": "und-latn-AQ",
+ "output": "en-Latn-AQ"
+ }
+]
diff --git a/third_party/rust/icu_locid_transform/tests/fixtures/minimize.json b/third_party/rust/icu_locid_transform/tests/fixtures/minimize.json
new file mode 100644
index 0000000000..6c225e230d
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/tests/fixtures/minimize.json
@@ -0,0 +1,26 @@
+[
+ {
+ "input": "zh-Hant",
+ "output": "zh-TW"
+ },
+ {
+ "input": "en-Latn-US",
+ "output": "en"
+ },
+ {
+ "input": "en",
+ "output": "en"
+ },
+ {
+ "input": "und",
+ "output": "en"
+ },
+ {
+ "input": "es-ES-preeuro",
+ "output": "es-preeuro"
+ },
+ {
+ "input": "zh-Hant-TW-u-hc-h24-nu-chinese",
+ "output": "zh-TW-u-hc-h24-nu-chinese"
+ }
+]
diff --git a/third_party/rust/icu_locid_transform/tests/fixtures/mod.rs b/third_party/rust/icu_locid_transform/tests/fixtures/mod.rs
new file mode 100644
index 0000000000..70dfb0381d
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/tests/fixtures/mod.rs
@@ -0,0 +1,12 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use serde::Deserialize;
+
+#[derive(Deserialize)]
+pub struct CanonicalizationTest {
+ pub input: String,
+ pub output: String,
+ pub disabled: Option<bool>,
+}
diff --git a/third_party/rust/icu_locid_transform/tests/helpers/mod.rs b/third_party/rust/icu_locid_transform/tests/helpers/mod.rs
new file mode 100644
index 0000000000..d250c510c5
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/tests/helpers/mod.rs
@@ -0,0 +1,15 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+pub fn read_fixture<T>(path: &str) -> Result<T, Error>
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?;
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?)
+}
diff --git a/third_party/rust/icu_locid_transform/tests/locale_canonicalizer.rs b/third_party/rust/icu_locid_transform/tests/locale_canonicalizer.rs
new file mode 100644
index 0000000000..06e360f53c
--- /dev/null
+++ b/third_party/rust/icu_locid_transform/tests/locale_canonicalizer.rs
@@ -0,0 +1,82 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use icu_locid::Locale;
+use icu_locid_transform::{LocaleCanonicalizer, LocaleExpander, TransformResult};
+use writeable::assert_writeable_eq;
+
+#[test]
+fn test_maximize() {
+ let lc = LocaleExpander::new_extended();
+
+ let path = "./tests/fixtures/maximize.json";
+ let testcases: Vec<fixtures::CanonicalizationTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ for case in testcases {
+ if let Some(true) = case.disabled {
+ continue;
+ }
+ let mut locale: Locale = case.input.parse().unwrap();
+ let unmodified = locale.clone();
+ let result = lc.maximize(&mut locale);
+ assert_writeable_eq!(locale, case.output);
+ if result == TransformResult::Modified {
+ assert_ne!(locale, unmodified);
+ } else {
+ assert_eq!(locale, unmodified);
+ }
+ }
+}
+
+#[test]
+fn test_minimize() {
+ let lc = LocaleExpander::new_extended();
+
+ let path = "./tests/fixtures/minimize.json";
+ let testcases: Vec<fixtures::CanonicalizationTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ for case in testcases {
+ if let Some(true) = case.disabled {
+ continue;
+ }
+ let mut locale: Locale = case.input.parse().unwrap();
+ let unmodified = locale.clone();
+ let result = lc.minimize(&mut locale);
+ assert_writeable_eq!(locale, case.output);
+ if result == TransformResult::Modified {
+ assert_ne!(locale, unmodified);
+ } else {
+ assert_eq!(locale, unmodified);
+ }
+ }
+}
+
+#[test]
+fn test_canonicalize() {
+ let lc = LocaleCanonicalizer::new();
+
+ let path = "./tests/fixtures/canonicalize.json";
+ let testcases: Vec<fixtures::CanonicalizationTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ for case in testcases {
+ if let Some(true) = case.disabled {
+ continue;
+ }
+ let mut locale: Locale = case.input.parse().expect("Unable to parse input");
+ let unmodified = locale.clone();
+ let result = lc.canonicalize(&mut locale);
+ assert_writeable_eq!(locale, case.output);
+ if result == TransformResult::Modified {
+ assert_ne!(locale, unmodified);
+ } else {
+ assert_eq!(locale, unmodified);
+ }
+ }
+}
diff --git a/third_party/rust/icu_locid_transform_data/.cargo-checksum.json b/third_party/rust/icu_locid_transform_data/.cargo-checksum.json
new file mode 100644
index 0000000000..00252885c6
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"eedae110c706f273f791010478827d1f2ae1a6ad17eb8ecd5788b5f6c23338c4","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"eba7efca11d982b63e9606354a0c3bc81c272989683d8cc7a6d98a7325a1c9b6","data/macros.rs":"bf4776d79e736ece851aea1540aa641899f202125b6711cd5d2d6e787218eb85","data/macros/fallback_likelysubtags_v1.rs.data":"5877fee0995d25de89aa048f02ebb957bc64daec8eff4ab4e860192030930ea9","data/macros/fallback_parents_v1.rs.data":"49ce55c00ed504a91d68db269e1c90420469db0f7666240c047a0de4c61ccc81","data/macros/fallback_supplement_co_v1.rs.data":"3353645f99e745d9946dee0300692d21b1b954393cd9a5668a4ebb56abec68dc","data/macros/locid_transform_aliases_v1.rs.data":"ef8fca793245f5617a1a942e7c1568e0f3d98f913f73df54704a34d772503cb3","data/macros/locid_transform_likelysubtags_ext_v1.rs.data":"058fc56acaf4405dbd3b3e3b173d99273235a4b9c6c05fd7157f0a0bd8920b66","data/macros/locid_transform_likelysubtags_l_v1.rs.data":"d047e4a8b2a54ccd44a1edafbd4b0eacf259f2f30d9ab39ec493d38d6d9df53a","data/macros/locid_transform_likelysubtags_sr_v1.rs.data":"a8b91763d1792ef9d192d525ad6d6b6c2543685f00c36df5ff223b33e2ffcb2f","data/macros/locid_transform_script_dir_v1.rs.data":"20f9af720ca7d9e45279c9c3e89bb6ebcc0e32f186914946025923645d3951b0","src/lib.rs":"31e6e96c2396927f350dfcaef280bef1e51b9bf27fe648cead9dc94e7f7fdfb4"},"package":"545c6c3e8bf9580e2dafee8de6f9ec14826aaf359787789c7724f1f85f47d3dc"} \ No newline at end of file
diff --git a/third_party/rust/icu_locid_transform_data/Cargo.toml b/third_party/rust/icu_locid_transform_data/Cargo.toml
new file mode 100644
index 0000000000..178b3459c2
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/Cargo.toml
@@ -0,0 +1,33 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.67"
+name = "icu_locid_transform_data"
+version = "1.4.0"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "Data for the icu_locid_transform crate"
+homepage = "https://icu4x.unicode.org"
+readme = "README.md"
+categories = ["internationalization"]
+license-file = "LICENSE"
+repository = "https://github.com/unicode-org/icu4x"
diff --git a/third_party/rust/icu_locid_transform_data/LICENSE b/third_party/rust/icu_locid_transform_data/LICENSE
new file mode 100644
index 0000000000..9845aa5f48
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/LICENSE
@@ -0,0 +1,44 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2023 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/third_party/rust/icu_locid_transform_data/README.md b/third_party/rust/icu_locid_transform_data/README.md
new file mode 100644
index 0000000000..67792b2669
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/README.md
@@ -0,0 +1,11 @@
+# icu_locid_transform_data [![crates.io](https://img.shields.io/crates/v/icu_locid_transform_data)](https://crates.io/crates/icu_locid_transform_data)
+
+<!-- cargo-rdme start -->
+
+Data for the icu_locid_transform crate
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/third_party/rust/icu_locid_transform_data/data/macros.rs b/third_party/rust/icu_locid_transform_data/data/macros.rs
new file mode 100644
index 0000000000..7ecb97c9d8
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros.rs
@@ -0,0 +1,66 @@
+// @generated
+/// Marks a type as a data provider. You can then use macros like
+/// `impl_core_helloworld_v1` to add implementations.
+///
+/// ```ignore
+/// struct MyProvider;
+/// const _: () = {
+/// include!("path/to/generated/macros.rs");
+/// make_provider!(MyProvider);
+/// impl_core_helloworld_v1!(MyProvider);
+/// }
+/// ```
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __make_provider {
+ ($ name : ty) => {
+ #[clippy::msrv = "1.67"]
+ impl $name {
+ #[doc(hidden)]
+ #[allow(dead_code)]
+ pub const MUST_USE_MAKE_PROVIDER_MACRO: () = ();
+ }
+ };
+}
+#[doc(inline)]
+pub use __make_provider as make_provider;
+#[macro_use]
+#[path = "macros/fallback_likelysubtags_v1.rs.data"]
+mod fallback_likelysubtags_v1;
+#[doc(inline)]
+pub use __impl_fallback_likelysubtags_v1 as impl_fallback_likelysubtags_v1;
+#[macro_use]
+#[path = "macros/fallback_parents_v1.rs.data"]
+mod fallback_parents_v1;
+#[doc(inline)]
+pub use __impl_fallback_parents_v1 as impl_fallback_parents_v1;
+#[macro_use]
+#[path = "macros/fallback_supplement_co_v1.rs.data"]
+mod fallback_supplement_co_v1;
+#[doc(inline)]
+pub use __impl_fallback_supplement_co_v1 as impl_fallback_supplement_co_v1;
+#[macro_use]
+#[path = "macros/locid_transform_aliases_v1.rs.data"]
+mod locid_transform_aliases_v1;
+#[doc(inline)]
+pub use __impl_locid_transform_aliases_v1 as impl_locid_transform_aliases_v1;
+#[macro_use]
+#[path = "macros/locid_transform_likelysubtags_ext_v1.rs.data"]
+mod locid_transform_likelysubtags_ext_v1;
+#[doc(inline)]
+pub use __impl_locid_transform_likelysubtags_ext_v1 as impl_locid_transform_likelysubtags_ext_v1;
+#[macro_use]
+#[path = "macros/locid_transform_likelysubtags_l_v1.rs.data"]
+mod locid_transform_likelysubtags_l_v1;
+#[doc(inline)]
+pub use __impl_locid_transform_likelysubtags_l_v1 as impl_locid_transform_likelysubtags_l_v1;
+#[macro_use]
+#[path = "macros/locid_transform_likelysubtags_sr_v1.rs.data"]
+mod locid_transform_likelysubtags_sr_v1;
+#[doc(inline)]
+pub use __impl_locid_transform_likelysubtags_sr_v1 as impl_locid_transform_likelysubtags_sr_v1;
+#[macro_use]
+#[path = "macros/locid_transform_script_dir_v1.rs.data"]
+mod locid_transform_script_dir_v1;
+#[doc(inline)]
+pub use __impl_locid_transform_script_dir_v1 as impl_locid_transform_script_dir_v1;
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/fallback_likelysubtags_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/fallback_likelysubtags_v1.rs.data
new file mode 100644
index 0000000000..186f871758
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/fallback_likelysubtags_v1.rs.data
@@ -0,0 +1,44 @@
+// @generated
+/// Implement `DataProvider<LocaleFallbackLikelySubtagsV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_fallback_likelysubtags_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_FALLBACK_LIKELYSUBTAGS_V1: &'static <icu::locid_transform::provider::LocaleFallbackLikelySubtagsV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::LocaleFallbackLikelySubtagsV1 {
+ l2s: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"am\0ar\0as\0be\0bg\0bgcbhobn\0brxchrcswcv\0doiel\0fa\0gu\0he\0hi\0hy\0ja\0ka\0kk\0km\0kn\0ko\0kokks\0ky\0lo\0maimk\0ml\0mn\0mnimr\0my\0ne\0nqoor\0pa\0ps\0rajru\0sa\0sahsatsd\0si\0sr\0syrta\0te\0tg\0th\0ti\0tt\0ug\0uk\0ur\0xnryuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"EthiArabBengCyrlCyrlDevaDevaBengDevaCherCansCyrlDevaGrekArabGujrHebrDevaArmnJpanGeorCyrlKhmrKndaKoreDevaArabCyrlLaooDevaCyrlMlymCyrlBengDevaMymrDevaNkooOryaGuruArabDevaCyrlDevaCyrlOlckArabSinhCyrlSyrcTamlTeluCyrlThaiEthiCyrlArabCyrlArabDevaHantHans") })
+ },
+ lr2s: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0ha\0kk\0ku\0ky\0mn\0ms\0pa\0sd\0sr\0tg\0ug\0uz\0yuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x03\0\0\0\x05\0\0\0\t\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x14\0\0\0\x15\0\0\0\x17\0\0\0\x19\0\0\0\x1A\0\0\0)\0\0\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IQ\0IR\0RU\0CM\0SD\0AF\0CN\0IR\0MN\0LB\0CN\0TR\0CN\0CC\0PK\0IN\0ME\0RO\0RU\0TR\0PK\0KZ\0MN\0AF\0CN\0CN\0AU\0BN\0GB\0GF\0HK\0ID\0MO\0PA\0PF\0PH\0SR\0TH\0TW\0US\0VN\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabArabCyrlArabArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabCyrlCyrlArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant") })
+ },
+ l2r: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhoblobn\0br\0brxbs\0ca\0cebchrcs\0cswcv\0cy\0da\0de\0doidsbel\0en\0eo\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0fy\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ie\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ku\0kxvky\0lb\0lijlmolo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0mt\0my\0ndsne\0nl\0nn\0no\0nqooc\0or\0pa\0pcmpl\0prgps\0pt\0qu\0rajrm\0ro\0ru\0sa\0sahsatsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0syrszlta\0te\0tg\0th\0ti\0tk\0to\0toktr\0tt\0ug\0uk\0ur\0uz\0vecvi\0vmwwo\0xh\0xnryo\0yrlyueza\0zh\0zu\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ZA\0ET\0EG\0IN\0ES\0AZ\0BY\0BG\0IN\0IN\0BJ\0BD\0FR\0IN\0BA\0ES\0PH\0US\0CZ\0CA\0RU\0GB\0DK\0DE\0IN\0DE\0GR\0US\x00001ES\0EE\0ES\0IR\0SN\0FI\0PH\0FO\0FR\0NL\0IE\0GB\0ES\0IN\0NG\0IL\0IN\0HR\0DE\0HU\0AM\x00001ID\0EE\0NG\0IS\0IT\0JP\0ID\0GE\0CV\0BR\0KZ\0KH\0IN\0KR\0IN\0IN\0TR\0IN\0KG\0LU\0IT\0IT\0LA\0LT\0LV\0IN\0NZ\0MK\0IN\0MN\0IN\0IN\0MY\0MT\0MM\0DE\0NP\0NL\0NO\0NO\0GN\0FR\0IN\0IN\0NG\0PL\0PL\0AF\0BR\0PE\0IN\0CH\0RO\0RU\0IN\0RU\0IN\0IT\0PK\0LK\0SK\0SI\0SO\0AL\0RS\0ID\0SE\0TZ\0IQ\0PL\0IN\0IN\0TJ\0TH\0ET\0TM\0TO\x00001TR\0RU\0CN\0UA\0PK\0UZ\0IT\0VN\0MZ\0SN\0ZA\0IN\0NG\0BR\0HK\0CN\0CN\0ZA\0") })
+ },
+ ls2r: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0en\0ff\0kk\0ku\0ky\0mn\0pa\0sd\0tg\0ug\0uz\0yuezh\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x01\0\0\0\x02\0\0\0\x03\0\0\0\x04\0\0\0\x06\0\0\0\x08\0\0\0\t\0\0\0\n\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabShawAdlmArabArabYeziArabLatnMongArabDevaKhojSindArabCyrlArabHansBopoHanbHant") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IR\0GB\0GN\0CN\0IQ\0GE\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0KZ\0AF\0CN\0TW\0TW\0TW\0") })
+ },
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::LocaleFallbackLikelySubtagsV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::LocaleFallbackLikelySubtagsV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::LocaleFallbackLikelySubtagsV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/fallback_parents_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/fallback_parents_v1.rs.data
new file mode 100644
index 0000000000..c8a37729a8
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/fallback_parents_v1.rs.data
@@ -0,0 +1,32 @@
+// @generated
+/// Implement `DataProvider<LocaleFallbackParentsV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_fallback_parents_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_FALLBACK_PARENTS_V1: &'static <icu::locid_transform::provider::LocaleFallbackParentsV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::LocaleFallbackParentsV1 {
+ parents: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x86\0\0\0\0\0\x06\0\x0B\0\x10\0\x15\0\x1A\0\x1F\0$\0)\0.\x003\08\0=\0B\0G\0L\0Q\0V\0[\0`\0e\0j\0o\0t\0y\0~\0\x83\0\x88\0\x8D\0\x92\0\x97\0\x9C\0\xA1\0\xA6\0\xAB\0\xB0\0\xB5\0\xBA\0\xBF\0\xC4\0\xC9\0\xCE\0\xD3\0\xD8\0\xDD\0\xE2\0\xE7\0\xEC\0\xF1\0\xF6\0\xFB\0\0\x01\x05\x01\n\x01\x0F\x01\x14\x01\x19\x01\x1E\x01#\x01(\x01-\x012\x017\x01<\x01A\x01F\x01K\x01P\x01U\x01Z\x01_\x01d\x01i\x01n\x01s\x01x\x01}\x01\x82\x01\x87\x01\x8C\x01\x91\x01\x96\x01\x9B\x01\xA0\x01\xA5\x01\xAA\x01\xAF\x01\xB4\x01\xB9\x01\xBE\x01\xC3\x01\xC8\x01\xCD\x01\xD2\x01\xD7\x01\xDC\x01\xE1\x01\xE6\x01\xEB\x01\xF0\x01\xF5\x01\xFA\x01\xFF\x01\x04\x02\t\x02\x0E\x02\x13\x02\x18\x02\x1D\x02\"\x02'\x02,\x021\x026\x02;\x02@\x02E\x02J\x02Q\x02S\x02U\x02W\x02\\\x02a\x02f\x02k\x02p\x02u\x02z\x02\x7F\x02\x84\x02\x89\x02\x8E\x02\x93\x02en-150en-AGen-AIen-ATen-AUen-BBen-BEen-BMen-BSen-BWen-BZen-CCen-CHen-CKen-CMen-CXen-CYen-DEen-DGen-DKen-DMen-ERen-FIen-FJen-FKen-FMen-GBen-GDen-GGen-GHen-GIen-GMen-GYen-HKen-IDen-IEen-ILen-IMen-INen-IOen-JEen-JMen-KEen-KIen-KNen-KYen-LCen-LRen-LSen-MGen-MOen-MSen-MTen-MUen-MVen-MWen-MYen-NAen-NFen-NGen-NLen-NRen-NUen-NZen-PGen-PKen-PNen-PWen-RWen-SBen-SCen-SDen-SEen-SGen-SHen-SIen-SLen-SSen-SXen-SZen-TCen-TKen-TOen-TTen-TVen-TZen-UGen-VCen-VGen-VUen-WSen-ZAen-ZMen-ZWes-ARes-BOes-BRes-BZes-CLes-COes-CRes-CUes-DOes-ECes-GTes-HNes-JPes-MXes-NIes-PAes-PEes-PRes-PYes-SVes-USes-UYes-VEhi-Latnhtnbnnno-NOpt-AOpt-CHpt-CVpt-FRpt-GQpt-GWpt-LUpt-MOpt-MZpt-STpt-TLzh-Hant-MO") }, unsafe { 
zerovec::ZeroVec::from_bytes_unchecked(b"en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01150en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0\0\0\0\0\x01001en\0\0
\0\0\0\0\x01001es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419es\0\0\0\0\0\0\x01419en\0\0\0\0\0\0\x01IN\0fr\0\0\0\0\0\0\x01HT\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0no\0\0\0\0\0\0\0\0\0\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0pt\0\0\0\0\0\0\x01PT\0zh\0\x01Hant\x01HK\0") })
+ },
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::LocaleFallbackParentsV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::LocaleFallbackParentsV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_FALLBACK_PARENTS_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::LocaleFallbackParentsV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/fallback_supplement_co_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/fallback_supplement_co_v1.rs.data
new file mode 100644
index 0000000000..bdce8a4194
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/fallback_supplement_co_v1.rs.data
@@ -0,0 +1,36 @@
+// @generated
+/// Implement `DataProvider<CollationFallbackSupplementV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_fallback_supplement_co_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_FALLBACK_SUPPLEMENT_CO_V1: &'static <icu::locid_transform::provider::CollationFallbackSupplementV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::LocaleFallbackSupplementV1 {
+ parents: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x01\0\0\0\0\0yue") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"zh\0\x01Hant\0\0\0\0") })
+ },
+ unicode_extension_defaults: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap2d::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"co") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\x02\0\0\0") }, unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x02\0\0\0\0\0\x02\0zhzh-Hant") }, unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x02\0\0\0\0\0\x06\0pinyinstroke") })
+ },
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::CollationFallbackSupplementV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::CollationFallbackSupplementV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::CollationFallbackSupplementV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_aliases_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_aliases_v1.rs.data
new file mode 100644
index 0000000000..5bbc4119c7
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_aliases_v1.rs.data
@@ -0,0 +1,66 @@
+// @generated
+/// Implement `DataProvider<AliasesV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_locid_transform_aliases_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_LOCID_TRANSFORM_ALIASES_V1: &'static <icu::locid_transform::provider::AliasesV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::AliasesV1 {
+ language_variants: unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x13\0\0\0\0\0\x17\x000\0J\0c\0z\0\x92\0\xBB\0\xD1\0\xE8\0\xFF\0\x1B\x015\x01O\x01h\x01\x80\x01\x99\x01\xB3\x01\xCB\x01\x02\0\0\0\0\0\0\0\x08\0\0\0aa-saahossy\x02\0\0\0\0\0\0\0\n\0\0\0art-lojbanjbo\x02\0\0\0\0\0\0\0\x0B\0\0\0cel-gaulishxtg\x02\0\0\0\0\0\0\0\n\0\0\0hy-arevmdahyw\x02\0\0\0\0\0\0\0\t\0\0\0no-bokmalnb\x02\0\0\0\0\0\0\0\n\0\0\0no-nynorsknn\x02\0\0\0\0\0\0\0\x12\0\0\0und-hepburn-heplocund-alalc97\x02\0\0\0\0\0\0\0\x08\0\0\0zh-guoyuzh\x02\0\0\0\0\0\0\0\x08\0\0\0zh-hakkahak\x02\0\0\0\0\0\0\0\x08\0\0\0zh-xianghsn\x02\0\0\0\0\0\0\0\n\0\0\0und-aalandund-AX\x02\0\0\0\0\0\0\0\x0B\0\0\0und-arevelaund\x02\0\0\0\0\0\0\0\x0B\0\0\0und-arevmdaund\x02\0\0\0\0\0\0\0\n\0\0\0und-bokmalund\x02\0\0\0\0\0\0\0\t\0\0\0und-hakkaund\x02\0\0\0\0\0\0\0\n\0\0\0und-lojbanund\x02\0\0\0\0\0\0\0\x0B\0\0\0und-nynorskund\x02\0\0\0\0\0\0\0\t\0\0\0und-saahound\x02\0\0\0\0\0\0\0\t\0\0\0und-xiangund") },
+ sgn_region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"BR\0CO\0DE\0DK\0ES\0FR\0GB\0GR\0IE\0IT\0JP\0MX\0NI\0NL\0NO\0PT\0SE\0US\0ZA\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"bzscsngsgdslsspfslbfigssisgisejslmfsncsdsensipsrswlasesfs") })
+ },
+ language_len2: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"bhiniwjijwmoshtltw") }, unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\t\0\0\0\0\0\x03\0\x05\0\x07\0\t\0\x0B\0\r\0\x14\0\x17\0bhoidheyijvrosr-Latnfilak") })
+ },
+ language_len3: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"aamaarabkadpafragpaisajtajuakaalbalsamharaarbargarmasdasmaueavaaveaymayrayxazeazjbakbambaqbazbccbclbelbenbgmbhkbicbihbisbjdbjqbkbblgbodbosbrebtbbulburbxkbxrcatccqceschachechichuchvcjrckacldcmkcmncnrcorcoscoycqucrecwdcymczedafdandapdeudgodhddikdiqditdivdjldkldrhdrrdrwduddujdutdwldzoekkellelpemkengepoeskesteusewefaofasfatfijfinfrafrefryfucfulgavgazgbcgbogeogergfxggnggoggrgioglagleglggliglvgnogregrngtiguggujguvgyahathauhbshdnheahebherhimhinhmohrrhrvhunhyeibiiboiceidoiiiikeikuileillilwinaindipkislitaizijarjavjegjpnkalkankaskatkaukazkdvkgckgdkghkhkkhmkikkinkirkmrknckngknnkojkomkonkorkppkpvkrmktrkuakurkvskwqkxekxlkzhkzjkztlaklaolatlavlbklegliilimlinlitllolmmltzlubluglvsmacmahmalmaomarmaymegmgxmhrmkdmlgmltmnkmntmofmolmonmrimsamstmupmwdmwjmyamydmytnadnaunavnbfnblnbxncpndendonepnldnlnnlrnnonnsnnxnobnoonornpintsnxunyaociojgojioriormoryossounpanpatpbupcrperpesplipltpmcpmupnbpolporppapprprsprypuspuzquequzrmrrmyrohronrumrunrussagsansapscasccscrsglsinskkslksloslvsmdsmesmosnasnbsndsomsotspaspysqisrcsrdsrpsswsulsumsunswaswcsweswhtahtamtattduteltggtgktglthathcthwthxtibtidtietirtkktlwtmptnetnftontsftsntsottqtukturtwiuigukrumuunpuokurduzbuznvenvievolwelwgwwitwiwwlnwolxbaxhoxiaxkhxpexrqxsjxslybdyddyenyidyiyymaymtyoryosyuuzaizhazhozirzsmzulzyb") }, unsafe { 
zerovec::VarZeroVec::from_bytes_unchecked(b"\x9E\x01\0\0\0\0\x03\0\x05\0\x07\0\t\0\x0B\0\x0E\0\x11\0\x14\0\x17\0\x19\0\x1B\0\x1D\0\x1F\0!\0#\0%\0'\0*\0,\0/\x001\x003\x005\x007\0:\0<\0>\0@\0B\0D\0G\0J\0M\0O\0Q\0T\0W\0Z\0]\0_\0b\0e\0h\0k\0m\0o\0q\0t\0v\0x\0{\0~\0\x80\0\x83\0\x85\0\x87\0\x89\0\x8B\0\x8D\0\x8F\0\x92\0\x95\0\x98\0\x9B\0\x9D\0\xA2\0\xA4\0\xA6\0\xA9\0\xAC\0\xAE\0\xB0\0\xB2\0\xB4\0\xB7\0\xB9\0\xBC\0\xBE\0\xC1\0\xC4\0\xC7\0\xCA\0\xCD\0\xCF\0\xD2\0\xD5\0\xD7\0\xDA\0\xDF\0\xE2\0\xE5\0\xE7\0\xEA\0\xEC\0\xEE\0\xF0\0\xF3\0\xF6\0\xF8\0\xFA\0\xFC\0\xFE\0\0\x01\x02\x01\x04\x01\x06\x01\x08\x01\n\x01\x0C\x01\x0E\x01\x10\x01\x12\x01\x14\x01\x16\x01\x19\x01\x1B\x01\x1E\x01!\x01#\x01%\x01(\x01+\x01.\x011\x014\x016\x018\x01:\x01=\x01?\x01B\x01D\x01F\x01I\x01K\x01M\x01P\x01S\x01U\x01W\x01^\x01a\x01d\x01f\x01h\x01k\x01m\x01o\x01r\x01t\x01v\x01x\x01{\x01}\x01\x7F\x01\x81\x01\x83\x01\x85\x01\x87\x01\x89\x01\x8C\x01\x8F\x01\x91\x01\x93\x01\x95\x01\x97\x01\x99\x01\x9C\x01\x9F\x01\xA1\x01\xA4\x01\xA6\x01\xA8\x01\xAA\x01\xAC\x01\xAE\x01\xB0\x01\xB2\x01\xB5\x01\xB8\x01\xBB\x01\xBE\x01\xC0\x01\xC2\x01\xC4\x01\xC6\x01\xC8\x01\xCA\x01\xCC\x01\xCE\x01\xD1\x01\xD4\x01\xD6\x01\xD8\x01\xDA\x01\xDD\x01\xDF\x01\xE2\x01\xE5\x01\xE7\x01\xE9\x01\xEC\x01\xEF\x01\xF2\x01\xF5\x01\xF8\x01\xFB\x01\xFE\x01\x01\x02\x03\x02\x05\x02\x07\x02\n\x02\r\x02\x10\x02\x12\x02\x14\x02\x16\x02\x19\x02\x1C\x02\x1E\x02 
\x02\"\x02$\x02&\x02(\x02*\x02,\x02.\x020\x023\x026\x029\x02;\x02=\x02?\x02B\x02E\x02H\x02J\x02L\x02N\x02P\x02S\x02V\x02Y\x02\\\x02^\x02a\x02d\x02g\x02i\x02k\x02n\x02p\x02s\x02v\x02x\x02z\x02|\x02~\x02\x81\x02\x84\x02\x86\x02\x89\x02\x8C\x02\x8E\x02\x91\x02\x93\x02\x95\x02\x98\x02\x9B\x02\x9D\x02\x9F\x02\xA1\x02\xA3\x02\xA5\x02\xA7\x02\xA9\x02\xAB\x02\xAE\x02\xB0\x02\xB3\x02\xB5\x02\xB8\x02\xBA\x02\xBC\x02\xBE\x02\xC0\x02\xC3\x02\xC6\x02\xC9\x02\xCB\x02\xCD\x02\xD0\x02\xD3\x02\xD8\x02\xDB\x02\xDD\x02\xE0\x02\xE2\x02\xE4\x02\xE7\x02\xEA\x02\xEC\x02\xEE\x02\xF0\x02\xF2\x02\xF4\x02\xF6\x02\xF8\x02\xFB\x02\xFE\x02\0\x03\x02\x03\x05\x03\x07\x03\n\x03\x0C\x03\x0E\x03\x10\x03\x13\x03\x15\x03\x17\x03\x19\x03\x1C\x03\x1E\x03 \x03\"\x03$\x03'\x03)\x03+\x03-\x03/\x031\x034\x037\x039\x03;\x03@\x03B\x03D\x03F\x03H\x03J\x03M\x03O\x03R\x03T\x03W\x03Y\x03\\\x03_\x03b\x03d\x03g\x03j\x03l\x03o\x03r\x03u\x03x\x03}\x03\x7F\x03\x82\x03\x84\x03\x86\x03\x89\x03\x8B\x03\x8D\x03\x8F\x03\x91\x03\x93\x03\x96\x03\x99\x03\x9C\x03\x9E\x03\xA0\x03\xA2\x03\xA4\x03\xA6\x03\xA8\x03\xAA\x03\xAD\x03\xB0\x03\xB3\x03\xB5\x03\xB7\x03\xBA\x03\xBC\x03\xBF\x03\xC2\x03\xC5\x03\xC8\x03\xCB\x03\xCE\x03\xD1\x03\xD3\x03\xD6\x03\xD8\x03\xDB\x03\xDE\x03\xE1\x03\xE3\x03\xE6\x03\xE9\x03\xEC\x03\xEE\x03\xF0\x03\xF3\x03\xF5\x03\xF7\x03aasaaabdzafapfamiaebjrbaksqsqamararanhysnzasktzavaeayaynunazazbabmeunvobalbikbebnbcgfblbirbhobidrlbzcebkibabobsbrbebbgmyluybuacarkicschcezhcucvmomcmrsyrxchzhsr-MEkwcopijquhcrcrcycsdnjdanjzdedoimwrdinzzadifdvdzeaqdmnkzkfa-AFuthdwunldbtdzetelamqmaneneoiketeueefofaakfjfifrfrfyffffdevomwnygrbkadevajgvresggtuaougdgaglkzkgvgonelgnnycgnguduzgbahthasr-Latnhaihmnhehzsrxhihojalhrhuhyopaigisioiiiuiuieilmgaliaidikisitezajgkjvoybjaklknkskakrkkzkdtdfncqkmlmnkmkirwkykukrkgkokkwvkvkgkojkmkvbmfdtpkjkugdjyamtvdkrudgldtpdtpksplolalvbncenlraqlilnltngtrmxlblulglvmkmhmlmimrmscirjbkchmmkmgmtmanwnnxntromnmimsmryrajdmwvajmyaogmryxnynanvnrunrekckdzndngnenlazdnrknnnbrngvnbdtdnonepijbppnyocojojoromorosvajpakxrpsadx
fafapimghuwphrlahplptbfylcqfa-AFprtpspubququemxromrmrorornrusgsaaqthlesrhrisksioybskskslkmbsesmsnibasdsostesklnsqscscsrsssgdulwsuswsw-CDsvswtytattdtptebjptgfilthtpoolaoybboitdrastitwmweotyjkakfa-AFtotajtntstmhtktrakugukdelwroemauruzuzvevivocywgbnolnwowawocaxxhacnwawkpedmwsujdenrkiyiynqyiyrmlrrmtmyozomyugzapzazhscvmszuza") })
+ },
+ language: zerovec::VarZeroVec::new(),
+ script: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"Qaai") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"Zinh") })
+ },
+ region_alpha: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"BUCTDDDYFXHVJTMINHNQPUPZQURHTPUKVDWKYDZR") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"MM\0KI\0DE\0BJ\0FR\0BF\0UM\0UM\0VU\0AQ\0UM\0PA\0EU\0ZW\0TL\0GB\0VN\0UM\0YE\0CD\0") })
+ },
+ region_num: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"004008010012016020024028031032036040044048050051052056060064068070072074076084086090092096100104108112116120124132136140144148152156158162166170174175178180184188191192196203204208212214218222226230231232233234238239242246248249250254258260262266268270275276278280288292296300304308312316320324328332334336340344348352356360364368372376380384388392398400404408410414417418422426428430434438440442446450454458462466470474478480484492496498499500504508512516520524528531533534535540548554558562566570574578580581583584585586591598600604608612616620624626630634638642643646652654659660662663666670674678682686688690694702703704705706710716720724728729732736740744748752756760762764768772776780784788792795796798800804807818826831832833834840850854858860862876882886887894958959960962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"AF\0AL\0AQ\0DZ\0AS\0AD\0AO\0AG\0AZ\0AR\0AU\0AT\0BS\0BH\0BD\0AM\0BB\0BE\0BM\0BT\0BO\0BA\0BW\0BV\0BR\0BZ\0IO\0SB\0VG\0BN\0BG\0MM\0BI\0BY\0KH\0CM\0CA\0CV\0KY\0CF\0LK\0TD\0CL\0CN\0TW\0CX\0CC\0CO\0KM\0YT\0CG\0CD\0CK\0CR\0HR\0CU\0CY\0CZ\0BJ\0DK\0DM\0DO\0EC\0SV\0GQ\0ET\0ET\0ER\0EE\0FO\0FK\0GS\0FJ\0FI\0AX\0FR\0FR\0GF\0PF\0TF\0DJ\0GA\0GE\0GM\0PS\0DE\0DE\0DE\0GH\0GI\0KI\0GR\0GL\0GD\0GP\0GU\0GT\0GN\0GY\0HT\0HM\0VA\0HN\0HK\0HU\0IS\0IN\0ID\0IR\0IQ\0IE\0IL\0IT\0CI\0JM\0JP\0KZ\0JO\0KE\0KP\0KR\0KW\0KG\0LA\0LB\0LS\0LV\0LR\0LY\0LI\0LT\0LU\0MO\0MG\0MW\0MY\0MV\0ML\0MT\0MQ\0MR\0MU\0MX\0MC\0MN\0MD\0ME\0MS\0MA\0MZ\0OM\0NA\0NR\0NP\0NL\0CW\0AW\0SX\0BQ\0NC\0VU\0NZ\0NI\0NE\0NG\0NU\0NF\0NO\0MP\0UM\0FM\0MH\0PW\0PK\0PA\0PG\0PY\0PE\0PH\0PN\0PL\0PT\0GW\0TL\0PR\0QA\0RE\0RO\0RU\0RW\0BL\0SH\0KN\0AI\0LC\0MF\0PM\0VC\0SM\0ST\0SA\0SN\0RS\0SC\0SL\0SG\0SK\0VN\0SI\0SO\0ZA\0ZW\0YE\0ES\0SS\0SD\0EH\0SD\0SR\0SJ\0SZ\0SE\0CH\0SY\0TJ\0TH\0TG\0TK\0TO\0TT\0AE\0TN\0TR\0TM\0TC\0TV\0UG\0UA\0MK\0EG\0GB
\0GG\0JE\0IM\0TZ\0US\0VI\0BF\0UY\0UZ\0VE\0WF\0WS\0YE\0YE\0ZM\0AA\0QM\0QN\0QP\0QQ\0QR\0QS\0QT\0EU\0QV\0QW\0QX\0QY\0QZ\0XA\0XB\0XC\0XD\0XE\0XF\0XG\0XH\0XI\0XJ\0XK\0XL\0XM\0XN\0XO\0XP\0XQ\0XR\0XS\0XT\0XU\0XV\0XW\0XX\0XY\0XZ\0ZZ\0") })
+ },
+ complex_region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"062172200530532536582810830890891AN\0CS\0FQ\0NT\0PC\0SU\0YU\0") }, unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x12\0\0\0\0\0\x06\0*\x000\09\0B\0H\0T\0\x81\0\x87\0\x99\0\x9F\0\xA8\0\xAE\0\xB4\0\xBA\0\xC6\0\xF3\x00034143RU\0AM\0AZ\0BY\0GE\0KG\0KZ\0MD\0TJ\0TM\0UA\0UZ\0CZ\0SK\0CW\0SX\0BQ\0CW\0SX\0BQ\0SA\0IQ\0FM\0MH\0MP\0PW\0RU\0AM\0AZ\0BY\0EE\0GE\0KZ\0KG\0LV\0LT\0MD\0TJ\0TM\0UA\0UZ\0JE\0GG\0RS\0ME\0SI\0HR\0MK\0BA\0RS\0ME\0CW\0SX\0BQ\0RS\0ME\0AQ\0TF\0SA\0IQ\0FM\0MH\0MP\0PW\0RU\0AM\0AZ\0BY\0EE\0GE\0KZ\0KG\0LV\0LT\0MD\0TJ\0TM\0UA\0UZ\0RS\0ME\0") })
+ },
+ variant: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"heploc\0\0polytoni") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"alalc97\0polyton\0") })
+ },
+ subdivision: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"cn11\0\0\0cn12\0\0\0cn13\0\0\0cn14\0\0\0cn15\0\0\0cn21\0\0\0cn22\0\0\0cn23\0\0\0cn31\0\0\0cn32\0\0\0cn33\0\0\0cn34\0\0\0cn35\0\0\0cn36\0\0\0cn37\0\0\0cn41\0\0\0cn42\0\0\0cn43\0\0\0cn44\0\0\0cn45\0\0\0cn46\0\0\0cn50\0\0\0cn51\0\0\0cn52\0\0\0cn53\0\0\0cn54\0\0\0cn61\0\0\0cn62\0\0\0cn63\0\0\0cn64\0\0\0cn65\0\0\0cn71\0\0\0cn91\0\0\0cn92\0\0\0cz10a\0\0cz10b\0\0cz10c\0\0cz10d\0\0cz10e\0\0cz10f\0\0cz611\0\0cz612\0\0cz613\0\0cz614\0\0cz615\0\0cz621\0\0cz622\0\0cz623\0\0cz624\0\0cz626\0\0cz627\0\0czjc\0\0\0czjm\0\0\0czka\0\0\0czkr\0\0\0czli\0\0\0czmo\0\0\0czol\0\0\0czpa\0\0\0czpl\0\0\0czpr\0\0\0czst\0\0\0czus\0\0\0czvy\0\0\0czzl\0\0\0fi01\0\0\0fra\0\0\0\0frb\0\0\0\0frbl\0\0\0frc\0\0\0\0frcp\0\0\0frd\0\0\0\0fre\0\0\0\0frf\0\0\0\0frg\0\0\0\0frgf\0\0\0frgp\0\0\0frh\0\0\0\0fri\0\0\0\0frj\0\0\0\0frk\0\0\0\0frl\0\0\0\0frm\0\0\0\0frmf\0\0\0frmq\0\0\0frn\0\0\0\0frnc\0\0\0fro\0\0\0\0frp\0\0\0\0frpf\0\0\0frpm\0\0\0frq\0\0\0\0frr\0\0\0\0frre\0\0\0frs\0\0\0\0frt\0\0\0\0frtf\0\0\0fru\0\0\0\0frv\0\0\0\0frwf\0\0\0fryt\0\0\0laxn\0\0\0lud\0\0\0\0lug\0\0\0\0lul\0\0\0\0mrnkc\0\0nlaw\0\0\0nlcw\0\0\0nlsx\0\0\0no23\0\0\0nzn\0\0\0\0nzs\0\0\0\0omba\0\0\0omsh\0\0\0plds\0\0\0plkp\0\0\0pllb\0\0\0plld\0\0\0pllu\0\0\0plma\0\0\0plmz\0\0\0plop\0\0\0plpd\0\0\0plpk\0\0\0plpm\0\0\0plsk\0\0\0plsl\0\0\0plwn\0\0\0plwp\0\0\0plzp\0\0\0shta\0\0\0tteto\0\0ttrcm\0\0ttwto\0\0twkhq\0\0twtnq\0\0twtpq\0\0twtxq\0\0usas\0\0\0usgu\0\0\0usmp\0\0\0uspr\0\0\0usum\0\0\0usvi\0\0\0") }, unsafe { 
zerovec::ZeroVec::from_bytes_unchecked(b"cnbj\0\0\0cntj\0\0\0cnhe\0\0\0cnsx\0\0\0cnmn\0\0\0cnln\0\0\0cnjl\0\0\0cnhl\0\0\0cnsh\0\0\0cnjs\0\0\0cnzj\0\0\0cnah\0\0\0cnfj\0\0\0cnjx\0\0\0cnsd\0\0\0cnha\0\0\0cnhb\0\0\0cnhn\0\0\0cngd\0\0\0cngx\0\0\0cnhi\0\0\0cncq\0\0\0cnsc\0\0\0cngz\0\0\0cnyn\0\0\0cnxz\0\0\0cnsn\0\0\0cngs\0\0\0cnqh\0\0\0cnnx\0\0\0cnxj\0\0\0twzzzz\0hkzzzz\0mozzzz\0cz110\0\0cz111\0\0cz112\0\0cz113\0\0cz114\0\0cz115\0\0cz663\0\0cz632\0\0cz633\0\0cz634\0\0cz635\0\0cz641\0\0cz642\0\0cz643\0\0cz644\0\0cz646\0\0cz647\0\0cz31\0\0\0cz64\0\0\0cz41\0\0\0cz52\0\0\0cz51\0\0\0cz80\0\0\0cz71\0\0\0cz53\0\0\0cz32\0\0\0cz10\0\0\0cz20\0\0\0cz42\0\0\0cz63\0\0\0cz72\0\0\0axzzzz\0frges\0\0frnaq\0\0blzzzz\0frara\0\0cpzzzz\0frbfc\0\0frbre\0\0frcvl\0\0frges\0\0gfzzzz\0gpzzzz\0frcor\0\0frbfc\0\0fridf\0\0frocc\0\0frnaq\0\0frges\0\0mfzzzz\0mqzzzz\0frocc\0\0nczzzz\0frhdf\0\0frnor\0\0pfzzzz\0pmzzzz\0frnor\0\0frpdl\0\0rezzzz\0frhdf\0\0frnaq\0\0tfzzzz\0frpac\0\0frara\0\0wfzzzz\0ytzzzz\0laxs\0\0\0lucl\0\0\0luec\0\0\0luca\0\0\0mr13\0\0\0awzzzz\0cwzzzz\0sxzzzz\0no50\0\0\0nzauk\0\0nzcan\0\0ombj\0\0\0omsj\0\0\0pl02\0\0\0pl04\0\0\0pl08\0\0\0pl10\0\0\0pl06\0\0\0pl12\0\0\0pl14\0\0\0pl16\0\0\0pl20\0\0\0pl18\0\0\0pl22\0\0\0pl26\0\0\0pl24\0\0\0pl28\0\0\0pl30\0\0\0pl32\0\0\0tazzzz\0tttob\0\0ttmrc\0\0tttob\0\0twkhh\0\0twtnn\0\0twnwt\0\0twtxg\0\0aszzzz\0guzzzz\0mpzzzz\0przzzz\0umzzzz\0vizzzz\0") })
+ },
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::AliasesV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::AliasesV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCID_TRANSFORM_ALIASES_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::AliasesV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_ext_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_ext_v1.rs.data
new file mode 100644
index 0000000000..955e545813
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_ext_v1.rs.data
@@ -0,0 +1,52 @@
+// @generated
+/// Implement `DataProvider<LikelySubtagsExtendedV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_locid_transform_likelysubtags_ext_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1: &'static <icu::locid_transform::provider::LikelySubtagsExtendedV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::LikelySubtagsExtendedV1 {
+ language_script: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"aaeGrekaafArabablLatnacyArabacyGrekadiTibtagjArabahkMymrahkThaiainLatnaiwArabaiwEthiakbBatkamfEthiamwArabamwArmiamwLatnanuArabanuLatnaotLatnarcNbatarcPalmatbLisuaujLatnaujTfngazbCyrlazbLatnbcaHanibcqLatnbeaCansbexArabbfaArabbfsHanibfuTakrbhdArabbhdTakrbhhHebrbhhLatnbhtDevabhtLatnbjiEthiblrTaleblrThaibozArabbqfArabbrbLaoobrbLatnbruLaoobruThaibskLatnbstLatnbsvArabbswEthibtsBatkbtxBatkbudBraibweLatnbwoEthibxmLatnbxmMongbxuCyrlbxuLatncafCanscbkBraicdfBengcdhTakrcdmLatncdoHantcdoLatncfmBengchnDuplciaArabciaHangciwCanscjsCyrlcjyHantckmGlagclkTibtcmoKhmrcnpHantcrjLatncrxCanscshLatncspHantctgArabctgLatncu\0GlagcvgTibtczhHantdagArabdbjArabderLatndinArabdisBengdngArabdsqArabdtaCyrldtaHansduhGujrdwrEthiekoArabemsCyrlessCyrlevnLatnevnMongfuhArabgacDevagaxEthigbaArabgbkTakrgblDevagdbTelugdlEthighaLatnghaTfnggjnArabgmvLatngofLatngraGujrgrcLinbgruLatngukEthigylEthihajBengharArabharLatnhdyLatnhiiDevahkhDevahkhLatnhmnBopohmnHmnghmzPlrdhnjLaoohrmHmnghutTibtigsGrekiktCansiliArabiliCyrliruMlymiskCyrliumHaniiumLaooiumThaijctLatnjdtHebrjdtLatnjehLaoojgeHebrjkmBraijkmLatnjnjEthijnsLatnjnsTakrjraKhmrjrbArabkbpBraikbrEthikdrCyrlkexGujrkffDevakffOryakffTelukfiKndakfkTakrkfxTakrkhrDevakjpThaikkeArabklsArabkmcHaniknkArabknsThaikqyLatnkr\0ArabkswLatnktbLatnkvqLatnkxcEthikxdArabkxfLatnkyuLatnkyuMymrkywBengkywOryalaeTibtlaxBenglbfTibtlbjArablbnLaoolboLatnlfnCyrllggArablifLimblmkMymrlotArabloyTibtlpoLisulusBenglusBraimanNkoomawArabmdeLatnmdxLatnmdyLatnmeoArabmeyArabmfgArabmfiLatnmfxEthimgdArabmjlTakrmjtBengmjxBengmkiDevamlfLatnmlqArabmmdHansmmdHantmoyEthimpeEthimqxBugimrgBengmrgDevamrwArabmurArabmuzLatnmvfPhagmvzArabmwtThaimymLatnnbtDevancbDevancqThainctBengnivLatnnjzBengnkiBengnmmTibtnmoBengnngBengnruHansnruHantnupArabnwcBrahnwcDevanwcSiddojbCansojwCansolaTibtonpDevaoscLatnpalPhlppccHanipceThaipcgKndapcgTamlpciOryapduMymrpi\0Brahpi\0Devapi\0Kharpi\0Khmrpi\0Mymrpi\0ThaipkhDevaprxTibtpyuHaniqwmCyrlqwmRunrrahL
atnreiTelurmlCyrlrmnCyrlrmnGrekromCyrlrupGrekrutLatnsamHebrsamSyrcsbuDevasceArabscsCanssekCanssghArabsghLatnshkArabshuLatnshyArabshyTfngsjpBengsngBraisoaThaisqtLatnsseArabsssThaistvArabsuqEthisuvBengsuvDevasvaCyrlsvaLatnsxuRunrtaqTfngtayHanstayHanttbkHanotbkLatntdaArabtdaLatntdbBengtdbKthitgtHanotgtTagbthpDuplthvArabthvTfngthzTfngtliCyrltpeBengtrpBengtwhTavttytTavtunrDevausiBenguumCyrlvasGujrvavGujrvkpDevavwaMymrwblArabwblCyrlwmwArabwofArabwtwBugixnzArabxomEthixonBraixubKndaxubMlymxumItalyalArabybeOugryeaKndaykaArabymkArabywqYiiiywuYiiizauArabzayEthizenArabzhdLatnzhnHanizljLatnzneArabzqeLatnzyjHani") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IT\0IN\0ID\0CY\0CY\0CN\0ET\0MM\0TH\0JP\0ET\0ET\0ID\0ET\0SY\0SY\0SY\0SS\0SS\0IN\0JO\0SY\0CN\0LY\0LY\0AZ\0AZ\0CN\0ET\0CA\0SS\0SS\0CN\0IN\0IN\0IN\0IL\0IL\0IN\0IN\0ET\0CN\0TH\0ML\0GN\0LA\0VN\0LA\0LA\0PK\0ET\0GN\0ET\0ID\0ID\0TG\0MM\0ET\0MN\0MN\0CN\0CN\0CA\0PH\0IN\0IN\0NP\0CN\0CN\0IN\0US\0ID\0ID\0US\0RU\0CN\0HR\0CN\0KH\0CN\0CA\0CA\0MM\0CN\0BD\0BD\0BG\0IN\0CN\0GH\0MY\0IN\0SS\0IN\0KG\0ML\0CN\0CN\0IN\0ET\0MZ\0US\0US\0CN\0CN\0NE\0IN\0ET\0CF\0IN\0IN\0IN\0ET\0LY\0LY\0GH\0ET\0ET\0IN\0GR\0ET\0ET\0ET\0IN\0ET\0ET\0ET\0IN\0IN\0IN\0CN\0CN\0CN\0LA\0CN\0NP\x00001CA\0CN\0KZ\0IN\0TJ\0CN\0LA\0TH\0UA\0RU\0AZ\0LA\0IL\0MM\0MM\0ET\0IN\0IN\0KH\0MA\0TG\0ET\0UA\0IN\0IN\0IN\0IN\0IN\0IN\0IN\0IN\0TH\0GN\0PK\0CN\0SL\0TH\0ET\0NG\0MM\0ET\0MM\0ET\0BN\0MM\0MM\0MM\0IN\0IN\0IN\0IN\0CN\0IN\0LA\0US\x00001UG\0IN\0IN\0SS\0NP\0CN\0BD\0IN\0GN\0GH\0TD\0ET\0ET\0MY\0MR\0GN\0CM\0ET\0SS\0IN\0BD\0BD\0IN\0LA\0SN\0CN\0CN\0ET\0ET\0ID\0IN\0IN\0PH\0SS\0ET\0CN\0ET\0TH\0ET\0IN\0IN\0LA\0IN\0RU\0IN\0IN\0NP\0IN\0IN\0CN\0CN\0NG\0NP\0NP\0NP\0CA\0CA\0CN\0IN\0IT\0CN\0CN\0TH\0IN\0IN\0IN\0MM\0IN\0IN\0IN\0IN\0IN\0IN\0BD\0IN\0TW\0RU\0RU\0IN\0IN\0BY\0BG\0GR\0BG\0GR\0AZ\0PS\0PS\0IN\0CN\0CA\0CA\0AF\0TJ\0SS\0TD\0DZ\0DZ\0IN\0CD\0TH\0YE\0PH\0TH\0ET\0ET\0IN\0IN\0GE\0GE\0DE\0ML\0TW\0TW\0PH\0PH\0NE\0NE\0IN\0IN\0PH\0PH\0CA\0DZ\0DZ\0NE\0US\0BD\0IN\0VN\0VN\0NP\0BD\0GE\0IN\0IN\0IN\0CN\0AF\0TJ\0MZ\0G
M\0ID\0EG\0ET\0GH\0IN\0IN\0IT\0GN\0CN\0IN\0PH\0MZ\0CN\0CN\0IN\0ET\0MR\0VN\0CN\0CN\0SS\0CN\0CN\0") })
+ },
+ language_region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ahkTH\0avuSD\0cmoKH\0fuhBF\0hnjAU\0hnjCN\0hnjFR\0hnjGF\0hnjLA\0hnjMM\0hnjSR\0hnjTH\0hnjVN\0jrbMA\0lwoZA\0manGN\0ndyTD\0romBG\0sclIN\0stiKH\0unrNP\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"LatnLatnLatnLatnLaooLaooLaooLaooLaooLaooLaooLaooLaooArabLatnNkooLatnCyrlArabLatnDeva") })
+ },
+ language: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"aa\0aaaaabaacaadaaeaafaagaahaaiaakaalaanaaoaapaaqaasaataauaawaaxaazab\0abaabbabcabdabeabfabgabhabiablabmabnaboabpabrabsabtabuabvabwabxabyabzacaacbacdaceacfachacmacnacpacqacracsactacuacvacwacxacyaczadaadbaddadeadfadgadhadiadjadladnadoadqadradtaduadwadxadyadzae\0aeaaebaecaeeaekaelaemaeqaeraeuaewaeyaezafbafdafeafhafiafkafnafoafpafsafuafzagaagbagcagdageagfaggaghagiagjagkaglagmagnagoagqagragsagtaguagvagwagxagyagzahaahbahgahhahiahkahlahmahnahoahpahrahsahtaiaaibaicaidaieaifaigaijaikailaimainaioaipaiqairaitaiwaixaiyajaajgajiajnajpajwajzak\0akbakcakdakeakfakgakhakiakkaklakoakpakqakraksaktakuakvakwakzalaalcaldalealfalhalialjalkallalmalnaloalpalqalraltalualwalxalyalzamaambamcameamfamgamiamjamkammamnamoampamqamramsamtamuamvamwamxamyamzan\0anaanbancandaneanfanganhanianjankanlanmannanoanpanransantanuanvanwanxanyanzaoaaobaocaodaoeaofaogaoiaojaokaolaomaonaoraosaotaoxaozapbapcapdapeapfapgaphapiapjapkaplapmapnapoappaprapsaptapuapvapwapxapyapzaqcaqdaqgaqkaqmaqnaqraqtaqzarcardarearhariarjarkarlarnaroarparqarrarsaruarwarxaryarzasaasbascaseasgashasiasjaskaslasnasoassasuasvasxasyaszataatbatcatdateatgatiatjatkatlatmatnatoatpatqatratsattatuatvatwatxatyatzauaaucaudaugauhauiaujaukaulaumaunauoaupauqaurautauuauwauyauzav\0avbavdaviavkavlavmavnavoavsavtavuavvawaawbawcaweawgawhawiawkawmawnawoawrawsawtawuawvawwawxawyaxbaxeaxgaxkaxlaxmaxxay\0ayaaybaycaydayeaygayhayiaykaylaynayoaypayqaysaytayuayzazbazdazgazmaznazoaztazzba\0baababbacbaebafbagbahbajbalbanbaobapbarbasbaubavbawbaxbaybbabbbbbcbbdbbebbfbbgbbibbjbbkbblbbmbbnbbobbpbbqbbrbbsbbtbbubbvbbwbbxbbybcabcbbcdbcebcfbcgbchbcibcjbckbcmbcnbcobcpbcqbcrbcsbctbcubcvbcwbcybczbdabdbbdcbddbdebdfbdgbdhbdibdjbdkbdlbdmbdnbdobdpbdqbdrbdsbdtbdubdvbdwbdxbdybdzbeabebbecbedbeebefbehbeibejbekbembeobepbeqbesbetbeubevbewbexbeybezbfabfbbfcbfdbfebffbfgbfhbfjbflbfmbfnbfobfpbfqbfsbftbfubfwbfxbfybfzbgabgbbgdbgfbggbgibgjbgnbgobgpbgqbgrbgsbgtbgubgvbgwbgxbgybgzbhabhbbhcbhdbhebhfbhgbhhbhibh
jbhlbhmbhnbhpbhqbhrbhsbhtbhubhvbhwbhybhzbi\0biabibbidbiebifbigbikbilbimbinbiobipbiqbirbitbiubivbiwbiybizbjabjbbjcbjfbjgbjhbjibjjbjkbjlbjmbjnbjobjpbjrbjsbjtbjubjvbjwbjxbjybjzbkabkcbkdbkfbkgbkhbkibkjbklbkmbknbkobkpbkqbkrbksbktbkubkvbkwbkxbkybkzblablbblcbldbleblfblhblibljblkblmblnblpblqblrblsbltblvblwblxblyblzbm\0bmabmbbmcbmdbmebmfbmgbmhbmibmjbmkbmlbmmbmnbmobmpbmqbmrbmsbmubmvbmwbmxbmzbnabnbbncbndbnebnfbngbnibnjbnkbnmbnnbnobnpbnqbnrbnsbnubnvbnwbnxbnybnzbo\0boabobboebofbohbojbokbolbombonboobopboqborbotboubovbowboxboybozbpabpcbpdbpebpgbphbpibpjbpkbplbpmbpobppbpqbprbpsbptbpubpvbpwbpxbpybpzbqabqbbqcbqdbqfbqgbqibqjbqkbqlbqmbqobqpbqqbqrbqsbqtbqubqvbqwbqxbqzbrabrbbrcbrdbrfbrgbrhbribrjbrkbrlbrmbrnbrpbrqbrrbrsbrtbrubrvbrybrzbsabsbbscbsebsfbshbsibsjbskbslbsmbsnbsobspbsqbsrbssbstbsubsvbswbsxbsybtabtcbtdbtebtfbtgbthbtibtjbtmbtnbtobtpbtqbtrbtsbttbtubtvbtwbtxbtybtzbuabubbucbudbuebufbugbuhbuibujbukbumbunbuobupbuqbusbutbuubuvbuwbuxbuybuzbvabvbbvcbvdbvebvfbvgbvhbvibvjbvkbvmbvnbvobvqbvrbvtbvubvvbvwbvxbvybvzbwabwbbwcbwdbwebwfbwgbwhbwibwjbwkbwlbwmbwobwpbwqbwrbwsbwtbwubwwbwxbwybwzbxabxbbxcbxfbxgbxhbxibxjbxlbxmbxnbxobxpbxqbxsbxubxvbxwbxzbyabybbycbydbyebyfbyhbyibyjbykbylbymbynbypbyrbysbyvbywbyxbyzbzabzbbzcbzdbzebzfbzhbzibzjbzkbzlbzmbznbzobzpbzqbzrbztbzubzvbzwbzxbzybzzcaacabcaccadcaecafcagcahcajcakcalcamcancaocapcaqcarcascavcawcaxcaycazcbbcbccbdcbgcbicbjcbkcblcbncbocbqcbrcbscbtcbucbvcbwcbycccccdcceccgcchccjcclccmccoccpccrcdecdfcdhcdicdjcdmcdocdrcdzce\0ceacegcekcencetceycfacfdcfgcfmcgacgccggcgkch\0chbchdchfchgchhchjchkchlchmchnchochpchqchtchwchxchychzciacibcicciecihcimcincipcirciwciycjacjecjhcjicjkcjmcjncjocjpcjscjvcjyckbcklckmcknckockqckrckscktckuckvckxckyckzclaclccleclhclicljclkcllclmclocltcluclwclycmacmecmgcmicmlcmocmrcmscmtcnacnbcnccngcnhcnicnkcnlcnpcnqcnscntcnwcnxco\0coacobcoccodcoecofcogcohcojcokcolcomcoocopcoqcotcoucoxcozcpacpbcpccpgcpicpncpocpscpucpxcpycqdcr\0crbcrccrdcrfcrgcrhcricrjcrkcrlcrmcrncrocrqcrscrtcrvcrwcrxcrycrzcsacsbcshcsjcskcsmcsocspcsscstcsvcsycszctactcctdctectgcthctlctmctn
ctoctpctscttctuctzcu\0cuacubcuccuhcuicujcukculcuocupcutcuucuvcuxcvgcvncwacwbcwecwgcwtcyacybcyoczhczkczncztdaadacdaddaedagdahdaidajdakdaldamdaodaqdardasdaudavdawdaxdazdbadbbdbddbedbfdbgdbidbjdbldbmdbndbodbpdbqdbtdbudbvdbwdbydccdcrddadddddeddgddiddjddnddoddrddsddwdecdeddeedefdegdehdeidekdeldemdendeqderdesdevdezdgadgbdgcdgddgedggdghdgidgkdgndgrdgsdgtdgwdgxdgzdhgdhidhldhmdhndhodhrdhsdhudhvdhwdhxdiadibdicdiddifdigdihdiidijdildindiodipdirdisdiudiwdixdiydizdjadjbdjcdjddjedjfdjidjjdjkdjmdjndjodjrdjudjwdkadkgdkkdkrdksdkxdlgdlmdlndmadmbdmcdmddmedmfdmgdmkdmldmmdmodmrdmsdmudmvdmwdmxdmydnadnddnedngdnidnjdnkdnndnodnrdntdnudnvdnwdnydoadobdocdoedofdohdokdoldondoodopdordosdotdovdowdoxdoydppdrcdredrgdridrldrndrodrqdrsdrtdrudrydshdsidsndsodsqdtadtbdtddthdtidtkdtmdtodtpdtrdtsdttdtudtyduadubducduedufdugduhduidukduldumdunduodupduqdurdusduuduvduwduxduyduzdv\0dvadwadwkdwrdwsdwudwwdwydwzdyadybdyddygdyidymdyndyodyudyydz\0dzadzedzgdzldzneaaebcebgebkeboebrebuecrecyee\0efaefeefiegaeglegmegoegyehueipeiteivejaekaekeekgekieklekmekoekpekrekyeleelkelmeloeluemaembemeemgemiemmemnempemsemuemwemxemzenaenbencendenfenhenlenmennenoenqenrenvenwenxeotepieraergerherierkerrerterweseesgeshesiesmessesuesyetbetnetoetretsettetuetxetzeveevhevnewoexteyaeyoezaezefaafabfadfaffagfahfaifajfakfalfamfanfapfarfaufaxfayfazfblferffiffmfgrfiafiefiffipfirfitfiwfj\0fkkfkvflaflhflifllflnflrflyfmpfmufnbfngfnifodfoifomfonforfosfpefqsfrcfrdfrkfrmfrofrpfrqfrrfrsfrtfubfudfuefuffuhfuifumfunfuqfurfutfuufuvfuyfvrfwafwegaagabgacgadgaegafgaggahgaigajgakgalgamgangaogapgaqgargasgatgaugawgaxgaygbagbbgbdgbegbfgbggbhgbigbjgbkgblgbmgbngbpgbqgbrgbsgbugbvgbwgbxgbygbzgccgcdgcfgclgcngcrgctgdbgdcgddgdegdfgdggdhgdigdjgdkgdlgdmgdngdogdqgdrgdtgdugdxgeagebgecgedgefgeggehgeigejgekgelgeqgesgevgewgexgeygezgfkggaggbggdggegggggkgglggtgguggwghaghcgheghkghnghrghsghtgiagibgicgidgiegiggihgilgimgingipgiqgirgisgitgixgiygizgjkgjmgjngjrgjugkagkdgkegkngkogkpgkuglbglcgldglhgljglkgllgloglrgluglwgmagmbgmdgmggmhgmmgmngmrgmugmvgmxgmygmzgn\0gnagnbgncgndgnegnggnhgnignjgnkgn
lgnmgnngnqgnrgntgnugnwgnzgoagobgocgodgoegofgoggohgoigokgolgomgongoogopgoqgorgosgotgougovgowgoxgoygpagpegpngqagqngqrgragrbgrcgrdgrggrhgrigrjgrmgrqgrsgrtgrugrvgrwgrxgrygrzgslgsngsogspgswgtagtuguagubgucgudguegufguhguigukgulgumgunguogupguqgurgutguuguwguxguzgv\0gvagvcgvegvfgvjgvlgvmgvngvogvpgvrgvsgvygwagwbgwcgwdgwegwfgwggwigwjgwmgwngwrgwtgwugwwgwxgxxgybgydgyegyfgyggyigylgymgyngyogyrgyygyzgzagzigznhaahachadhaehaghahhaihajhakhalhamhanhaohaphaqharhashavhawhaxhayhazhbahbbhbnhbohbuhchhdyhedheghehheihemhgmhgwhhihhrhhyhiahibhidhifhighihhiihijhikhilhiohirhithiwhixhjihkahkehkhhkkhlahlbhldhlthluhmahmbhmdhmfhmjhmmhmnhmphmqhmrhmshmthmuhmvhmwhmyhmzhnahndhnehnghnhhnihnjhnnhnohnsho\0hoahobhochodhoehohhoihojholhomhoohophorhothovhowhoyhpohrahrchrehrkhrmhrohrphrthruhrwhrxhrzhsnhssht\0htihtohtshtuhtxhubhuchudhuehufhughuhhuihukhulhumhuphurhushuthuuhuvhuwhuxhuyhuzhvchvehvkhvnhvvhwahwchwohyahywhz\0iaiianiaribaibbibdibeibgibhiblibmibnibribuibyicaichicridaidbidciddideidiidridsidtiduifaifbifeiffifkifmifuifyigbigeiggigligmignigoigsigwihbihiihpihwii\0iinijcijeijjijnijsik\0ikiikkiklikoikpikriktikvikwikxikzilailbilgiliilkilmiloilpiluilvimiimlimnimoimrimsimtimyin\0inbinginhinjinninoinpintio\0ioriouiowipiipoiquiqwireirhiriirkirniruirxiryisaiscisdishisiiskismisnisoistisuitbitditeitiitkitlitmitoitritsittitvitwitxityitziu\0iumivbivviw\0iwkiwmiwoiwsixcixliyaiyoiyxizhizrizzjaajabjacjadjaejafjahjajjakjaljamjanjaojaqjasjatjaujaxjayjazjbejbijbjjbkjbmjbnjbojbrjbtjbujbwjctjdajdgjdtjebjeejehjeijekjeljenjerjetjeujgbjgejgkjgojhiji\0jiajibjicjidjiejigjiljimjitjiujivjiyjjejjrjkajkmjkojkujlejmajmbjmcjmdjmijmljmnjmrjmsjmwjmxjnajndjngjnijnjjnljnsjobjodjogjorjowjpajprjqrjrajrbjrrjrtjrujuajubjudjuhjuijukjuljumjunjuojupjurjutjuujuwjuyjvdjvnjw\0jwijyajyejyykaakabkackadkagkahkaikajkakkamkaokapkaqkavkawkaxkaykbakbbkbckbdkbekbhkbikbjkbkkblkbmkbnkbokbpkbqkbrkbskbtkbukbvkbwkbxkbykbzkcakcbkcckcdkcekcfkcgkchkcikcjkckkclkcmkcnkcokcpkcqkcskctkcukcvkcwkczkdakdckddkdekdfkdgkdhkdikdjkdkkdlkdmkdnkdpkdqkdrkdtkdwkdxkdykdzkebkeckedkeekefk
egkehkeikekkelkemkenkeokerkesketkeukewkexkeykezkfakfbkfckfdkfekffkfhkfikfkkflkfmkfnkfokfpkfqkfrkfskfvkfwkfxkfykfzkg\0kgakgbkgekgfkgjkgkkglkgmkgokgqkgrkgskgtkgukgvkgwkgxkgykhakhbkhckhdkhekhfkhgkhhkhjkhlkhnkhpkhqkhrkhskhtkhukhvkhwkhxkhykhzki\0kiakibkickidkiekifkigkihkijkilkimkiokipkiqkiskitkiukivkiwkixkiykizkj\0kjakjbkjckjdkjekjgkjhkjikjjkjkkjlkjmkjnkjokjpkjqkjrkjskjtkjukjxkjykkakkbkkckkdkkekkfkkgkkhkkikkjkkkkklkkmkkokkpkkqkkrkkskktkkukkvkkwkkxkkykkzkl\0klaklbklckldkleklfklgklhklikljklkkllklmklnkloklpklqklrklskltkluklvklwklxklyklzkmakmbkmckmdkmekmfkmgkmhkmikmjkmkkmlkmmkmnkmokmpkmqkmskmtkmukmvkmwkmxkmykmzknaknbkndkneknfkniknjknkknlknmknoknpknqknrknskntknuknvknwknxknyknzkoakockodkoekofkogkohkoikolkookopkoqkoskotkoukovkowkoykozkpakpckpdkpekpfkpgkphkpikpjkpkkplkpmkpnkpokpqkprkpskptkpukpwkpxkpykpzkqakqbkqckqdkqekqfkqgkqhkqikqjkqkkqlkqmkqnkqokqpkqqkqrkqskqtkqukqvkqwkqxkqykqzkr\0krakrbkrckrdkrekrfkrhkrikrjkrkkrlkrnkrpkrrkrskrtkrukrvkrwkrxkrykrzksaksbkscksdkseksfksgkshksiksjkskkslksmksnksokspksqksrksskstksuksvkswksxkszktaktbktcktdktfktgkthktiktjktkktlktmktnktoktpktqktskttktuktvktwktxktyktzkubkuckudkuekufkugkuhkuikujkukkulkumkunkuokupkuqkuskutkuukuvkuwkuxkuykuzkv\0kvakvbkvckvdkvekvfkvgkvhkvikvjkvlkvmkvnkvokvpkvqkvrkvtkvvkvwkvxkvykvzkw\0kwakwbkwckwdkwekwfkwgkwhkwikwjkwkkwlkwmkwnkwokwpkwrkwskwtkwukwvkwwkwykwzkxakxbkxckxdkxfkxikxjkxkkxmkxnkxokxpkxqkxrkxtkxwkxxkxykxzkyakybkyckydkyekyfkygkyhkyikyjkykkylkymkynkyokyqkyrkyskytkyukyvkywkyxkyykyzkzakzbkzckzdkzekzfkzikzkkzlkzmkznkzokzpkzrkzskzukzvkzwkzxkzykzzla\0laalablacladlaelaglahlailajlallamlanlaplaqlarlaslaulawlaxlazlbblbclbelbflbilbjlbllbmlbnlbolbqlbrlbtlbulbvlbwlbxlbylbzlcclcdlcelcflchlcllcmlcplcqlcsldaldblddldgldhldildjldkldlldmldnldoldpldqlealeblecledleeleflehleilejleklellemlenleolepleqlerlesletleulevlewlexleylezlfalfnlg\0lgalgblgglghlgilgklgllgmlgnlgolgqlgrlgtlgulgzlhalhhlhilhmlhnlhslhtlhuli\0lialibliclidlielifliglihliklilliolipliqlirlisliulivliwlixliylizljaljeljiljlljpljwljxlkalkblkclkdlkelkhlkilkjlkllkmlknlkolkrlkslktlkulkyll
allbllclldllellfllgllilljllklllllmllnllpllqllullxlmalmblmclmdlmelmflmglmhlmilmjlmklmllmnlmplmqlmrlmulmvlmwlmxlmyln\0lnalnblndlnhlnilnjlnllnmlnnlnslnulnwlnzloaloblocloeloglohloilojloklollomlonlooloploqlorloslotloulowloxloylozlpalpelpnlpolpxlqrlralrclrglrilrklrllrmlrnlrolrtlrvlrzlsalsdlselsilsmlsrlssltglthltiltnltoltsltulu\0lualucludluelufluilujluklullumlunluolupluqlurluslutluuluvluwluyluzlvalvilvklvulwalwelwglwhlwllwmlwolwtlwwlxmlyalynlzhlzllznlzzmaamabmadmaemafmagmajmakmammanmaqmasmatmaumavmawmaxmazmbambbmbcmbdmbfmbhmbimbjmbkmblmbmmbnmbombpmbqmbrmbsmbtmbumbvmbwmbxmbymbzmcamcbmccmcdmcemcfmcgmchmcimcjmckmclmcmmcnmcomcpmcqmcrmcsmctmcumcvmcwmcxmcymczmdamdbmdcmddmdemdfmdgmdhmdimdjmdkmdmmdnmdpmdqmdrmdsmdtmdumdvmdwmdxmdymdzmeamebmecmedmeemehmejmekmelmemmenmeomepmeqmermesmetmeumevmewmeymezmfamfbmfcmfdmfemffmfgmfhmfimfjmfkmflmfmmfnmfomfpmfqmfrmftmfumfvmfwmfxmfymfzmg\0mgbmgcmgdmgemgfmggmghmgimgjmgkmglmgmmgnmgomgpmgqmgrmgsmgtmgumgvmgwmgymgzmh\0mhbmhcmhdmhemhfmhgmhimhjmhkmhlmhmmhnmhomhpmhqmhsmhtmhumhwmhxmhymhzmiamibmicmidmiemifmigmihmiimijmikmilmimminmiomipmiqmirmitmiumiwmixmiymizmjbmjcmjdmjemjgmjhmjimjjmjkmjlmjmmjnmjqmjrmjsmjtmjumjvmjwmjxmjymjzmkamkbmkcmkemkfmkimkjmkkmklmkmmknmkomkpmkrmksmktmkumkvmkwmkxmkymkzmlamlbmlcmlemlfmlhmlimljmlkmllmlnmlomlpmlqmlrmlsmlumlvmlwmlxmlzmmammbmmcmmdmmemmfmmgmmhmmimmmmmnmmommpmmqmmrmmtmmummvmmwmmxmmymmzmnamnbmndmnemnfmngmnhmnjmnlmnmmnnmnpmnqmnrmnsmnumnvmnwmnxmnymnzmo\0moamocmodmoemogmohmoimojmokmommoomopmoqmormosmotmoumovmowmoxmoymozmpampbmpcmpdmpempgmphmpimpjmpkmplmpmmpnmpomppmpqmprmpsmptmpumpvmpwmpxmpympzmqamqbmqcmqemqfmqgmqhmqimqjmqkmqlmqmmqnmqomqpmqqmqrmqsmqumqvmqwmqxmqymqzmramrbmrcmrdmrfmrgmrhmrjmrkmrlmrmmrnmromrpmrqmrrmrsmrtmrumrvmrwmrxmrymrzmsbmscmsemsfmsgmshmsimsjmskmslmsmmsnmsomspmsqmssmsumsvmswmsxmsymszmtamtbmtcmtdmtemtfmtgmthmtimtjmtkmtlmtmmtnmtomtpmtqmtrmtsmttmtumtvmtwmtxmtymuamubmucmudmuemugmuhmuimujmukmummuomuqmurmusmutmuumuvmuxmuymuzmvamvdmvfmvgmvhmvkmvlmvnmvomvpmvqmvrmvsmvtmvumvvmvwmvxmvymvzmwamwbmwcmwemwfmwgmwhmwimwkmwlmwmmwnmw
omwpmwqmwrmwsmwtmwumwvmwwmwzmxamxbmxcmxdmxemxfmxgmxhmximxjmxkmxlmxmmxnmxomxpmxqmxrmxsmxtmxumxvmxwmxxmxymxzmybmycmyemyfmygmyhmyjmykmylmymmypmyrmyumyvmywmyxmyymyzmzamzdmzemzhmzimzjmzkmzlmzmmznmzomzpmzqmzrmztmzumzvmzwmzxmzzna\0naanabnacnaenafnagnajnaknalnamnannaonapnaqnarnasnatnawnaxnaynaznb\0nbanbbnbcnbdnbenbhnbinbjnbknbmnbnnbonbpnbqnbrnbtnbunbvnbwnbyncancbnccncdncencfncgnchncincjncknclncmncnnconcqncrnctncuncxncznd\0ndandbndcnddndfndgndhndindjndkndlndmndnndpndqndrndtndundvndwndxndyndzneanebnecnedneenegnehneinejneknemnenneoneqnernetneunewnexneyneznfanfdnflnfrnfung\0ngangbngcngdngenggnghngingjngknglngmngnngpngqngrngsngtngungvngwngxngyngznhanhbnhcnhdnhenhfnhgnhinhknhmnhnnhonhpnhqnhrnhtnhunhvnhwnhxnhynhznianibnidnienifnignihniinijnilnimninnioniqnirnisnitniunivniwnixniyniznjanjbnjdnjhnjinjjnjlnjmnjnnjonjrnjsnjtnjunjxnjynjznkankbnkcnkdnkenkfnkgnkhnkinkjnkknkmnknnkonkqnkrnksnktnkunkvnkwnkxnkznlanlcnlenlgnlinljnlknlmnlonlqnlunlvnlwnlxnlynlznmanmbnmcnmdnmenmfnmgnmhnminmjnmknmlnmmnmnnmonmpnmqnmrnmsnmtnmunmvnmwnmxnmznnannbnncnndnnennfnngnnhnninnjnnknnlnnmnnnnnpnnqnnrnntnnunnvnnwnnynnznoanocnodnoenofnognohnoinojnoknomnonnopnoqnosnotnounovnownoynpbnpgnphnplnpnnponpsnpunpxnpynqgnqknqlnqmnqnnqqnqtnqynr\0nranrbnrenrfnrgnrinrknrlnrmnrpnrunrxnrznsansbnscnsdnsensfnsgnshnsknsmnsnnsonsqnssnstnsunsvnswnsxnsynszntdntentgntintjntkntmntontpntrntuntxntyntznuanucnudnuenufnugnuhnuinujnuknumnunnuonupnuqnurnusnutnuunuvnuwnuxnuynuznv\0nvhnvmnvonwbnwcnwenwgnwinwmnwonwrnwwnwxnxanxdnxenxgnxinxlnxnnxonxqnxrnxxny\0nybnycnydnyenyfnygnyhnyinyjnyknylnymnynnyonypnyqnyrnysnytnyunyvnyxnyynzanzbnzdnzinzknzmnzunzynzzoaaoacoaroavobiobkoblobmoboobrobtobuocaocoocuodaodkodtoduofuogbogcoggogooguohtoiaoieoinoj\0ojbojcojsojvojwokaokbokcokdokeokgokiokkokmokookroksokuokvokxokzolaoldoleolkolmoloolroltoluom\0omaombomcomgomiomkomlomoompomromtomuomwonaoneongonionjonkonnonoonponronsontonuonxoodoonooropaopkopmopooptopyoraorcoreorgornoroorrorsortoruorvorworxorzos\0osaoscosiosoospostosuosxotaotbotdoteotiotkotlotmotnotqotrotsott
otuotwotxotyotzouboueoumovdowiowloydoymoyyozmpabpacpadpaepafpagpahpaipakpalpampaopappaqparpaspaupavpawpaxpaypazpbbpbcpbepbfpbgpbhpbipblpbmpbnpbopbppbrpbspbtpbvpbypcapcbpccpcdpcepcfpcgpchpcipcjpckpcnpcppcwpdapdcpdnpdopdtpdupeapebpedpeepegpeipekpelpempeopeppeqpevpexpeypezpfapfepflpgapgdpggpgipgkpglpgnpgspguphdphgphhphkphlphmphnphophrphtphvphwpi\0piapibpicpidpifpigpihpijpilpimpinpiopippirpispitpiupivpiwpixpiypizpjtpkapkbpkgpkhpknpkopkppkrpkuplaplbplcpldpleplgplhpljplkpllplnploplrplspluplvplwplzpmapmbpmdpmepmfpmhpmipmjpmlpmmpmnpmopmqpmrpmspmtpmwpmxpmypmzpnapncpndpnepngpnhpnipnjpnkpnlpnmpnnpnopnppnqpnrpnspntpnvpnwpnypnzpocpoepofpogpohpoipokpomponpoopoppoqpospotpovpowpoyppeppippkpplppmppnppopppppqppspptpqapqmpraprcprdpreprfprhpriprkprmproprpprqprrprtpruprwprxpsapsepshpsipsmpsnpsqpsspstpswptapthptiptnptoptpptrpttptuptvpuapubpucpudpuepufpugpuipujpumpuopuppuqpurputpuupuwpuxpuypwapwbpwgpwmpwnpwopwrpwwpxmpyepympynpyupyxpyypzhpznquaqubqucqudqufqugquiqukqulqumqunqupquqqurqusquvquwquxquyqvaqvcqveqvhqviqvjqvlqvmqvnqvoqvpqvsqvwqvzqwaqwcqwhqwmqwsqwtqxaqxcqxhqxlqxnqxoqxpqxqqxrqxtqxuqxwqyaqypraarabracradrafragrahrairakramranraoraprarravrawraxrayrazrbbrbkrblrbprcfrdbrearebreeregreirejrelremrenresretreyrgargnrgrrgsrgurhgrhpriarifrilrimrinrirritriurjgrjirjsrkarkbrkhrkirkmrktrkwrmarmbrmcrmdrmermfrmgrmhrmirmkrmlrmmrmnrmormprmqrmtrmurmwrmxrmzrn\0rndrngrnlrnnrnrrnwrobrocrodroerofrogrolromrooroprorrourowrpnrptrrirrorrtrskrtcrthrtmrtwrubrucruerufrugruirukruorupruqrutruuruyruzrw\0rwarwkrwlrwmrworwrrxdrxwryusaasabsacsadsaesafsajsaksamsaosaqsarsassausavsawsaxsaysazsbasbbsbcsbdsbesbgsbhsbisbjsbksblsbmsbnsbosbpsbqsbrsbssbtsbusbvsbwsbxsbysbzscbscescfscgschsciscksclscnscoscpscssctscuscvscwscxsdasdbsdcsdesdfsdgsdhsdjsdksdnsdosdqsdssdusdxse\0seasebsecsedseesefsegsehseisejsekselsenseosepseqsersessetseusevsewseysezsfesfmsfwsg\0sgasgbsgcsgdsgesghsgisgjsgmsgpsgrsgssgtsgusgwsgysgzshashbshcshdsheshgshhshishjshkshmshnshoshpshqshrshsshtshushvshwshyshzsiasibsidsiesifsigsihsiisijsiksilsimsipsiqsirsissiusivsiwsix
siysizsjasjbsjdsjesjgsjlsjmsjpsjrsjtsjusjwskaskbskcskdskeskfskgskhskiskjskmsknskoskpskqskrskssktskuskvskwskxskyskzslcsldslgslhslisljsllslmslnslpslqslrsluslwslxslyslzsm\0smasmbsmcsmfsmgsmhsmjsmksmlsmnsmpsmqsmrsmssmtsmusmwsmxsmysmzsn\0sncsnesnfsngsnisnjsnksnlsnmsnnsnosnpsnqsnrsnssnusnvsnwsnxsnysnzsoasobsocsodsoesogsoisoksolsoosopsoqsorsossousovsowsoxsoysozspbspcspdspespgspispksplspmspnsposppspqsprspssptspvsqasqhsqmsqosqqsqtsqusrasrbsresrfsrgsrhsrisrksrlsrmsrnsrosrqsrrsrssrtsrusrvsrwsrxsrysrzss\0ssbsscssdssessfssgsshssjsslssmssnssossqssssstssussvssxssysszst\0stastbstestfstgsthstistjstkstlstmstnstostpstqstrstssttstvstwstysuasubsucsuesugsuisujsuksuosuqsursussutsuvsuwsuysuzsvasvbsvcsvesvmsvsswbswfswgswiswjswkswmswoswpswqswrswsswtswuswvswwswxswysxbsxesxnsxrsxssxusxwsyasybsycsyisyksylsymsynsyosyssywsyxszaszbszcszdszgsznszpszvszwszytaatabtactadtaetaftagtajtaktaltantaotaptaqtartastautavtawtaxtaytaztbatbctbdtbetbftbgtbhtbitbjtbktbltbmtbntbotbptbstbttbutbvtbwtbxtbytbztcatcbtcctcdtcetcftcgtchtcitcktcmtcntcotcptcqtcstcutcwtcxtcytcztdatdbtdctddtdetdgtdhtditdjtdktdltdmtdntdotdqtdrtdstdttdvtdxtdyteatebtectedteetegtehteitektemtenteotepteqtertestetteutevtewtexteytfitfntfotfrtfttgatgbtgctgdtgetgftghtgitgjtgntgotgptgqtgstgttgutgvtgwtgxtgytgzthdthethfthhthithkthlthmthpthqthrthsthtthuthvthythztictiftigtihtiitijtiktiltimtintiotiptiqtistittiutivtiwtixtiytjatjgtjitjjtjltjntjotjptjstjutjwtkatkbtkdtketkftkgtkltkptkqtkrtkstkttkutkvtkwtkxtkztl\0tlatlbtlctldtlftlgtlitljtlktlltlmtlntlptlqtlrtlstlttlutlvtlxtlytmatmbtmctmdtmetmftmgtmhtmitmjtmktmltmmtmntmotmqtmrtmttmutmvtmwtmytmztn\0tnatnbtnctndtngtnhtnitnktnltnmtnntnotnptnqtnrtnstnttnvtnwtnxtnytobtoctodtoftogtohtoitojtoltomtootoptoqtortostoutovtowtoxtoytoztpatpctpetpftpgtpitpjtpktpltpmtpntpptprtpttputpvtpxtpytpztqbtqltqmtqntqotqptqttqutqwtratrbtrctretrftrgtrhtritrjtrltrmtrntrotrptrqtrrtrstrttrutrvtrwtrxtrytrzts\0tsatsbtsctsdtsgtshtsitsjtsltsptsrtsttsutsvtswtsxtszttbttcttdttettftthttittjttkttlttmttnttottpttrttstttttuttvttwttytuatubtuctudtuetuftugtuht
uitujtultumtuntuotuqtustuutuvtuxtuytuztvatvdtvetvktvltvmtvntvotvstvttvutvwtvxtwatwbtwdtwetwftwgtwhtwltwmtwntwotwptwqtwrtwttwutwwtwxtwytxatxetxgtxitxjtxmtxntxotxqtxstxttxutxxtxyty\0tyatyetyhtyityjtyltyntyptyrtystyttyutyvtyxtyytyztzhtzjtzltzmtzntzotzxuamuarubaubiublubrubuudaudeudgudiudjudludmuduuesufiugaugbugeughugouhauhnuisuivujiukaukgukhukiukkukpukqukuukvukwukyulaulbulculeulfuliulkulmulnuluulwumaumbumdumgumiummumnumoumpumrumsunauneunguniunkunmunnunrunuunxunzuonupiupvuraurburcureurfurgurhuriurkurmurnurourpurrurturuurvurwurxuryurzusaushusiuskuspussusuutauteuthutputrutuuumuuruveuvhuvluwauyauzsvaavaevafvagvahvaivajvalvamvanvaovapvarvasvauvavvayvbbvbkve\0vemveovepvervgrvicvidvifvigvilvinvitvivvkavkjvkkvklvkmvknvkovkpvktvkuvkzvlpvlsvmavmbvmcvmdvmevmfvmgvmhvmivmjvmkvmlvmmvmpvmqvmrvmsvmuvmxvmyvmzvnkvnmvnpvo\0vorvotvravrovrsvrtvtovumvunvutvwawa\0waawabwacwadwaewafwagwahwaiwajwalwamwanwapwaqwarwaswatwauwavwawwaxwaywazwbawbbwbewbfwbhwbiwbjwbkwblwbmwbpwbqwbrwbtwbvwbwwcawciwddwdgwdjwdkwdtwduwdywecwedwegwehweiwemweowepwerweswetweuwewwfgwgawgbwggwgiwgowguwgywhawhgwhkwhuwibwicwiewifwigwihwiiwijwikwilwimwinwirwiuwivwiywjawjiwkawkdwkrwkwwkywlawlgwlhwliwlmwlowlrwlswluwlvwlwwlxwmawmbwmcwmdwmewmhwmiwmmwmnwmowmswmtwmwwmxwnbwncwndwnewngwniwnkwnmwnnwnownpwnuwnwwnywoawobwocwodwoewofwogwoiwokwomwonwooworwoswowwpcwrbwrgwrhwriwrkwrlwrmwrowrpwrrwrswruwrvwrwwrxwrzwsawsgwsiwskwsrwsswsuwsvwtfwthwtiwtkwtmwtwwuawubwudwulwumwunwurwutwuuwuvwuxwuywwawwbwwowwrwwwwxwwybwyiwymwynwyrwyyxaaxabxaixajxakxalxamxanxaoxarxasxatxauxavxawxayxbbxbdxbexbgxbixbjxbmxbnxbpxbrxbwxbyxchxcoxcrxdaxdkxdoxdqxdyxedxegxemxerxesxetxeuxgbxgdxggxgixgmxguxgwxhexhmxhvxiixinxirxisxiyxjbxjtxkaxkbxkcxkdxkexkgxkjxklxknxkpxkqxkrxksxktxkuxkvxkwxkxxkyxkzxlaxlcxldxlyxmaxmbxmcxmdxmfxmgxmhxmjxmmxmnxmoxmpxmqxmrxmtxmuxmvxmwxmxxmyxmzxnaxnbxnixnjxnkxnmxnnxnqxntxnuxnyxnzxocxodxogxoixokxomxonxooxopxorxowxpaxpbxpdxpfxpgxphxpixpjxpkxplxpmxpnxpoxpqxprxptxpvxpwxpxxpzxraxrbxrdxrexrgxrixrmxrnxrrxruxrwxsaxsbxsexshxsixsmxsnxspxsqxsrxssxsuxsyxtaxtbxtcxtdxt
exthxtixtjxtlxtmxtnxtpxtsxttxtuxtvxtwxtyxubxudxujxulxumxunxuoxutxuuxvexvixvnxvoxvsxwaxwdxwexwjxwkxwlxwoxwrxwtxwwxxbxxkxxmxxrxxtxyaxybxyjxykxylxytxyyxzhxzpyaayabyacyadyaeyafyagyaiyajyakyalyamyanyaoyapyaqyaryasyatyauyavyawyaxyayyazybaybbybeybhybiybjyblybmybnyboybxybyyclycnydaydeydgydkyeayecyeeyeiyejyelyeryesyetyeuyevyeyygaygiyglygmygpygryguygwyhdyi\0yiayigyihyiiyijyilyimyiryisyivykaykgykiykkykmykoykrykyylaylbyleylgyliyllylryluylyymbymeymgymkymlymmymnymoympynayndyngynkynlynqynsynuyobyogyoiyokyolyomyonyotyoyyrayrbyreyrkyrmyroyrsyrwyryysdysnyspysryssysyytwytyyuayubyucyudyufyugyuiyujyulyumyunyupyuqyuryutyuwyuxyuzyvayvtywaywgywnywqywrywuywwyxayxgyxlyxmyxuyxyyyryyuzaazabzaczadzaezafzagzahzajzakzamzaozapzaqzarzaszatzauzavzawzaxzayzazzbazbczbezbtzbuzbwzcazchzdjzeazegzehzenzgazgbzghzgmzgnzgrzhdzhizhnzhwzhxziazikzilzimzinziwzizzkazkbzkdzkozkpzktzkuzkzzlazljzlmzlnzlqzmazmbzmczmdzmezmfzmgzmhzmizmjzmkzmlzmmzmnzmozmpzmqzmrzmszmtzmuzmvzmwzmxzmyzmzznaznezngznkznszoczohzomzoozoqzorzoszpazpbzpczpdzpezpfzpgzphzpizpjzpkzplzpmzpnzpozppzpqzprzpszptzpuzpvzpwzpxzpyzpzzqezrnzrozrpzrszsazsrzsuzteztgztlztmztnztpztqztszttztuztxztyzuazuhzumzunzuyzygzyjzynzypzzazzj") }, unsafe { 
zerovec::ZeroVec::from_bytes_unchecked(b"LatnET\0LatnNG\0LatnNG\0LatnPG\0LatnPG\0LatnIT\0MlymIN\0LatnPG\0LatnPG\0LatnPG\0LatnPG\0LatnCM\0LatnBR\0ArabDZ\0LatnBR\0LatnUS\0LatnTZ\0GrekGR\0LatnPG\0LatnPG\0LatnID\0LatnID\0CyrlGE\0LatnCI\0LatnCM\0LatnPH\0LatnPH\0LatnCA\0LatnMY\0LatnPG\0ArabTJ\0LatnCI\0RjngID\0LatnNG\0LatnNG\0LatnNG\0LatnPH\0LatnGH\0LatnID\0LatnPG\0LatnCI\0ArabBH\0LatnPG\0LatnPH\0LatnPG\0LatnID\0LatnCO\0LatnNG\0LatnGH\0LatnID\0LatnLC\0LatnUG\0ArabIQ\0LatnCN\0LatnNG\0ArabYE\0LatnGT\0LatnBR\0LatnNL\0LatnEC\0LatnUS\0ArabSA\0ArabOM\0LatnCY\0LatnSD\0LatnGH\0LatnTL\0LatnCM\0LatnTG\0ArabOM\0LatnAU\0LatnUG\0LatnIN\0LatnCI\0LatnIN\0LatnID\0LatnPG\0LatnGH\0LatnID\0LatnAU\0LatnNG\0LatnBR\0TibtCN\0CyrlRU\0LatnPG\0AvstIR\0LatnAU\0ArabTN\0ArabEG\0ArabAF\0LatnNC\0LatnCM\0LatnVN\0ArabPK\0LatnAU\0LatnCN\0LatnPG\0LatnPG\0LatnPG\0ArabKW\0LatnPG\0LatnNG\0LatnGH\0LatnPG\0LatnPG\0LatnNG\0LatnNG\0LatnPG\0LatnMX\0LatnGH\0LatnID\0LatnPE\0LatnNG\0LatnNG\0LatnPG\0LatnPG\0LatnID\0LatnPG\0LatnCD\0DevaIN\0EthiET\0LatnPH\0LatnPG\0LatnPG\0LatnPH\0LatnPG\0LatnCM\0LatnPE\0LatnCM\0LatnPH\0LatnGT\0LatnPH\0LatnSB\0CyrlRU\0LatnPH\0LatnPH\0LatnGH\0LatnVU\0EthiET\0LatnID\0LatnCI\0LatnMM\0LatnTG\0LatnCI\0LatnNG\0AhomIN\0LatnCI\0DevaIN\0LatnNG\0LatnUS\0LatnSB\0ArabCN\0LatnPG\0LatnAU\0LatnPG\0LatnPG\0LatnAG\0HebrIL\0LatnNG\0LatnPG\0LatnIN\0KanaJP\0MymrIN\0LatnID\0ArabAF\0LatnID\0LatnBR\0LatnET\0LatnPG\0LatnCF\0LatnSS\0LatnBJ\0LatnNC\0LatnAU\0ArabJO\0LatnNG\0LatnIN\0LatnGH\0LatnID\0LatnID\0LatnNG\0LatnGY\0LatnNG\0LatnID\0LatnPG\0LatnPG\0XsuxIQ\0LatnPH\0LatnSR\0LatnGH\0LatnPG\0LatnVU\0LatnTG\0LatnPG\0LatnCM\0CyrlRU\0LatnCG\0LatnUS\0LatnNG\0LatnCL\0LatnCI\0LatnUS\0LatnNG\0LatnAU\0LatnPG\0LatnPH\0LaooLA\0MlymIN\0LatnVU\0LatnXK\0LatnID\0LatnID\0LatnCA\0CyrlRU\0CyrlRU\0LatnSB\0EthiET\0LatnPG\0LatnAU\0LatnCD\0LatnBR\0LatnNG\0LatnPE\0LatnPE\0LatnET\0LatnAU\0LatnTW\0LatnTD\0LatnID\0LatnPG\0LatnPG\0LatnNG\0LatnPG\0LatnID\0LatnPE\0JpanJP\0LatnPG\0LatnMX\0LatnID\0SyrcSY\0LatnAU\0LatnAU\0LatnAU\0LatnES\0LatnCO\
0LatnPE\0LatnNG\0LatnID\0LatnNC\0LatnGH\0LatnGB\0LatnPG\0CyrlRU\0LatnPG\0LatnNG\0LatnMM\0LatnIN\0LatnNG\0LatnCO\0DevaIN\0DevaIN\0LatnCO\0LatnAU\0EthiET\0LatnCM\0LatnNG\0LatnPG\0LatnCI\0LatnPG\0LatnST\0LatnPG\0LatnVE\0LatnPG\0LatnPG\0LatnPG\0LatnPG\0LatnAU\0LatnPG\0LatnNC\0LatnID\0LatnPG\0LatnPG\0LatnVU\0LatnID\0BengBD\0LatnGY\0LatnID\0LatnSB\0ArabSY\0ArabTG\0LatnPG\0LatnPH\0LatnID\0DevaNP\0LatnBR\0LatnUS\0LatnUS\0LatnUS\0LatnUS\0LatnBR\0LatnPG\0LatnVU\0LatnPG\0LatnPG\0LatnIN\0LatnBR\0LatnBR\0LatnUS\0LatnID\0LatnBR\0LatnPG\0CyrlRU\0LatnML\0LatnNG\0LatnNG\0LatnID\0LatnPH\0LatnNC\0LatnPY\0LatnBR\0ArmiIR\0LatnAU\0LatnAU\0LatnCO\0LatnUS\0LatnBR\0LatnBR\0LatnPE\0LatnCL\0LatnBO\0LatnUS\0ArabDZ\0LatnBR\0ArabSA\0LatnBR\0LatnSR\0LatnBR\0ArabMA\0ArabEG\0LatnTZ\0LatnCA\0LatnID\0SgnwUS\0LatnNG\0LatnPE\0LatnID\0LatnCM\0ArabAF\0LatnID\0LatnBR\0LatnPG\0LatnCM\0LatnBR\0LatnCD\0LatnPG\0LatnID\0LatnID\0LatnPG\0LatnCN\0LatnPE\0LatnPH\0LatnPG\0LatnNG\0LatnCI\0LatnCA\0LatnPH\0LatnPH\0LatnPH\0ArabIR\0LatnCM\0LatnPH\0LatnID\0LatnBR\0LatnUS\0LatnPH\0LatnSS\0CyrlRU\0LatnUS\0LatnBR\0LatnVU\0LatnPH\0LatnSB\0LatnEC\0LatnSB\0LatnBJ\0LatnZM\0LatnPG\0ArabLY\0LatnPG\0LatnVU\0LatnNG\0LatnPG\0LatnNG\0LatnPG\0LatnID\0LatnPG\0LatnPF\0LatnID\0LatnID\0LatnPG\0ArabUZ\0CyrlRU\0LatnPG\0ArabIR\0LatnCI\0Latn001ArabEG\0LatnAU\0LatnGH\0LatnBR\0LatnPE\0LatnPG\0LatnSS\0LatnBR\0DevaIN\0LatnPG\0LatnNG\0LatnBR\0LatnAU\0LatnID\0LatnPG\0LatnAU\0LatnPG\0EthiET\0LatnNG\0LatnID\0LatnID\0LatnBR\0LatnID\0LatnID\0LatnPG\0LatnPG\0LatnID\0LatnAR\0LatnAU\0LatnBR\0LatnCF\0LatnAU\0ArmnAM\0LatnNC\0LatnBO\0LatnPG\0LatnBJ\0LatnPE\0LatnAU\0LatnNG\0LatnTG\0ArabYE\0LatnNG\0LatnNG\0ArabLY\0ArabYE\0LatnPY\0ArabIQ\0LatnPG\0LatnPH\0LatnPH\0LatnNG\0LatnID\0ArabIR\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnCM\0LatnPH\0LatnMX\0CyrlRU\0LatnSB\0LatnGW\0LatnID\0LatnVE\0LatnCM\0LatnCM\0LatnBS\0LatnID\0ArabPK\0LatnID\0LatnCO\0DevaNP\0LatnAT\0LatnCM\0LatnNG\0LatnCM\0LatnCM\0BamuCM\0LatnID\0LatnBJ\0LatnPG\0LatnID\0LatnPG\0LatnCD\0LatnPG\0LatnGA\0LatnCM\0
LatnCM\0LatnCM\0GeorGE\0LatnCD\0LatnPG\0LatnBF\0LatnCF\0LatnCM\0LatnPG\0LatnNG\0LatnNG\0LatnNG\0LatnPG\0LatnCM\0LatnCM\0LatnCM\0LatnCN\0LatnSN\0LatnID\0LatnCM\0LatnPG\0LatnGN\0LatnPG\0LatnCI\0LatnAU\0LatnAU\0LatnPG\0LatnNG\0LatnPG\0LatnCD\0EthiET\0LatnCA\0LatnNG\0LatnCD\0LatnPG\0LatnNG\0LatnCM\0LatnNG\0LatnSN\0LatnSN\0LatnID\0LatnCO\0LatnPG\0LatnNG\0LatnPG\0LatnMY\0LatnSS\0LatnSD\0LatnSS\0LatnAZ\0LatnID\0LatnTD\0LatnCM\0LatnTD\0LatnTZ\0LatnVN\0LatnMY\0LatnTZ\0LatnCF\0LatnCM\0OryaIN\0LatnID\0LatnID\0LatnAU\0ArabPK\0LatnCA\0LatnCM\0LatnCM\0LatnID\0DevaIN\0LatnPG\0LatnBJ\0LatnID\0ArabSD\0LatnPG\0LatnZM\0LatnPG\0LatnID\0LatnCG\0LatnTD\0LatnCI\0LatnID\0LatnCI\0LatnID\0LatnSS\0LatnPG\0LatnTZ\0LatnSS\0DevaIN\0LatnCN\0LatnCM\0LatnID\0LatnCF\0LatnID\0LatnPG\0LatnCM\0LatnCF\0LatnCM\0LatnTL\0LatnBF\0LatnCM\0TamlIN\0LatnCN\0ArabPK\0TibtIN\0OryaIN\0LatnPH\0DevaIN\0DevaIN\0LatnNG\0LatnID\0DevaIN\0LatnCM\0LatnIN\0LatnPH\0LatnCM\0ArabPK\0LatnGN\0ArabPK\0DevaIN\0LatnIN\0LatnPH\0LatnSB\0LatnNG\0LatnID\0DevaIN\0GrekTR\0LatnID\0LatnID\0DevaIN\0DevaIN\0LatnID\0DevaIN\0ArabPK\0LatnPG\0LatnPG\0CyrlIL\0DevaIN\0DevaNP\0LatnPG\0ArabOM\0SyrcGE\0LatnID\0LatnID\0LatnMG\0LatnCM\0TakrIN\0DevaIN\0LatnID\0LatnID\0LatnCD\0LatnID\0LatnVU\0LatnAU\0LatnBF\0LatnTD\0LatnPG\0LatnGW\0LatnPG\0LatnPH\0LatnNG\0LatnGH\0LatnNG\0LatnPG\0LatnCD\0LatnPG\0LatnPG\0LatnPG\0LatnIN\0LatnGH\0LatnCM\0DevaIN\0LatnCD\0LatnCD\0LatnAU\0LatnPG\0SyrcIL\0LatnGW\0LatnPG\0LatnET\0DevaIN\0LatnPG\0LatnPG\0ArabIQ\0LatnID\0LatnCF\0LatnPG\0LatnPG\0LatnBB\0LatnSN\0LatnCM\0LatnTD\0LatnCI\0LatnPH\0LatnAU\0LatnPG\0LatnNG\0LatnCM\0LatnPH\0LatnCD\0LatnCF\0LatnCM\0LatnVU\0LatnCF\0LatnID\0LatnCM\0LatnID\0LatnCM\0LatnCD\0LatnBR\0LatnID\0LatnPH\0LatnCD\0LatnPH\0LatnNG\0LatnCG\0LatnTL\0LatnNG\0LatnID\0LatnCA\0LatnSB\0LatnCA\0LatnID\0LatnGW\0LatnID\0LatnLR\0LatnCD\0LatnID\0MymrMM\0LatnSS\0LatnPH\0LatnSB\0LatnPG\0LatnCN\0LatnID\0TavtVN\0LatnAO\0LatnPH\0LatnPH\0LatnBJ\0LatnID\0LatnML\0LatnNG\0LatnCD\0LatnPG\0LatnGN\0LatnCF\0LatnSL\0LatnCD\0LatnPG\0
LatnTD\0DevaNP\0LatnPG\0LatnCD\0LatnMG\0LatnPG\0LatnCM\0LatnPG\0LatnML\0LatnCO\0LatnNE\0LatnPG\0LatnCM\0LatnCG\0LatnPG\0LatnPG\0LatnID\0LatnMY\0LatnPH\0LatnID\0LatnID\0LatnID\0LatnGQ\0LatnCD\0LatnPH\0LatnVU\0LatnGQ\0LatnTW\0LatnPH\0LatnPG\0LatnID\0LatnVU\0DevaIN\0LatnID\0LatnID\0LatnPG\0LatnCD\0LatnMY\0LatnCM\0TibtCN\0LatnPE\0LatnKE\0LatnCM\0LatnBF\0LatnCD\0LatnPG\0LatnCG\0LatnNG\0LatnNG\0LatnPG\0LatnML\0LatnPG\0LatnPG\0LatnBR\0LatnSS\0LatnTZ\0LatnGH\0LatnPG\0LatnBF\0LatnCF\0LatnML\0LatnVU\0LatnCM\0LatnCF\0LatnPG\0LatnID\0CyrlRU\0LatnPG\0LatnCD\0LatnNC\0LatnAU\0LatnPG\0LatnID\0LatnID\0LatnID\0LatnPH\0LatnPH\0LatnAU\0LatnPG\0LatnID\0LatnPG\0DevaIN\0BengIN\0LatnID\0LatnBJ\0LatnID\0LatnBJ\0LatnCM\0LatnGN\0LatnTG\0ArabIR\0LatnSN\0LatnCF\0LatnPG\0LatnCM\0LatnCM\0LatnNG\0LatnID\0LatnID\0LatnPG\0LatnCM\0LatnCD\0LatnCI\0LatnNG\0LatnNG\0LatnCM\0DevaIN\0KhmrKH\0LatnGY\0DevaNP\0LatnCD\0LatnBO\0ArabPK\0LatnCM\0LatnVU\0ArabSD\0LatnBW\0LatnCD\0LatnCR\0LatnID\0LatnPG\0LatnSB\0LatnID\0LatnNG\0LatnVN\0LaooLA\0LatnPG\0LatnPG\0LatnID\0LatnBN\0LatnSN\0LatnCM\0LatnNG\0ArabAF\0LatnCM\0LatnNG\0ArabPK\0LatnNG\0LatnID\0LatnCO\0LatnTD\0LatnGN\0BassLR\0LatnNG\0LatnCM\0EthiET\0LatnID\0LatnGN\0LatnET\0LatnNG\0LatnMY\0LatnNG\0LatnCM\0BatkID\0LatnNG\0LatnTD\0LatnCI\0LatnMY\0LatnID\0LatnID\0BatkID\0LatnPH\0LatnPH\0LatnPG\0LatnMY\0LatnVU\0LatnID\0LatnNG\0LatnNG\0DevaPK\0LatnPH\0LatnID\0LatnID\0LatnID\0CyrlRU\0LatnTD\0LatnYT\0LatnTG\0LatnCA\0LatnCD\0LatnID\0LatnCN\0LatnCG\0LatnNG\0LatnPG\0LatnCM\0LatnSL\0LatnPG\0LatnID\0LatnPG\0LatnNG\0LatnPG\0LatnCD\0LatnPG\0LatnGA\0LatnNG\0LatnSL\0LatnNG\0LatnTD\0LatnGQ\0LatnSB\0LatnSB\0LatnID\0LatnTD\0LatnCM\0LatnNG\0LatnSS\0LatnNG\0LatnID\0LatnCM\0LatnPG\0LatnTD\0LatnCF\0LatnAU\0LatnID\0LatnID\0LatnVE\0LatnNG\0LatnCG\0LatnPH\0LatnID\0LatnNC\0LatnFJ\0LatnZM\0LatnPG\0MymrMM\0LatnPG\0LatnMZ\0LatnCM\0LatnVE\0LatnBF\0LatnPG\0LatnCD\0LatnPG\0LatnET\0LatnID\0LatnBF\0LatnNG\0LatnCD\0LatnCM\0LatnGH\0LatnCD\0LatnCN\0LatnBF\0LatnCG\0LatnSB\0LatnSS\0LatnGQ\0LatnPG\0LatnCD\0
LatnPG\0LatnAU\0LatnAU\0LatnBF\0CyrlMN\0LatnAU\0LatnNG\0LatnCM\0LatnNG\0LatnCM\0MongCN\0LatnTD\0LatnML\0LatnPG\0LatnPH\0LatnCM\0LatnNG\0LatnID\0LatnPG\0LatnNG\0DevaNP\0LatnCD\0LatnNG\0LatnCN\0LatnID\0LatnAU\0EthiER\0LatnNG\0LatnPG\0LatnNG\0LatnCM\0DevaNP\0LatnPG\0LatnPG\0LatnLR\0LatnID\0LatnMG\0LatnCR\0LatnML\0LatnPG\0LatnPG\0ThaiTH\0LatnBZ\0LatnNI\0LatnID\0LatnCD\0LatnID\0LatnCD\0LatnID\0LatnID\0LatnAU\0Latn001LatnID\0LatnCM\0LatnNG\0LatnML\0LatnNG\0LatnNG\0LatnGT\0LatnHN\0LatnGT\0LatnUS\0LatnSN\0LatnCA\0LatnPY\0LatnPE\0LatnAR\0LatnGT\0LatnMP\0LatnNC\0LatnPG\0LatnBO\0LatnBO\0LatnIN\0LatnVE\0LatnBO\0LatnBO\0LatnBO\0LatnBO\0LatnCA\0LatnBO\0LatnCO\0LatnCO\0LatnCO\0LatnCO\0LatnEC\0LatnBJ\0LatnPH\0LatnMM\0ThaiTH\0LatnNG\0LatnNG\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnCO\0LatnPH\0LatnCO\0LatnPE\0LatnBR\0LatnMZ\0LatnNG\0LatnNG\0LatnGW\0LatnTZ\0LatnMY\0LatnMX\0CakmBD\0LatnSV\0TeluIN\0LatnIN\0DevaIN\0GujrIN\0DevaIN\0DevaNP\0HansCN\0LatnNG\0BengIN\0CyrlRU\0LatnUS\0LatnPY\0LatnMM\0LatnNG\0LatnNG\0LatnMM\0LatnNG\0LatnNG\0LatnNG\0LatnMM\0LatnPG\0LatnPH\0LatnUG\0TibtBT\0LatnGU\0LatnCO\0LatnMX\0LatnMX\0ArabTM\0LatnUS\0LatnMX\0LatnFM\0LatnUS\0CyrlRU\0LatnUS\0LatnUS\0LatnCA\0LatnMX\0LatnPE\0LatnMZ\0DevaNP\0LatnUS\0LatnMX\0LatnID\0LatnBJ\0LatnUS\0LatnNG\0DevaIN\0LatnIT\0LatnBR\0LatnMX\0LatnNC\0LatnUS\0LatnVE\0ArabKH\0LatnVN\0LatnUS\0CyrlRU\0LatnAO\0ChamVN\0LatnPG\0LatnPE\0LatnCR\0LatnRU\0LatnPG\0HansCN\0ArabIQ\0LatnNG\0LatnHR\0LatnMM\0LatnGH\0LatnTD\0LatnPG\0LatnNC\0CyrlRU\0LatnUS\0LatnTW\0LatnCM\0LatnNG\0LatnGT\0LatnNG\0LatnCA\0LatnMX\0ArabPK\0LatnGH\0LatnMM\0LatnIN\0LatnGH\0LatnUS\0LatnMX\0LatnMM\0LatnPH\0CyrlRU\0LatnMX\0LatnVN\0LatnBF\0SoyoMN\0LatnCO\0LatnID\0LatnVN\0LatnMM\0LatnIT\0LatnZA\0TibtIN\0LatnMM\0LatnVN\0LatnCN\0LatnMM\0LatnPE\0LatnMM\0LatnMX\0HansCN\0LatnCM\0LatnID\0LatnMX\0LatnMM\0LatnGB\0LatnFR\0LatnAU\0LatnMX\0LatnMX\0LatnPE\0LatnCO\0LatnEC\0ThaiTH\0LatnKE\0LatnMX\0LatnMX\0LatnUS\0LatnUS\0LatnCA\0CoptEG\0LatnUS\0LatnPE\0LatnSN\0LatnPE\0LatnMX\0LatnMX\0LatnPE\0LatnPE\0G
rekGR\0LatnNR\0LatnGH\0LatnBF\0LatnPH\0LatnPE\0LatnCN\0LatnPE\0LatnCN\0CansCA\0LatnVC\0LatnVU\0LatnUS\0LatnCO\0LatnCA\0CyrlUA\0LatnST\0CansCA\0CansCA\0CansCA\0CansCA\0LatnMX\0LatnUS\0LatnAR\0LatnSC\0LatnAR\0LatnIN\0LatnVN\0LatnCA\0LatnNG\0LatnUS\0LatnMX\0LatnPL\0MymrMM\0LatnMM\0LatnSN\0LatnUS\0LatnMX\0HansCN\0LatnUS\0LatnUS\0LatnMM\0LatnMM\0LatnUS\0LatnMX\0LatnUS\0PaucMM\0LatnMX\0BengBD\0LatnMM\0LatnMX\0LatnUS\0DevaNP\0LatnCO\0LatnMX\0LatnPH\0TamlIN\0LatnMX\0LatnMX\0CyrlRU\0LatnVN\0LatnCO\0LatnMX\0LatnKE\0LatnCO\0LatnPE\0LatnPA\0LatnBR\0LatnVE\0LatnUS\0LatnMX\0LanaCN\0LatnCM\0LatnMX\0LatnIN\0LatnMX\0LatnTZ\0LatnMZ\0LatnTZ\0LatnMY\0LatnSN\0LatnMX\0LatnBO\0LatnPH\0HansCN\0HebrCZ\0LatnMX\0LatnMM\0LatnTD\0LatnPG\0LatnPG\0LatnCM\0LatnGH\0LatnPG\0LatnTD\0LatnSD\0LatnUS\0LatnKE\0LatnNG\0LatnMM\0DevaIN\0CyrlRU\0LatnCI\0LatnTD\0LatnKE\0LatnPH\0LatnAU\0LatnID\0LatnML\0LatnNG\0LatnNG\0LatnID\0LatnID\0LatnML\0LatnNG\0LatnMY\0LatnAU\0LatnNG\0LatnID\0LatnNG\0LatnNG\0LatnCM\0LatnML\0LatnML\0LatnNG\0LatnML\0LatnPG\0ArabIN\0LatnVI\0LatnAU\0LatnSS\0LatnCG\0LatnTL\0LatnPG\0LatnAU\0LatnBJ\0CyrlRU\0LatnAU\0LatnML\0LatnID\0LatnSD\0LatnPG\0LatnLR\0ArabIR\0LatnNG\0ArabPK\0LatnID\0LatnCM\0LatnUS\0LatnID\0LatnCA\0LatnCF\0BengIN\0LatnBR\0LatnPG\0LatnCD\0LatnGH\0LatnML\0LatnPH\0LatnBF\0LatnPG\0LatnPG\0LatnNG\0LatnBF\0LatnCF\0LatnAU\0LatnCA\0LatnBF\0LatnAU\0LatnAU\0LatnPG\0LatnPG\0LatnAU\0DevaNP\0LatnAU\0LatnAO\0GujrIN\0DevaIN\0LatnAU\0LatnTZ\0LatnAU\0LatnNC\0DevaNP\0LatnAU\0LatnPG\0LatnSS\0LatnCI\0LatnSS\0LatnAU\0LatnKE\0LatnMX\0LatnCM\0LatnID\0LatnSD\0LatnSS\0LatnNG\0LatnSS\0LatnNG\0LatnIN\0LatnNA\0LatnSS\0LatnVU\0LatnID\0LatnCD\0LatnAU\0LatnAU\0LatnTD\0LatnAU\0LatnNE\0LatnAU\0LatnAU\0LatnAU\0LatnSR\0LatnML\0LatnAU\0LatnID\0LatnAU\0LatnPG\0LatnAU\0TibtBT\0LatnNG\0LatnID\0LatnMY\0LatnSS\0LatnCM\0CyrlRU\0LatnHR\0LatnIN\0LatnGA\0LatnML\0LatnPG\0LatnAU\0LatnCM\0MedfNG\0LatnMY\0ArabPK\0ArabPK\0LatnCM\0LatnCM\0LatnID\0LatnID\0LatnID\0LatnMY\0LatnAU\0LatnMZ\0LatnID\0LatnID\0LatnPG\0LatnTZ\0CyrlKG\0L
atnID\0LatnCI\0LatnID\0LatnBF\0LatnCD\0LatnPG\0LatnID\0MymrMM\0MymrMM\0LatnID\0LatnBR\0LatnPG\0LatnPG\0LatnCN\0LatnTZ\0LatnPG\0LatnNG\0LatnID\0LatnPG\0LatnPG\0LatnCD\0LatnBJ\0LatnSB\0LatnBF\0LatnNG\0LatnZW\0LatnCM\0EthiET\0LatnGH\0LatnMY\0LatnPT\0TibtNP\0LatnMY\0LatnNG\0LatnAU\0LatnID\0LatnMY\0DevaNP\0EthiET\0LatnNL\0LatnTW\0DevaNP\0LatnKE\0LatnTD\0LatnID\0OryaIN\0LatnML\0LatnCN\0LatnMY\0LatnCA\0LatnAU\0LatnML\0LatnML\0LatnML\0LatnML\0LatnMY\0LatnMY\0LatnML\0LatnML\0LatnML\0DevaNP\0LatnCM\0GujrIN\0LatnPG\0LatnPH\0LatnNC\0LatnKE\0DevaIN\0LatnPG\0LatnPG\0LatnPH\0LatnNL\0LatnID\0LatnPH\0LatnID\0LatnID\0LatnCM\0DevaNP\0LatnCN\0LatnID\0LatnID\0LatnML\0LatnPH\0LatnCM\0ThaaMV\0LatnPG\0LatnNG\0OryaIN\0LatnET\0Latn001LatnAU\0LatnPG\0LatnAU\0DevaNP\0LatnBF\0LatnAU\0LatnAU\0LatnPH\0LatnCI\0LatnML\0LatnAU\0LatnSN\0LatnBF\0LatnAU\0TibtBT\0LatnNG\0LatnAU\0LatnTD\0TibtBT\0LatnCD\0LatnAU\0LatnID\0LatnNG\0LatnPH\0LatnCG\0LatnCI\0LatnKE\0GrekGR\0CprtCY\0LatnGH\0LatnNG\0LatnCD\0LatnNG\0LatnCI\0LatnIT\0LatnTZ\0LatnNG\0EgypEG\0LatnNG\0LatnID\0LatnPG\0LatnPG\0LatnGW\0LatnNG\0LatnNG\0LatnID\0LatnNG\0LatnBD\0LatnCM\0LatnMZ\0LatnNG\0LatnNG\0KaliMM\0LatnPG\0LatnPG\0LatnNG\0LatnKE\0LatnPG\0LatnNG\0LatnID\0LatnGF\0DevaNP\0LatnPG\0LatnMX\0LatnCM\0LatnPA\0LatnUS\0DevaIN\0LatnID\0LatnFR\0LatnCM\0LatnPG\0LatnKE\0LatnVN\0LatnID\0CyrlRU\0CyrlRU\0LatnPY\0LatnGB\0LatnNG\0LatnID\0LatnPG\0LatnID\0LatnNG\0LatnNG\0LatnPY\0LatnCI\0LatnNG\0TamlIN\0LatnVU\0LatnNG\0LatnPG\0LatnVU\0LatnAU\0LatnID\0LatnID\0LatnBO\0GonmIN\0ArabIR\0LatnUS\0LatnCI\0LatnUS\0LatnUS\0LatnPH\0LatnNG\0LatnVU\0LatnCM\0LatnPG\0LatnNG\0ItalIT\0LatnNG\0LatnNG\0LatnID\0CyrlRU\0LatnNG\0CyrlRU\0LatnCM\0LatnES\0LatnUS\0LatnKE\0LatnNG\0LatnNG\0LatnPG\0LatnGQ\0LatnPG\0LatnSB\0LatnPG\0LatnNG\0LatnPG\0LatnPG\0LatnCM\0LatnCM\0LatnNG\0LatnGQ\0LatnSN\0LatnSB\0LatnID\0LatnES\0ArabIR\0ArabIR\0LatnPH\0LatnSS\0LatnPG\0LatnML\0LatnTD\0ArabSD\0LatnNG\0LatnSA\0LatnTZ\0LatnNG\0LatnSE\0LatnPG\0LatnFJ\0LatnNG\0LatnNO\0LatnUS\0LatnID\0LatnNG\0LatnCM\0LatnAU\0La
tnCD\0LatnZA\0LatnCM\0DevaIN\0LatnVU\0LatnZA\0LatnTD\0LatnBJ\0LatnPG\0LatnCD\0LatnBJ\0LatnPG\0LatnTW\0LatnGQ\0LatnPG\0LatnUS\0LatnID\0LatnDE\0LatnFR\0LatnFR\0LatnFR\0LatnPG\0LatnDE\0LatnDE\0LatnVU\0ArabCM\0LatnWF\0LatnBJ\0LatnGN\0LatnNE\0LatnTD\0LatnNG\0LatnBR\0LatnNE\0LatnIT\0LatnVU\0LatnCD\0LatnNG\0LatnPG\0LatnSD\0LatnNC\0LatnNA\0LatnGH\0LatnTD\0LatnIN\0LatnPH\0LatnVE\0LatnPG\0LatnMD\0LatnPG\0LatnPG\0LatnPG\0LatnID\0LatnTL\0LatnPG\0HansCN\0LatnPG\0LatnPG\0OryaIN\0LatnPG\0GujrIN\0LatnPG\0TeluIN\0LatnPG\0LatnET\0LatnID\0LatnCF\0LatnAU\0LatnAU\0LatnPG\0LatnPG\0LatnCF\0LatnBJ\0LatnID\0OryaIN\0DevaIN\0GujrIN\0DevaIN\0LatnSS\0LatnCF\0LatnCF\0LatnNG\0LatnBJ\0LatnAU\0LatnCF\0LatnAU\0LatnBJ\0LatnNG\0ArabIR\0LatnPG\0LatnAU\0LatnGP\0LatnGD\0LatnPG\0LatnGF\0LatnVE\0OryaIN\0LatnAU\0LatnPG\0LatnNG\0LatnNG\0LatnPH\0LatnAU\0LatnCF\0LatnAU\0LatnTD\0LatnET\0LatnTD\0LatnPG\0CyrlRU\0LatnYE\0LatnPG\0LatnAU\0LatnNG\0DevaIN\0LatnNG\0LatnPG\0LatnLR\0LatnNG\0LatnID\0LatnNG\0LatnCA\0LatnID\0LatnTG\0LatnNG\0LatnNG\0LatnCF\0LatnID\0LatnGA\0LatnNG\0LatnSO\0LatnCD\0EthiET\0LatnPG\0LatnSB\0LatnLR\0LatnAU\0LatnAU\0ArabPK\0LatnAU\0LatnPG\0LatnPG\0LatnCI\0LatnPG\0ArabLY\0LatnGB\0DevaNP\0LatnMM\0LatnSB\0ArabPK\0LatnPG\0TibtNP\0LatnAU\0LatnNG\0LatnZA\0LatnCM\0LatnCI\0ArabPK\0LatnAU\0LatnKI\0LatnPG\0CyrlRU\0LatnPG\0LatnVN\0LatnVN\0LatnCM\0LatnCA\0LatnCD\0LatnAU\0LatnCM\0ArabPK\0LatnAU\0LatnGH\0LatnAU\0ArabPK\0LatnPG\0LatnPG\0LatnCM\0LatnNG\0LatnAU\0LatnGN\0LatnZA\0LatnNG\0LatnTD\0CyrlRU\0ArabAF\0LatnTD\0ArabIR\0LatnAU\0LatnNG\0LatnLR\0LatnTD\0LatnNG\0LatnAU\0LatnSB\0LatnNG\0LatnPG\0LatnDE\0LatnCM\0LatnCM\0LatnAU\0LatnPG\0EthiET\0LatnTZ\0LinbGR\0LatnNG\0LatnPY\0LatnBF\0LatnIN\0LatnES\0LatnCM\0LatnNG\0LatnTG\0LatnNG\0LatnAU\0LatnCI\0LatnBW\0LatnAU\0LatnPG\0LatnAU\0LatnMY\0LatnAU\0LatnPG\0LatnPG\0LatnBO\0LatnCF\0LatnCI\0LatnCO\0LatnPG\0LatnCI\0TibtBT\0EthiET\0LatnTZ\0LatnDE\0LatnPG\0DevaIN\0LatnLR\0DevaIN\0TeluIN\0LatnFJ\0LatnID\0LatnID\0LatnID\0LatnNL\0GothUA\0LatnCM\0LatnCI\0LatnTZ\0LatnCD\0LatnTD\0La
tnNG\0LatnGH\0LatnPG\0LatnNG\0LatnBR\0LatnTD\0DevaIN\0LatnLR\0CprtCY\0LatnNG\0LatnPG\0LatnNG\0LatnSB\0LatnLR\0LatnMY\0LatnPG\0LatnID\0BengIN\0EthiET\0LatnLR\0LatnPG\0LatnPG\0LatnLR\0LatnPG\0LatnSN\0LatnPG\0LatnCF\0LatnPG\0LatnCH\0LatnBR\0LatnAU\0LatnNG\0LatnBR\0LatnCO\0LatnCI\0LatnAU\0LatnAU\0LatnCO\0LatnBO\0LatnET\0LatnUS\0LatnCO\0LatnBR\0LatnCO\0LatnAU\0LatnPY\0LatnGH\0LatnCR\0LatnVE\0LatnBJ\0LatnBF\0LatnKE\0LatnIM\0LatnPY\0LatnBR\0LatnPG\0LatnPG\0LatnBR\0LatnTD\0LatnNG\0LatnAU\0LatnBR\0LatnBR\0DevaNP\0LatnPG\0LatnAU\0LatnCI\0LatnNG\0ArabPK\0LatnET\0LatnTZ\0ArabPK\0LatnNG\0LatnCA\0LatnBW\0LatnAU\0LatnNG\0LatnUG\0ArabAF\0LatnAU\0LatnAU\0LatnGH\0LatnCI\0LatnPG\0LatnAU\0LatnNG\0LatnAU\0LatnCF\0LatnCM\0LatnET\0LatnPA\0LatnGY\0DevaNP\0LatnBO\0LatnAU\0LatnNG\0LatnSD\0ArabIR\0LatnID\0LatnUS\0ArabIR\0LatnID\0LatnET\0LatnGH\0LatnPG\0LatnCA\0LatnIN\0HansCN\0LatnVN\0LatnPG\0LatnTZ\0LatnPG\0LatnID\0LatnTZ\0EthiET\0LatnCA\0LatnCD\0LatnUS\0LatnCA\0LatnTZ\0ArabAF\0LatnCD\0LatnNG\0LatnSD\0HebrIL\0LatnTL\0LatnMX\0EthiET\0LatnTD\0LatnID\0LatnTZ\0LatnCA\0LatnCD\0LatnNA\0LatnPG\0LatnPG\0LatnSN\0LatnPG\0LatnNG\0LatnPE\0LatnUS\0LatnFJ\0LatnNG\0LatnPG\0TakrIN\0LatnCM\0LatnID\0LatnPH\0LatnBW\0LatnBR\0XsuxTR\0LatnVU\0LatnBR\0LatnID\0LatnTZ\0LatnCD\0ArabIN\0LatnPG\0LatnPG\0DevaIN\0LatnVN\0LatnMM\0HluwTR\0LatnCN\0LatnML\0PlrdCN\0LatnVN\0BopoCN\0LatnCN\0LatnCN\0LatnCN\0BopoCN\0LatnIN\0LatnCN\0LatnPG\0LatnID\0LatnVN\0LatnCN\0LatnCN\0LatnCN\0LatnCM\0ArabPK\0DevaIN\0LatnAO\0LatnBW\0LatnCN\0HmnpUS\0LatnPH\0ArabPK\0LatnSR\0LatnPG\0LatnSB\0LatnPG\0DevaIN\0LatnNG\0LatnNG\0ArabOM\0LatnUS\0DevaIN\0LatnAO\0LatnSS\0LatnCD\0LatnUS\0LatnTD\0LatnPG\0LatnID\0HaniCN\0DevaIN\0MymrMM\0LatnIN\0LatnPG\0LatnVN\0LatnID\0LatnCN\0LatnVN\0LatnAU\0SyrcTR\0LatnIN\0LatnPG\0LatnBR\0ArabIR\0HansCN\0ArabOM\0LatnHT\0LatnID\0LatnCO\0LatnTZ\0LatnID\0XsuxTR\0LatnPE\0LatnBW\0LatnID\0LatnMX\0LatnPG\0LatnPE\0LatnCL\0LatnPG\0LatnID\0LatnPG\0LatnCD\0LatnUS\0LatnCA\0LatnMX\0DevaNP\0LatnPE\0LatnMX\0LatnID\0LatnPE\0HebrIL\0CyrlRU\0La
tnHT\0LatnMX\0LatnNC\0LatnID\0LatnMX\0LatnCI\0LatnUS\0LatnNG\0LatnCM\0ArmnAM\0LatnNA\0LatnNC\0LatnPG\0LatnPG\0LatnMY\0LatnNG\0LatnAU\0LatnNG\0LatnPH\0LatnVN\0LatnPH\0LatnNG\0LatnNG\0LatnNG\0LatnID\0LatnNG\0LatnBJ\0LatnNG\0LatnCO\0LatnKE\0LatnIN\0LatnNG\0LatnBJ\0LatnNG\0LatnPG\0LatnSS\0LatnNG\0LatnTL\0LatnNG\0LatnPH\0LatnPH\0LatnTG\0LatnVU\0LatnPH\0LatnCG\0LatnPH\0LatnPH\0LatnNG\0LatnNG\0LatnPG\0LatnNG\0LatnPG\0LatnBO\0LatnPG\0Latn001LatnNG\0LatnID\0LatnNG\0LatnID\0LatnAU\0YiiiCN\0LatnAU\0LatnNG\0LatnNG\0LatnBJ\0LatnNG\0LatnNG\0LatnUS\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnAU\0LatnCA\0LatnNG\0LatnNG\0LatnUG\0LatnTZ\0LatnID\0LatnZM\0LatnAU\0LatnCN\0LatnPH\0LatnMY\0LatnPH\0LatnPH\0LatnID\0LatnNG\0LatnPG\0LatnUS\0LatnPG\0LatnPG\0LatnID\0LatnIT\0LatnSS\0LyciTR\0LatnID\0LatnCO\0LatnUS\0CyrlRU\0LatnCO\0LatnPH\0LatnPG\0LatnPE\0MymrMM\0Latn001EthiET\0LatnPG\0LatnUS\0LatnPG\0LatnPG\0LatnPE\0LatnNG\0LatnID\0LatnID\0LatnNG\0LatnTZ\0LatnBR\0TamlIN\0LatnID\0LatnPH\0LatnPG\0LatnPE\0LatnPH\0LatnNG\0LatnNG\0ArabAF\0LatnID\0LatnTZ\0LatnNG\0LatnHR\0LatnCM\0LatnPH\0LatnID\0LatnBO\0LatnPH\0HebrIT\0CyrlRU\0LatnNG\0LatnBO\0LatnPG\0LatnNG\0LatnPH\0LatnPH\0LatnNG\0LatnID\0LatnPH\0LatnGT\0CansCA\0LatnCN\0LatnPH\0LatnPH\0HebrIL\0LatnPH\0LatnPG\0LatnID\0LatnPG\0LatnMX\0LatnGT\0LatnNG\0LatnCM\0LatnCG\0LatnRU\0LatnNG\0LatnNG\0LatnBR\0LatnNG\0LatnGT\0ArabGN\0LatnPG\0LatnNG\0LatnMY\0LatnSB\0LatnMY\0LatnID\0LatnJM\0LatnAU\0LatnAU\0LatnID\0LatnNC\0ArabAF\0LatnID\0LatnID\0LatnAU\0LatnNC\0HebrIL\0LatnAU\0LatnID\0LatnPG\0LatnNG\0ArabLY\0Latn001LatnID\0LatnBR\0LatnCM\0LatnAU\0CyrlUA\0TibtIN\0ArabPK\0CyrlRU\0LatnPE\0DevaNP\0LatnVN\0LatnID\0LatnCI\0LatnID\0LatnNG\0LatnNG\0LatnPG\0LatnTD\0LatnCD\0GeorGE\0LatnNG\0LatnCM\0LatnMY\0HebrUA\0LatnCM\0LatnNG\0LatnHN\0LatnNG\0LatnNG\0LatnAU\0LatnPG\0LatnCM\0LatnTZ\0LatnCN\0LatnEC\0LatnCN\0HangKR\0LatnNG\0LatnID\0MymrMM\0LatnPG\0LatnNG\0LatnSD\0LatnPG\0LatnNG\0LatnTZ\0LatnID\0LatnNG\0DevaNP\0LatnMM\0LatnGH\0LatnNG\0LatnPG\0LatnMX\0TakrIN\0ArabPK\0LatnAU\0LatnN
G\0LatnET\0DevaIN\0DevaIN\0LatnCD\0LatnCI\0ArabPK\0LatnBO\0LatnML\0HebrPS\0HebrIL\0LatnPE\0LatnVN\0HebrIL\0LatnNG\0LatnNG\0LatnVE\0LatnBR\0LatnNG\0LatnCI\0LatnNG\0LatnAU\0LatnNG\0DevaNP\0LatnSD\0OryaIN\0LatnNG\0LatnBR\0LatnBR\0LatnDK\0LatnNG\0LatnNG\0OryaIN\0LatnID\0LatnSR\0LatnID\0LatnGH\0TibtCN\0HebrIL\0LatnTD\0CyrlUZ\0LatnDZ\0LatnMM\0LatnNG\0LatnMY\0LatnCF\0LatnNG\0LatnNG\0LatnPH\0LatnKE\0LatnML\0CyrlRU\0LatnPE\0LatnBR\0KawiID\0LatnID\0LatnBR\0LatnAU\0LatnBR\0LatnBR\0CyrlRU\0LatnAU\0LatnCO\0LatnID\0LatnCD\0LatnPG\0LatnTD\0LatnPG\0LatnCF\0LatnSS\0LatnTG\0LatnPG\0LatnET\0LatnGA\0LatnPG\0ArabPK\0LatnID\0LatnPG\0LatnPG\0ArabNE\0LatnNG\0CyrlRU\0LatnPG\0LatnNG\0LatnID\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnGW\0LatnZW\0LatnPG\0LatnCF\0LatnUG\0LatnPG\0LatnSD\0LatnNG\0LatnNG\0LatnPG\0LatnTZ\0LatnCD\0LatnCD\0LatnTZ\0LatnAU\0LatnTZ\0LatnAU\0LatnTZ\0LatnPG\0LatnCD\0LatnTG\0LatnUG\0LatnUG\0LatnNC\0LatnNG\0LatnNG\0LatnZW\0LatnNG\0BengIN\0LatnLT\0ThaiTH\0LatnID\0LatnNG\0LatnID\0LatnCM\0LatnGA\0LatnSD\0LatnTZ\0LatnUS\0LatnTG\0LatnSD\0LatnPG\0LatnID\0LatnGT\0LatnCD\0LatnTL\0LatnCM\0LatnUG\0LatnTD\0LatnNG\0CyrlRU\0LatnTG\0LatnPG\0DevaIN\0TeluIN\0LatnNG\0KndaIN\0DevaIN\0TeluIN\0KndaIN\0TamlIN\0LatnIN\0MlymIN\0TamlIN\0DevaIN\0LatnCM\0ArabIR\0LatnCM\0LatnCI\0DevaIN\0DevaIN\0DevaIN\0DevaIN\0LatnIN\0LatnIN\0DevaIN\0DevaIN\0LatnBF\0LatnCD\0LatnCI\0LatnID\0LatnID\0LatnPG\0DevaNP\0LatnBR\0LatnAU\0LatnBR\0LatnSD\0LatnID\0LatnID\0LatnAU\0LatnNG\0LatnPG\0LatnID\0LatnID\0LatnID\0DevaNP\0LatnIN\0TaluCN\0LatnID\0LatnID\0LatnID\0ThaiLA\0TibtCN\0LatnID\0LatnNG\0LatnPG\0DevaIN\0LatnID\0LatnML\0LatnIN\0LatnPG\0MymrIN\0LatnAO\0CyrlRU\0ArabPK\0LatnCD\0LatnCD\0LatnPG\0LatnKE\0LatnTD\0LatnSD\0LatnUS\0LatnCM\0LatnTD\0DevaNP\0LatnID\0LatnPG\0LatnPG\0LatnNG\0CyrlRU\0LatnUS\0DevaNP\0LatnID\0LatnPG\0LatnPG\0LatnTR\0LatnTZ\0LatnPG\0LatnIN\0LatnID\0LatnTZ\0LatnNA\0LatnID\0LatnGT\0LatnID\0LatnPG\0LatnID\0LaooLA\0CyrlRU\0LatnSB\0LatnAZ\0LatnID\0DevaNP\0LatnVN\0LatnAU\0DevaIN\0MymrMM\0LatnUS\0LatnID\0LatnP
G\0ThaiTH\0LatnUS\0LatnPG\0LatnPG\0LatnNG\0LatnID\0LatnPG\0LatnNG\0LatnGN\0TibtIN\0LatnPH\0LanaMM\0LatnTZ\0LatnCM\0LatnSB\0LatnID\0LatnNG\0LatnSD\0LatnAU\0LatnCD\0LatnNG\0LatnNG\0DevaNP\0LatnNG\0LatnID\0LatnCG\0LatnID\0LatnAU\0LatnCA\0LatnGL\0LatnUS\0LatnMX\0LatnCM\0LatnAU\0DevaNP\0LatnTD\0LatnPH\0LatnPG\0LatnID\0ArabIR\0LatnNG\0LatnPH\0LatnPG\0LatnKE\0LatnNG\0LatnPG\0LatnPG\0DevaNP\0LatnPK\0LatnPG\0LatnLR\0LatnVU\0LatnID\0LatnPG\0LatnID\0LatnID\0LatnGH\0LatnAO\0LatnCN\0LatnPH\0LatnCM\0LatnPG\0LatnPG\0LatnPG\0LatnNG\0DevaIN\0LatnPH\0LatnPH\0LatnIN\0LatnPG\0LatnPG\0LatnCM\0LatnET\0LatnPG\0LatnID\0LatnPG\0LatnBR\0LatnCD\0LatnPG\0LatnNG\0ArabIR\0LatnNG\0LatnPH\0LatnID\0LatnPH\0LatnGW\0LatnNG\0LatnGT\0LatnSL\0LatnID\0LatnBR\0LatnSL\0LatnCM\0LatnMY\0LatnPG\0LatnMY\0LatnBR\0LatnGN\0LatnPG\0LatnNA\0LatnID\0LatnCD\0LatnBF\0LatnPG\0LatnNG\0LatnID\0LatnSS\0LatnNG\0LatnCO\0LatnCG\0CyrlRU\0LatnPG\0LatnUG\0LatnPG\0LatnGA\0LatnFM\0LatnCM\0LatnTD\0LatnNG\0LatnNG\0LatnUS\0LatnPG\0LatnNG\0LatnCO\0LatnID\0LatnLR\0LatnPG\0LatnFM\0LatnGH\0LatnID\0LatnBR\0LatnNG\0LatnCD\0LatnVN\0LatnBR\0LatnTG\0LatnID\0LatnPG\0LatnID\0CyrlRU\0LatnID\0LatnPG\0LatnPG\0CyrlRU\0LatnUG\0LatnPG\0LatnPG\0LatnPG\0SyrcIQ\0LatnPH\0LatnPG\0LatnBF\0LatnTZ\0LatnPG\0LatnPG\0LatnBJ\0LatnPG\0LatnCI\0LatnZM\0LatnLR\0LatnTD\0LatnBR\0LatnMY\0LatnGN\0LatnMY\0LatnZA\0LatnID\0LatnPG\0LatnCM\0EthiET\0LatnZA\0LatnNG\0DevaNP\0LatnUS\0CyrlRU\0LatnTL\0LatnBR\0LatnVU\0LatnNG\0LatnSL\0LatnPH\0CyrlRU\0LatnRU\0LatnLR\0LatnNG\0KhmrKH\0LatnSS\0LatnNE\0DevaIN\0KhmrKH\0LatnLR\0LatnSN\0LatnAZ\0LatnID\0LatnNG\0LatnTZ\0LatnPH\0LatnPG\0LatnPG\0LatnCM\0LatnSB\0LatnDE\0LatnPG\0LatnPG\0LatnUS\0LatnPG\0LatnNG\0LatnPH\0LatnNG\0LatnCF\0LatnNG\0LatnPG\0LatnLR\0LatnBF\0MymrIN\0LatnCD\0MymrMM\0LatnID\0DevaIN\0LatnVN\0EthiET\0LatnNG\0LatnAU\0LatnCD\0LatnAU\0LatnTD\0LatnID\0LatnCI\0LatnPG\0ArabIR\0LatnPG\0LatnBR\0LatnPG\0PlrdCN\0LatnPH\0LatnID\0LatnID\0LatnCD\0LatnVN\0LatnUS\0LatnBR\0LatnCD\0LatnNA\0LatnNG\0LatnID\0LatnPG\0LatnPG\0LaooLA\0LatnNG\0LatnN
G\0LatnBR\0LatnTZ\0LatnID\0LatnNG\0CyrlRU\0LatnER\0LatnPG\0LatnPG\0LatnBR\0LatnGH\0LatnCA\0LatnUS\0LatnID\0LatnCF\0LatnAU\0LatnAU\0LatnCL\0CyrlRU\0CyrlRU\0LatnID\0LatnPG\0LatnID\0LatnMY\0LatnTD\0LatnPG\0LatnID\0LatnTD\0LatnCM\0LatnMM\0LatnCM\0LatnCO\0LatnID\0LatnID\0MymrMM\0LatnID\0MymrMM\0LatnID\0LatnID\0ArabPK\0KaliMM\0LatnID\0LatnGB\0LatnBR\0LatnNG\0LatnCG\0LatnSB\0LatnID\0LatnSB\0LatnTD\0LatnID\0LatnCO\0LatnPG\0LatnCA\0LatnNG\0LatnNA\0LatnNA\0LatnPG\0LatnCI\0LatnID\0LatnCD\0LatnID\0LatnCM\0LatnTD\0LatnSR\0LatnCD\0LatnAO\0LatnPG\0LatnCI\0LatnET\0LatnBN\0MymrMM\0LatnMY\0LatnTD\0MymrMM\0ThaiTH\0LatnMY\0LatnBR\0ArabPK\0LatnID\0LatnPG\0LatnPG\0LatnPG\0LatnCG\0LatnVN\0LatnPG\0LatnTZ\0LatnPH\0LatnPG\0LatnID\0LatnGH\0LatnCI\0LatnPG\0LatnUS\0LatnMY\0LatnPH\0LatnPH\0LatnUS\0LatnCF\0LatnPH\0LatnID\0LatnTD\0LatnBR\0LatnMY\0LatnID\0KaliMM\0DevaNP\0DevaIN\0LatnPG\0LatnPG\0LatnBR\0LatnBF\0LatnID\0LatnCI\0LatnID\0LatnPG\0LatnID\0LatnMY\0LatnSB\0LatnID\0LatnID\0LatnMW\0LatnGA\0LatnID\0LatnCM\0LatnMY\0LatnID\0LatnID\0LatnBR\0LatnID\0LatnCD\0LatnID\0LatnVA\0LatnPH\0LinaGR\0LatnMX\0HebrIL\0DevaIN\0LatnTZ\0ArabPK\0LatnMW\0LatnUG\0LatnCD\0LatnZM\0LatnNG\0LatnTD\0LatnVN\0LatnGH\0LatnTG\0LatnID\0LatnID\0LatnIN\0LatnPG\0LatnPG\0LisuCN\0CyrlRU\0DevaIN\0LatnCM\0TibtIN\0LatnPH\0DevaIN\0LatnLA\0LaooLA\0LatnPG\0DevaNP\0LatnVN\0LatnPG\0LatnPG\0LatnID\0LatnID\0LatnAU\0LatnAU\0LatnID\0LatnID\0LatnID\0LatnID\0LatnAO\0LatnID\0LatnPG\0ThaiCN\0LatnID\0LatnID\0LatnCI\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnCG\0LatnNG\0LatnNG\0LatnNG\0LatnGN\0Latn001LatnNG\0LatnNG\0LatnNG\0LatnCD\0LatnZM\0LatnBO\0LatnCD\0LatnBF\0LatnGH\0LatnZM\0LatnPG\0LatnCD\0LatnPG\0LatnCD\0LatnCM\0LatnHN\0LatnCM\0LepcIN\0LatnPG\0LatnPG\0LatnCD\0LatnPG\0LatnPG\0LatnID\0LatnID\0LatnID\0LatnID\0CyrlRU\0LatnCM\0Latn001LatnUG\0LatnSB\0LatnSB\0LatnUG\0LatnVN\0LatnID\0LatnVU\0LatnSB\0LatnCD\0LatnET\0LatnSS\0LatnGH\0LatnSB\0LatnPG\0LatnSB\0LatnCD\0LatnVN\0LatnID\0LatnCN\0DevaNP\0LatnMY\0SyrcSY\0LatnVU\0LatnCN\0LatnNL\0LatnSL\0LatnPG\0LatnCN\
0LatnPG\0LatnCD\0DevaNP\0LatnGH\0LatnPG\0LatnCD\0LatnCA\0LatnID\0LatnGH\0LatnET\0LatnLR\0LisuCN\0LatnSD\0LatnLV\0LatnID\0LatnID\0LatnCF\0LatnCD\0LatnAU\0LatnID\0LatnID\0LatnID\0LatnID\0LatnAU\0LatnAU\0LatnTL\0LatnKE\0LatnVN\0LatnBR\0LatnUG\0TibtBT\0ArabIR\0LatnMY\0LatnPG\0LatnAU\0LatnVU\0LatnKE\0LatnSS\0LatnKE\0LatnUS\0LatnAU\0LatnSS\0LatnNG\0LatnMZ\0LatnGN\0LatnIT\0LatnPG\0LatnPG\0LatnID\0LatnCG\0LatnAU\0LatnMY\0LatnPG\0LatnID\0LatnTD\0LatnVU\0LatnID\0LatnSB\0LatnFJ\0LatnGN\0LatnVU\0LatnAU\0LatnSD\0LatnTD\0LatnID\0LatnPG\0DevaNP\0LatnCD\0LatnID\0LatnIN\0LatnVU\0TeluIN\0LatnCM\0LatnID\0LatnID\0LatnVU\0LatnFJ\0LatnUS\0LatnCM\0LatnID\0LatnCD\0LatnCF\0LatnNA\0LatnID\0LatnMY\0LatnPG\0LatnAU\0LatnCF\0LatnPG\0LatnVU\0LatnCM\0LatnNG\0LatnAU\0LatnCD\0LatnID\0LatnBF\0LatnPH\0LatnID\0LatnCD\0LatnSS\0LatnCI\0LatnPG\0LatnSL\0LatnCD\0LatnLR\0LatnMW\0LatnCD\0LatnNG\0LatnCD\0LatnCI\0LatnPG\0LatnSS\0LatnUS\0LatnMY\0LatnID\0DevaNP\0LatnZM\0LatnVU\0LatnID\0LatnMM\0PlrdCN\0LatnSS\0LatnSS\0LatnMY\0ArabIR\0LatnAU\0LatnKE\0ArabPK\0ArabIR\0LatnKE\0LatnID\0LatnSD\0LatnID\0LatnVU\0LatnVU\0ArabIR\0HebrIL\0LatnCD\0LatnMM\0LatnUG\0LatnPG\0ArabPK\0LatnLV\0LatnUG\0LatnID\0LatnBR\0LatnKE\0LatnKE\0LatnID\0LatnCD\0LatnCD\0LatnUG\0LatnRU\0LatnZM\0LatnPG\0LatnUS\0LatnCD\0TibtBT\0LatnSS\0LatnAO\0LatnZM\0LatnKE\0LatnGA\0LatnCU\0LatnID\0LatnIN\0LatnUS\0DevaNP\0ArabOM\0LatnCM\0LatnKE\0ArabIR\0LatnTL\0LatnLA\0LatnSB\0LatnID\0LatnCD\0LatnID\0LatnKE\0LatnVN\0ThaiTH\0ThaiCN\0LatnSS\0LatnID\0LatnVU\0LatnPG\0TibtBT\0LatnZM\0HansCN\0LatnVU\0LatnMM\0LatnTR\0LatnMX\0LatnMX\0LatnID\0LatnNG\0LatnCM\0DevaIN\0LatnMX\0LatnID\0LatnGT\0LatnGM\0LatnMX\0LatnKE\0LatnMX\0LatnMX\0LatnBR\0LatnGH\0LatnID\0LatnMX\0LatnPH\0LatnPH\0LatnBR\0LatnPH\0LatnSG\0LatnPG\0LatnPH\0LatnBR\0LatnPG\0LatnBR\0LatnCG\0LatnCO\0LatnCM\0LatnCO\0LatnPG\0LatnCO\0LatnPH\0LatnPH\0LatnNG\0LatnGN\0LatnPG\0LatnPG\0ArabPK\0LatnMX\0LatnPY\0LatnPE\0LatnPG\0LatnPE\0LatnMX\0LatnPE\0LatnVE\0LatnVE\0LatnPG\0LatnNG\0LatnAO\0LatnCO\0LatnMY\0LatnTD\0LatnMX\0LatnCM\
0LatnPG\0LatnPG\0LatnCM\0LatnCM\0LatnCM\0LatnPG\0LatnTD\0LatnCF\0LatnPG\0LatnPG\0LatnNG\0LatnPG\0LatnPG\0LatnCM\0ArabTD\0CyrlRU\0LatnTD\0LatnPH\0LatnCD\0LatnCD\0LatnCD\0LatnCD\0LatnCF\0LatnCD\0LatnCD\0LatnID\0LatnPG\0LatnCG\0LatnCG\0LatnMX\0LatnCG\0EthiET\0EthiET\0LatnBR\0LatnCM\0LatnPG\0LatnAU\0LatnPG\0LatnPG\0LatnMX\0LatnID\0LatnPG\0LatnMY\0LatnAU\0LatnSL\0LatnMY\0LatnAU\0LatnCM\0LatnKE\0LatnTD\0LatnPG\0LatnPG\0LatnLR\0LatnNG\0LatnMR\0LatnUS\0ArabTH\0LatnID\0LatnCD\0LatnCM\0LatnMU\0LatnCM\0LatnGN\0LatnCM\0ArabCM\0LatnCM\0LatnCM\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnID\0LatnTG\0LatnAU\0LatnPG\0LatnAO\0LatnGW\0LatnPG\0LatnET\0LatnMX\0LatnSS\0LatnMG\0LatnTD\0LatnSS\0LatnSS\0LatnTD\0LatnID\0LatnCM\0LatnMZ\0LatnNG\0LatnNG\0LatnID\0LatnPG\0LatnTL\0LatnCF\0LatnCM\0DevaNP\0LatnTZ\0LatnZM\0LatnTZ\0LatnPG\0LatnPG\0LatnTZ\0LatnTZ\0LatnTZ\0LatnTZ\0LatnMH\0LatnGA\0LatnMX\0LatnTZ\0LatnMY\0LatnPG\0LatnAU\0LatnUG\0ArabAF\0LatnCM\0LatnPG\0LatnMZ\0LatnIT\0LatnZM\0LatnID\0LatnUS\0LatnID\0LatnVE\0LatnIN\0LatnBW\0LatnMM\0LatnID\0LatnID\0LatnUS\0LatnMX\0LatnCA\0MandIQ\0LatnMX\0LatnCM\0LatnMX\0LatnMX\0LatnMX\0LatnCM\0LatnUS\0LatnMX\0LatnMX\0LatnID\0LatnMX\0LatnMX\0LatnNI\0LatnMX\0LatnMX\0LatnMX\0LatnPG\0LatnMX\0LatnMX\0LatnMX\0LatnTL\0LatnMX\0LatnUS\0LatnTD\0LatnCN\0LatnTZ\0LatnCN\0LatnPG\0LatnPG\0DevaIN\0LatnPG\0LatnPG\0MlymIN\0MlymIN\0LatnNG\0DevaIN\0TeluIN\0MlymIN\0LatnIN\0LatnBD\0LatnUS\0DevaNP\0LatnCI\0DevaIN\0LatnPG\0DevaIN\0LatnNG\0ArabPK\0LatnFM\0LatnCM\0LatnBJ\0ThaiTH\0LatnID\0LatnNG\0LatnPG\0LatnPG\0LatnMX\0LatnNC\0LatnGN\0LatnVU\0LatnCG\0LatnPH\0LatnID\0LatnTL\0LatnVU\0LatnCM\0LatnVN\0LatnPG\0ThaiLA\0LatnPG\0LatnID\0LatnTD\0LatnKE\0LatnVU\0LatnSB\0LatnSN\0LatnPG\0LatnSN\0LatnCM\0LatnSD\0LatnSB\0LatnVU\0LatnCM\0LatnVU\0LatnPH\0LatnNG\0LatnID\0LatnMX\0LatnCN\0LatnVU\0LatnNG\0LatnVU\0LatnBR\0LatnPG\0LatnVU\0LatnPH\0LatnPG\0LatnPG\0LatnPG\0LatnCN\0LatnPG\0LatnCM\0LatnBR\0LatnVU\0LatnPG\0LatnTD\0LatnCD\0LatnPG\0LatnID\0LatnBR\0LatnTD\0LatnCM\0LatnVN\0LatnCD\0ArabAF\0LatnVU\0LatnPG\
0LatnVN\0LatnCN\0LatnMY\0LatnUS\0CyrlRU\0LatnID\0LatnSB\0MymrMM\0LatnID\0LatnMZ\0LatnID\0LatnRO\0LatnCI\0LatnAR\0LatnUS\0LatnCA\0LatnID\0LatnCA\0LatnNG\0LatnCG\0LatnID\0LatnNI\0LatnVN\0LatnBZ\0LatnID\0LatnSD\0LatnBF\0LatnCO\0LatnTD\0LatnUS\0LatnCG\0LatnPG\0LatnET\0LatnTD\0LatnTZ\0LatnAU\0LatnAU\0LatnBR\0LatnET\0LatnTD\0LatnAU\0LatnCM\0LatnAU\0LatnTD\0LatnPG\0LatnMX\0LatnPG\0LatnPG\0LatnPG\0LatnBR\0LatnSB\0LatnPG\0LatnPG\0LatnBR\0LatnPG\0LatnBR\0LatnPG\0LatnID\0ThaiTH\0LatnID\0LatnCM\0LatnID\0LatnPG\0LatnID\0LatnID\0LatnMX\0LatnID\0LatnID\0LatnPH\0LatnBJ\0LatnPF\0LatnID\0LatnID\0LatnID\0LatnMY\0LatnID\0LatnID\0LatnSS\0LatnPG\0LatnPG\0LatnID\0LatnID\0LatnPG\0ThaiTH\0LatnVU\0LatnUS\0DevaNP\0LatnID\0LatnIN\0LatnIN\0CyrlRU\0LatnNC\0LatnFM\0LatnVU\0LatnSB\0MrooBD\0LatnVU\0LatnPF\0DevaIN\0LatnVU\0LatnNG\0LatnCM\0LatnPF\0LatnPH\0LatnID\0LatnPH\0LatnID\0LatnPH\0LatnGN\0LatnTD\0LatnID\0LatnID\0LatnMG\0LatnMY\0LatnCD\0LatnPH\0LatnID\0LatnPH\0LatnVU\0LatnID\0LatnBR\0LatnNC\0LatnID\0LatnPG\0LatnCM\0LatnGW\0LatnPG\0LatnPG\0LatnPG\0LatnPH\0LatnCI\0LatnPG\0LatnID\0LatnSB\0LatnPG\0LatnID\0LatnID\0LatnPG\0LatnID\0LatnCM\0LatnNG\0CyrlRU\0LatnNI\0LatnMX\0LatnBO\0LatnVN\0DevaIN\0LatnPE\0LatnVU\0LatnMX\0LatnPG\0LatnPH\0LatnMX\0LatnPG\0LatnCM\0LatnTD\0LatnCM\0CyrlRU\0LatnEC\0LatnCM\0LatnSS\0LatnID\0LatnTD\0TibtNP\0LatnPG\0LatnCM\0LatnCN\0LatnSS\0LatnUS\0DevaIN\0LatnKE\0TamlIN\0LatnPG\0LatnCM\0EthiET\0LatnPG\0LatnID\0MongCN\0LatnMX\0LatnTD\0LatnPG\0LatnAU\0LatnPG\0LatnSB\0LatnID\0LatnPG\0LatnID\0LatnID\0LatnVU\0LatnTD\0LatnMY\0LatnTZ\0LatnID\0ArabPK\0EthiET\0LatnPG\0LatnPG\0LatnPG\0LatnTZ\0LatnAU\0LatnPG\0LatnPG\0LatnVU\0LatnML\0LatnPT\0LatnTD\0LatnZM\0LatnVU\0LatnAU\0LatnMM\0DevaIN\0LatnKE\0MymrMM\0LatnSS\0LatnID\0HmnpUS\0LatnCD\0LatnMX\0LatnMX\0LatnZW\0LatnID\0LatnVU\0LatnCM\0LatnAO\0LatnCD\0LatnES\0LatnIN\0LatnPG\0LatnBJ\0LatnPG\0LatnID\0LatnZM\0LatnMX\0LatnMX\0LatnMY\0LatnMX\0LatnMX\0LatnCM\0LatnMX\0LatnPG\0LatnCI\0LatnMX\0LatnID\0LatnTD\0LatnCD\0LatnGA\0LatnET\0LatnCM\0LatnUS\0LatnSS\
0LatnML\0LatnID\0EthiET\0LatnBR\0LatnPE\0LatnBR\0CyrlRU\0LatnPG\0LatnUG\0LatnCO\0MandIR\0LatnMX\0LatnCM\0LatnPG\0LatnAR\0LatnMX\0LatnLR\0LatnNG\0LatnMX\0LatnNG\0ArabIR\0LatnBR\0LatnBO\0LatnID\0LatnBR\0LatnMY\0LatnPG\0LatnCF\0LatnGH\0LatnGY\0LatnPG\0LatnNR\0LatnID\0LatnBR\0LatnPG\0LatnID\0LatnPG\0LatnIN\0LatnGN\0LatnPG\0LatnPG\0LatnAU\0HansCN\0DevaNP\0LatnIT\0LatnNA\0LatnNG\0LatnPG\0LatnNG\0LatnGH\0LatnPG\0LatnAU\0LatnMX\0LatnNO\0LatnAO\0LatnNG\0LatnIN\0LatnCD\0LatnIN\0LatnNG\0LatnIN\0LatnAU\0LatnPG\0LatnCF\0LatnID\0LatnNG\0LatnNG\0LatnID\0LatnNG\0LatnIN\0LatnIN\0LatnCM\0LatnCD\0LatnPG\0LatnPG\0LatnIN\0LatnPG\0DevaNP\0LatnPG\0LatnPG\0LatnCA\0LatnMX\0LatnMX\0LatnMX\0LatnAU\0LatnMX\0LatnPG\0LatnPG\0LatnPG\0LaooLA\0LatnCM\0LatnIN\0LatnGH\0LatnMX\0LatnUS\0LatnZW\0LatnCG\0LatnCM\0LatnMZ\0LatnNG\0CyrlRU\0LatnTZ\0LatnTZ\0LatnNG\0LatnTZ\0LatnCD\0LatnCD\0LatnTD\0LatnCG\0LatnUG\0LatnAO\0LatnNG\0LatnCD\0LatnCM\0LatnSN\0LatnCD\0LatnID\0LatnCF\0LatnSS\0LatnID\0LatnCI\0LatnID\0LatnNG\0LatnNC\0CyrlRU\0TibtBT\0XsuxTR\0LatnPG\0LatnNC\0LatnNC\0LatnNC\0LatnVN\0LatnMX\0LatnID\0LatnPG\0Latn001DevaNP\0LatnPG\0LatnCI\0LatnUS\0LatnID\0LatnNG\0LatnSB\0LatnGH\0LatnCM\0LatnNA\0LatnCD\0LatnCD\0LatnCD\0LatnCF\0LatnCM\0LatnCF\0LatnZA\0LatnNG\0LatnCM\0LatnAU\0LatnMZ\0LatnFM\0LatnCM\0LatnTZ\0LatnTZ\0LatnSB\0LatnNG\0LaooLA\0LatnMX\0LatnCM\0LatnNG\0LatnNG\0LatnCM\0LatnCG\0LatnAU\0LatnCI\0LatnMX\0LatnPY\0LatnMX\0LatnAU\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnPG\0LatnMX\0LatnMX\0LatnBW\0LatnMX\0LatnCM\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnID\0LatnPG\0LatnAU\0LatnTD\0LatnPG\0LatnAU\0LatnTZ\0LatnPG\0LatnID\0LatnID\0LatnTZ\0LatnNG\0CyrlRU\0LatnKE\0LatnID\0LatnPG\0TeluIN\0LatnNU\0CyrlRU\0LatnPG\0LatnCD\0LatnCD\0LatnPG\0LatnNG\0LatnIN\0LatnTZ\0LatnIN\0LatnAU\0LatnCM\0LatnSS\0LatnIN\0LatnIN\0LatnIN\0LatnNG\0LatnID\0LatnSR\0LatnAU\0LatnCG\0LatnCM\0LatnIN\0LatnZM\0LatnIN\0LatnCM\0LatnIN\0LatnSB\0LatnIN\0LatnPG\0LatnIN\0LatnIN\0LatnID\0LatnVU\0LatnPG\0LatnAO\0LatnGH\0LatnGH\0LatnFM\0LatnID\0LatnTZ\0
LatnCI\0LatnMW\0LatnCD\0LatnNG\0LatnNG\0LatnCM\0LatnID\0LatnKE\0LatnSB\0ArabAF\0LatnCD\0LatnID\0ArabPK\0LatnCD\0LatnMM\0LatnGH\0LatnMX\0LatnAU\0DevaIN\0LatnAU\0LatnSB\0LatnIN\0LatnVU\0LatnTD\0LatnGA\0LatnIN\0LatnIN\0LatnCM\0LatnIN\0LatnNG\0LatnCF\0LatnVU\0LatnCM\0DevaNP\0LatnBW\0LatnIN\0LatnAU\0LatnZW\0LatnCM\0LatnVU\0LatnFM\0LatnUS\0LatnAU\0LatnPG\0LatnPG\0LatnTG\0LatnAU\0LatnCD\0LatnTD\0LatnVU\0LatnAO\0LatnPG\0LatnIN\0LatnCM\0LatnID\0LatnET\0LatnPG\0LatnIN\0LatnPG\0LatnTD\0WchoIN\0LatnTZ\0LatnAU\0LatnUS\0LatnGH\0LatnAU\0LatnBF\0LatnAU\0LatnCM\0LatnCO\0LatnPG\0LanaTH\0DevaIN\0LatnPG\0CyrlRU\0LatnPG\0DevaIN\0LatnCO\0LatnUS\0LatnPE\0RunrSE\0LatnPG\0LatnCD\0YiiiCN\0LatnPE\0LatnPG\0Latn001LatnTZ\0LatnTD\0TibtBT\0LatnMM\0LatnIN\0LatnMX\0LatnPG\0LatnIN\0LatnID\0LatnIN\0LatnSB\0LatnID\0LatnBJ\0LatnBJ\0LatnAO\0LatnID\0LatnPG\0LatnMM\0LatnNG\0LatnMM\0LatnZA\0LatnGA\0LatnER\0LatnIN\0LatnJE\0LatnVU\0LatnIN\0LatnAU\0LatnAU\0LatnMY\0LatnIT\0LatnCN\0LatnAU\0LatnPG\0LatnIN\0LatnZA\0LatnNG\0YiiiCN\0LatnZM\0YiiiCN\0LatnTZ\0LatnCM\0CansCA\0LatnIN\0LatnPG\0LatnZA\0LatnUS\0LatnPG\0TnsaIN\0LatnMX\0YiiiCN\0LatnVU\0LatnAO\0LatnID\0LatnUS\0LatnMY\0LatnMZ\0LatnAU\0LatnBF\0LatnAU\0LatnTZ\0LatnBJ\0LatnCD\0LatnMX\0LatnGH\0LatnSB\0LatnMM\0YiiiVN\0ArabIR\0LatnNC\0LatnBR\0LatnPG\0LatnCD\0LatnCN\0LatnAU\0LatnNG\0LatnGQ\0LatnUG\0LatnCA\0LatnTO\0LatnMM\0LatnVN\0LatnNG\0LatnPG\0LatnPG\0LatnSS\0LatnVN\0LatnCD\0LatnBF\0LatnFM\0LatnPG\0LatnAU\0LatnMX\0LatnUS\0LatnVU\0LatnPG\0LatnCM\0LatnCI\0NewaNP\0LatnCM\0LatnAU\0LatnVU\0LatnSS\0LatnAU\0LatnPG\0LatnTZ\0DevaNP\0LatnTL\0LatnCD\0LatnID\0LatnID\0LatnTZ\0LatnID\0LatnAU\0LatnGA\0LatnCN\0LatnPG\0LatnID\0LatnMW\0LatnGH\0LatnCD\0LatnKE\0LatnAO\0LatnKE\0LatnCD\0LatnAU\0LatnSD\0LatnCD\0LatnAO\0ThaiTH\0LatnTZ\0LatnUG\0LatnUG\0LatnUG\0ArabIR\0LatnMW\0LatnAU\0LatnAU\0LatnMZ\0LatnAU\0LatnAU\0LatnTZ\0LatnCM\0LatnGA\0LatnCD\0LatnGH\0LatnCF\0LatnIN\0LatnCG\0LatnTD\0LatnML\0CyrlRU\0CyrlRU\0SyrcSY\0GeorGE\0LatnUS\0LatnPH\0LatnCM\0PhnxJO\0LatnPH\0MymrMM\0LatnFR\0LatnNG\0L
atnPE\0LatnGB\0LatnMX\0LatnNG\0ArabPK\0LatnNL\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0LatnNG\0XsuxTR\0LatnID\0LatnSS\0LatnPG\0CansCA\0LatnCA\0LatnCA\0CansCA\0LatnSB\0LatnCA\0LatnCA\0LatnNG\0LatnCD\0LatnNG\0LatnNG\0LatnAU\0LatnKE\0LatnPG\0HangKR\0HaniKR\0LatnNG\0LatnNG\0LatnCM\0LatnPG\0LatnNG\0KhmrKH\0DevaNP\0LatnTZ\0TibtBT\0LatnAU\0LatnNG\0LatnRU\0LatnVU\0LatnLT\0LatnAO\0LatnET\0LatnUS\0LatnVU\0LatnPE\0LatnPE\0LatnCD\0CyrlRU\0LatnCD\0LatnPG\0MteiIN\0ModiIN\0LatnKE\0LatnPE\0LatnPG\0LatnAR\0LatnCA\0LatnPG\0LatnID\0LatnPG\0LatnPG\0LatnPG\0LatnCA\0LatnIN\0LatnPG\0LatnPG\0LatnPG\0LatnVU\0LatnID\0LatnUS\0DevaIN\0LatnZA\0LatnNG\0LatnID\0LatnPG\0LatnPG\0LatnMX\0LatnBR\0LatnSB\0LatnKE\0LatnPE\0LatnNG\0LatnMY\0LatnPG\0LatnNG\0LatnMY\0TeluIN\0ArabPK\0CyrlRU\0LatnBR\0LatnNG\0LatnID\0CyrlGE\0OsgeUS\0ItalIT\0JavaID\0LatnNG\0LatnES\0LatnCM\0LatnPG\0LatnDE\0ArabTR\0TibtCN\0LatnID\0LatnMX\0LatnBR\0OrkhMN\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnSD\0LatnMX\0LatnMX\0LatnBR\0LatnCA\0LatnMX\0GranIN\0LatnMX\0LatnLR\0LatnPG\0LatnPG\0LatnSE\0LatnPG\0LatnGB\0LatnET\0LatnBR\0LatnPG\0LatnCM\0LatnBR\0LatnVN\0LatnBR\0LatnCD\0LatnBR\0LatnPH\0LatnBR\0LatnNG\0LatnBR\0PhliIR\0LatnPH\0LatnUS\0LatnCW\0CyrlTJ\0LatnUS\0LatnID\0LatnPW\0LatnBR\0LatnUS\0LatnBR\0LatnHN\0LatnBR\0LatnCO\0LatnGY\0LatnMX\0LatnMX\0LatnVE\0LatnVE\0LatnCM\0LatnNG\0LatnMX\0LatnNG\0LatnGW\0LatnGN\0LatnTZ\0LatnMX\0ArabAF\0LatnIN\0LatnPG\0LatnMX\0KhmrKH\0LatnCN\0LatnFR\0MymrMM\0MlymIN\0MlymIN\0DevaIN\0DevaIN\0TeluIN\0LatnIN\0LatnNG\0LatnBO\0LatnNG\0LatnPG\0LatnUS\0LatnID\0LatnID\0LatnCA\0LatnMM\0LatnID\0LatnUS\0LatnPG\0LatnID\0OryaIN\0LatnMX\0LatnPG\0LatnID\0LatnCD\0XpeoIR\0LatnPG\0LatnUS\0LatnVE\0LatnPG\0LatnID\0LatnMY\0LatnFM\0LatnCM\0LatnDE\0LatnSS\0KharPK\0DevaIN\0LatnPG\0LatnVU\0OgamIE\0ItalIT\0LatnNG\0LatnID\0DevaIN\0LatnVN\0LatnVN\0MymrIN\0ArabPK\0LatnMZ\0PhnxLB\0LaooLA\0ArabPK\0ThaiTH\0ArabAF\0DevaNP\0SinhIN\0LatnMX\0LatnPE\0LatnGA\0LatnVE\0LatnFM\0LatnPE\0LatnNF\0LatnCO\0LatnBJ\0LatnUS\0LatnPG\0LatnCO\0LatnNG\0LatnBR\0L
atnSB\0LatnAU\0LatnAU\0LatnSB\0LatnTZ\0LatnPG\0LatnNG\0LatnNC\0LatnAU\0BrahIN\0LatnKE\0LatnPG\0LatnBD\0LatnAU\0LatnKE\0LatnCK\0MlymIN\0LatnID\0LatnPG\0LatnVU\0LatnPH\0LatnGB\0LatnID\0LatnAR\0LatnID\0LatnNG\0ArabPK\0MymrMM\0LatnCO\0LatnMX\0LatnCI\0LatnMX\0LatnBR\0LatnPH\0LatnPH\0LatnMY\0LatnVU\0LatnCD\0LatnAU\0LatnNC\0LatnID\0BrahIN\0LatnCN\0LatnCN\0LatnTN\0LatnCM\0LatnCM\0LatnID\0LatnMX\0LatnPG\0LatnIT\0LatnPF\0LatnUS\0LatnIN\0LatnID\0LatnMX\0LatnMY\0LatnID\0LatnAO\0LatnMY\0LatnNG\0LatnCK\0LatnID\0LatnAU\0LatnBO\0LatnBF\0LatnMY\0LatnPG\0LatnPE\0LatnID\0LatnBF\0LatnPG\0LatnID\0GrekGR\0LatnAU\0LatnAU\0LatnCM\0LatnCF\0LatnGT\0LatnMX\0LatnCD\0LatnBR\0LatnGT\0LatnMX\0LatnBR\0LatnUS\0LatnFM\0LatnUS\0LatnNC\0LatnMX\0LatnMX\0LatnUS\0LatnGW\0LatnMX\0LatnTZ\0LatnPG\0LatnMX\0LatnID\0LatnSV\0LatnID\0LatnPG\0LatnPG\0LatnCD\0LatnPG\0LatnMX\0LatnPG\0LatnNG\0LatnCA\0KharPK\0ArabAF\0ArabIR\0LatnST\0LatnPH\0LatnPH\0LatnNC\0LatnMM\0LatnPG\0LatnFR\0GujrIN\0LatnPE\0LatnBR\0ThaiTH\0LatnID\0LatnPG\0ArabIN\0LatnID\0LatnID\0ArabAF\0ArabAF\0LatnBO\0LatnID\0LatnPG\0LatnPG\0ArabPK\0LatnVU\0LatnPY\0LatnBR\0LatnAU\0LatnID\0LatnBR\0LatnPG\0LatnVU\0LatnID\0LatnID\0LatnVU\0LatnMX\0LatnIN\0LatnID\0LatnID\0LatnAR\0LatnID\0LatnBF\0LatnCO\0LatnID\0DevaNP\0LatnVN\0LatnPG\0LatnPE\0LatnBR\0LatnID\0LatnGA\0LatnFM\0LatnPG\0LatnUS\0LatnPG\0LatnNG\0LatnPG\0LatnPH\0LatnTW\0MymrMM\0DevaIN\0ThaiTH\0LatnMX\0LatnCI\0LatnNG\0LatnBR\0LatnTW\0MymrMM\0LatnMM\0LatnTW\0LatnMM\0LatnUS\0LatnPE\0LatnGT\0LatnEC\0LatnPE\0LatnEC\0LatnUS\0LatnPE\0LatnBO\0LatnGT\0LatnUS\0LatnPE\0LatnES\0LatnPE\0LatnAR\0LatnGT\0LatnEC\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnEC\0LatnEC\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnPE\0LatnEC\0LatnPE\0LatnPE\0LatnPE\0LatnRU\0LatnPE\0LatnUS\0LatnPE\0LatnPE\0LatnPE\0LatnEC\0LatnPE\0LatnPE\0LatnPE\0ArabIR\0LatnEC\0LatnPE\0LatnPE\0LatnPE\0Latn001LatnUS\0DevaNP\0DevaNP\0LatnID\0LatnVN\0DevaNP\0LatnKE\0BengIN\0LatnPG\0LatnPG\0LatnBR\0LatnID\0LatnPG\0LatnCL\0LatnCK\0DevaNP\0LatnMM\0La
tnNG\0LatnPF\0LatnID\0MymrMM\0LatnPH\0LatnPH\0LatnAU\0LatnRE\0ArabIR\0LatnPG\0LatnID\0LatnMY\0LatnTZ\0OryaIN\0LatnID\0LatnKE\0LatnPE\0LatnVN\0LatnNG\0LatnID\0LatnBO\0LatnVU\0LatnIT\0LatnPE\0LatnVN\0LatnID\0RohgMM\0LatnPG\0LatnIN\0LatnMA\0LatnMM\0LatnTZ\0LatnNG\0LatnID\0LatnAU\0LatnID\0LatnID\0DevaNP\0DevaNP\0KhmrKH\0LatnBR\0LatnCK\0MymrMM\0LatnBF\0BengBD\0LatnAU\0LatnNI\0LatnAU\0LatnSK\0LatnDK\0LatnGB\0LatnFI\0LatnNO\0LatnID\0ArmnAM\0LatnPG\0LatnPL\0LatnID\0LatnRS\0LatnCH\0LatnPG\0LatnES\0ArabIR\0LatnSE\0LatnGB\0LatnVN\0MymrIN\0LatnBI\0LatnCD\0LatnMZ\0LatnIN\0LatnID\0LatnAU\0LatnTZ\0LatnID\0LatnVN\0LatnNG\0LatnPG\0LatnTZ\0LatnVN\0LatnPH\0LatnRO\0LatnPG\0LatnAU\0LatnID\0LatnTD\0LatnID\0LatnVU\0LatnPG\0LatnSB\0LatnPG\0LatnAU\0CyrlRS\0LatnMM\0LatnID\0LatnFJ\0DevaIN\0LatnUG\0LatnUG\0CyrlUA\0LatnTZ\0LatnSB\0LatnTZ\0LatnNG\0LatnHR\0LatnRO\0LatnGR\0CyrlRU\0LatnMY\0LatnNG\0LatnNG\0LatnRW\0LatnPG\0LatnTZ\0LatnTZ\0LatnUG\0LatnPG\0DevaIN\0LatnAU\0LatnAU\0KanaJP\0LatnTD\0LatnPA\0LatnUS\0LatnTZ\0LatnBR\0LatnGH\0LatnID\0LatnGA\0SamrPS\0LatnID\0LatnKE\0LatnBO\0LatnID\0LatnID\0LatnSN\0LatnID\0LatnVU\0LatnNG\0SaurIN\0LatnTD\0LatnSB\0LatnPG\0LatnBF\0LatnPG\0LatnID\0LatnPG\0LatnPG\0LatnTD\0LatnTZ\0LatnPH\0LatnTZ\0ArabPK\0LatnMY\0LatnTZ\0LatnPG\0LatnID\0LatnNA\0LatnID\0TibtIN\0LatnIT\0LatnGA\0LatnID\0LatnZM\0LatnCF\0LatnVN\0LatnCN\0LatnPA\0LatnID\0LatnIN\0LatnLK\0DevaIN\0ArabPK\0LatnIT\0LatnGB\0DevaNP\0LatnCA\0LaooLA\0TakrIN\0LatnNG\0LatnNG\0GrekIT\0LatnID\0ArabIQ\0LatnIT\0LatnNG\0ArabIQ\0ArabAF\0ArabIR\0LatnCG\0LatnPG\0LatnIT\0LatnMY\0LatnID\0ArabTN\0LatnID\0LatnMY\0LatnNO\0LatnMY\0LatnCI\0LatnCA\0LatnVN\0LatnUS\0LatnCI\0LatnTZ\0LatnMZ\0LatnMX\0LatnPG\0LatnCA\0CyrlRU\0LatnBF\0LatnPG\0LatnBF\0LatnBF\0LatnUS\0LatnML\0LatnID\0LatnID\0LatnCI\0LatnPG\0LatnEC\0LatnMM\0LatnPH\0PlrdCN\0LatnGH\0LatnCF\0OgamIE\0LatnPH\0LatnKE\0LatnPH\0LatnID\0CyrlTJ\0LatnCM\0DevaIN\0LatnKE\0LatnIN\0ArabIR\0LatnLT\0TibtBT\0LatnID\0EthiET\0ArabAF\0LatnPG\0LatnNG\0LatnBR\0LatnCD\0ArabPK\0LatnET\0LatnBW\0LatnUS\0Tf
ngMA\0LatnSD\0LatnSS\0ArabIR\0MymrMM\0LatnNG\0LatnPE\0LatnZM\0LatnCD\0LatnCA\0LatnUS\0ArabTD\0ArabOM\0LatnSD\0LatnDZ\0LatnML\0CyrlRU\0LatnMY\0LatnET\0LatnZM\0LatnBF\0LatnGH\0LatnNC\0LatnIN\0LatnPG\0LatnBR\0LatnGH\0LatnPG\0TibtIN\0LatnPG\0LatnNG\0LatnUS\0LatnPG\0LatnPG\0LatnPG\0LatnPG\0ArabIR\0ArabEG\0LatnCO\0LatnID\0CyrlRU\0LatnSE\0LatnTD\0LatnIN\0LatnPH\0DevaIN\0LatnPG\0CyrlRU\0LatnSE\0LatnUS\0LatnUS\0ThaiTH\0LatnPG\0LatnUS\0LatnVU\0LatnBR\0LatnMG\0LatnID\0LatnID\0DevaNP\0LatnPG\0LatnPH\0LatnID\0LatnMY\0LatnBF\0ArabPK\0LatnPG\0LatnCD\0LatnVU\0LatnID\0LatnGY\0LatnID\0LatnSB\0LatnID\0LatnCO\0LatnBF\0LatnID\0LatnUS\0LatnPL\0LatnBR\0LatnPG\0LatnPH\0LatnUS\0LatnID\0ArabIR\0LatnCN\0LatnID\0LatnPG\0LatnCD\0LatnID\0LatnID\0LatnWS\0LatnSE\0LatnPG\0LatnPG\0LatnPG\0LatnPG\0YiiiCN\0LatnSE\0LatnPH\0LatnPH\0LatnFI\0SamrIL\0LatnPG\0LatnID\0LatnFI\0LatnIN\0KhmrKH\0LatnID\0LatnCD\0ArabIR\0LatnPG\0LatnZW\0LatnPG\0LatnMY\0LatnSN\0LatnCD\0LatnPE\0LatnCF\0LatnML\0LatnPH\0LatnUG\0LatnCO\0LatnUS\0LatnPG\0LatnGA\0LatnPG\0LatnVU\0LatnID\0LatnMY\0LatnGH\0LatnPG\0LatnPG\0LatnPG\0TavtTH\0LatnID\0LatnCD\0LatnCD\0LatnCD\0SogdUZ\0DevaNP\0LatnTD\0LatnPG\0LatnCD\0LatnCD\0LatnPG\0LatnTD\0LatnBF\0ThaiTH\0LatnPW\0LatnPG\0LatnCM\0LatnBJ\0LatnTZ\0LatnID\0LatnVE\0LatnPG\0LatnPG\0LatnMY\0LatnID\0LatnPG\0LatnPG\0LatnPG\0LatnPY\0LatnUS\0LatnML\0LatnPE\0LatnID\0LatnPG\0TibtIN\0OryaIN\0LatnNG\0LatnNG\0LatnCF\0ArabIR\0LaooLA\0ArabYE\0LatnCA\0LatnPG\0SoraIN\0LatnID\0LatnPG\0LatnPH\0ArabCN\0LatnCO\0LatnMY\0LatnID\0LatnSR\0LatnSR\0LatnIT\0LatnBO\0LatnSN\0LatnCA\0LatnID\0LatnBR\0LatnPH\0LatnID\0DevaIN\0LatnPG\0ArabIR\0LatnZA\0LatnPH\0LatnTZ\0LatnPG\0LatnPH\0LatnTW\0LatnPG\0ArabAE\0LatnPG\0LatnGH\0LatnMY\0LatnKE\0LatnPG\0LatnID\0LaooLA\0LatnPG\0LatnPG\0LatnVU\0LatnPG\0LatnER\0LatnPG\0LatnZA\0LatnZM\0LatnPH\0LatnID\0LatnPG\0LatnVN\0LatnIE\0LatnVN\0LatnBF\0LatnPG\0LatnNL\0LatnPG\0LatnSB\0LatnCA\0LatnMX\0LatnDE\0LatnCA\0ArabAF\0LatnVN\0EthiET\0LatnFM\0CyrlRU\0LatnPG\0LatnCD\0LatnPH\0LatnPG\0LatnPG\0LatnPG\0LatnTZ\0La
tnTZ\0LatnPG\0LatnET\0LatnNG\0LatnGN\0LatnNI\0LatnIN\0LatnTZ\0LatnBR\0DevaNP\0GeorGE\0LatnPG\0LatnVC\0LatnID\0LatnIT\0LatnSB\0ArabYT\0LatnCD\0LatnDE\0HaniCN\0LatnGA\0LatnMW\0LatnPG\0LatnBR\0LatnPG\0LatnCM\0LatnID\0LatnID\0LatnID\0LatnID\0DevaIN\0LatnVU\0LatnBR\0LatnTD\0LatnKE\0LatnGA\0LatnID\0LatnTW\0LatnNG\0LatnDE\0LatnBJ\0LatnID\0LatnPH\0SyrcTR\0LatnGA\0LatnNG\0BengBD\0LatnBF\0SyrcIR\0LatnKH\0LatnTD\0DevaNP\0LatnGA\0LatnMY\0LatnID\0LatnMY\0LatnMY\0LatnCD\0LatnID\0LatnID\0LatnCM\0LatnID\0LatnTW\0LatnUS\0CyrlRU\0LatnMX\0LatnID\0LatnBR\0LatnBR\0LatnSD\0DevaNP\0LatnNG\0LatnNG\0LatnNG\0LatnTW\0LatnCD\0LatnML\0LatnMX\0LatnVN\0LatnUS\0LatnCO\0LatnPG\0LatnTD\0LatnTW\0LatnSD\0LatnBR\0LatnPG\0LatnPG\0LatnSB\0LatnPG\0LatnPG\0LatnAU\0LatnSD\0LatnPG\0TagbPH\0LatnPH\0LatnCD\0LatnCO\0LatnPG\0LatnID\0LatnPG\0LatnCD\0LatnMX\0LatnPG\0LatnPH\0LatnPG\0LatnID\0LatnBJ\0LatnBR\0LatnUS\0LatnTZ\0LatnGH\0LatnCA\0LatnMX\0LatnID\0LatnTC\0LatnPG\0LatnGA\0LatnID\0TibtNP\0MymrMM\0LatnMM\0LatnID\0LatnAU\0LatnMX\0LatnMX\0TamlIN\0KndaIN\0LatnIN\0TfngNE\0DevaIN\0LatnCO\0TaleCN\0LatnML\0DevaNP\0DevaNP\0LatnID\0LatnID\0LatnNG\0LatnNG\0LatnGY\0LatnID\0LatnNG\0LatnNG\0LatnVN\0LatnID\0LatnTL\0LatnNG\0LatnMG\0LatnPH\0LatnMY\0LatnEC\0LatnKE\0LatnCI\0LatnMX\0LatnGA\0LatnAR\0LatnPG\0LatnCD\0LatnSL\0LatnCO\0LatnUG\0LatnMX\0LatnSD\0LatnBR\0JavaID\0LatnTL\0LatnUG\0LatnID\0LatnUS\0LatnSS\0LatnSD\0LatnBJ\0LatnUS\0LatnID\0LatnPA\0LatnID\0LatnKE\0LatnMY\0LatnPG\0LatnNG\0DevaNP\0TibtBT\0LatnTT\0LatnPG\0LatnIN\0LatnPH\0LatnPG\0LatnVU\0LatnMY\0LatnVU\0LatnPH\0LatnPG\0LatnBR\0LatnCI\0LatnCA\0LatnSS\0LatnAU\0LatnAU\0DevaNP\0DevaNP\0LatnMX\0TaleLA\0LatnKE\0DevaNP\0ThaiTH\0LatnCA\0DevaNP\0DevaNP\0DevaNP\0LatnCA\0LatnSS\0LatnDZ\0LatnNG\0LatnNE\0LatnSD\0LatnPG\0EthiER\0LatnMY\0LatnCD\0DevaNP\0LatnCM\0LatnUS\0LatnPG\0CyrlRU\0LatnPG\0LatnID\0LatnBF\0LatnPH\0LatnCO\0LatnPH\0LatnNG\0LatnAU\0LatnUS\0LatnPH\0LatnLR\0LatnID\0LatnCN\0LatnAU\0MymrMM\0LatnCI\0ArabDZ\0LatnAU\0LatnCN\0LatnAU\0LatnAU\0LatnBR\0DevaIN\0LatnTL\0LatnMZ\0La
tnBR\0LatnMG\0LatnTK\0LatnSB\0LatnNG\0LatnAZ\0ArabIR\0DevaNP\0LatnMX\0LatnPG\0LatnSB\0LatnID\0LatnVN\0LatnPH\0LatnMX\0LatnID\0LatnMX\0LatnID\0LatnPG\0LatnID\0LatnUS\0LatnUG\0LatnID\0LatnCD\0LatnVU\0LatnID\0LatnMX\0LatnMM\0LatnSB\0LatnVU\0LatnID\0LatnID\0LatnID\0LatnPG\0LatnAZ\0LatnTD\0LatnVU\0LatnTD\0LatnPG\0LatnBR\0LatnPY\0LatnID\0LatnNE\0LatnVU\0LatnID\0DevaNP\0LatnID\0LatnVN\0LatnID\0LatnMY\0LatnPG\0SyrcIL\0LatnVU\0LatnID\0LatnCD\0LatnMY\0LatnPG\0LatnVE\0LatnZA\0LatnBO\0LatnCO\0LatnCO\0LatnCO\0LatnTD\0LatnPG\0LatnID\0LatnVU\0LatnVU\0LatnID\0LatnVU\0LatnBO\0LatnVU\0LatnPR\0LatnSN\0LatnPG\0LatnID\0CakmBD\0LatnID\0LatnSB\0LatnTZ\0LatnAR\0LatnMX\0LatnGN\0LatnPG\0LatnMW\0LatnMZ\0LatnZM\0LatnMX\0LatnUS\0LatnID\0LatnMX\0LatnMX\0LatnSS\0LatnCD\0LatnMX\0LatnVN\0ArabIR\0LatnUS\0LatnPW\0LatnID\0LatnCM\0LatnPG\0LatnMX\0LatnBD\0LatnID\0LatnID\0LatnPG\0LatnPY\0LatnBR\0LatnMX\0LatnGH\0LatnBR\0LatnMX\0LatnBR\0LatnMX\0KhmrKH\0LatnMP\0LatnMX\0LatnBR\0LatnPG\0LatnBR\0LatnVU\0LatnPG\0LatnUS\0LatnPG\0LatnPG\0LatnMX\0LatnSB\0LatnUS\0ArabAF\0LatnPG\0LatnMX\0LatnID\0LatnTT\0HebrIL\0LatnPG\0LatnSR\0LatnTD\0LatnGB\0ArabAF\0LatnBO\0LatnIN\0LatnIN\0LatnMX\0LatnPE\0LatnMX\0LatnID\0LatnTR\0LatnTW\0ArabPK\0LatnMY\0LatnIN\0LatnBR\0LatnZA\0LatnCG\0LatnET\0LatnMZ\0GrekGR\0LatnPH\0LatnCM\0LatnCA\0TibtBT\0LatnVN\0LatnBF\0LatnVU\0LatnML\0LatnTW\0LatnGA\0LatnNG\0LatnPG\0LatnMX\0LatnNG\0LatnGT\0LatnPG\0LatnPG\0LatnCM\0LaooLA\0LatnID\0LatnUG\0LatnCO\0LatnZM\0LatnCA\0LatnID\0LaooLA\0LatnID\0LatnNG\0ThaiTH\0LatnAZ\0LatnPG\0LatnPG\0LatnMY\0LatnID\0LatnPG\0LatnUS\0LatnPG\0LatnBR\0LatnCO\0LatnCO\0LatnTD\0LatnPG\0LatnCM\0LatnID\0LatnNG\0LatnMW\0LatnUS\0LatnBR\0LatnTD\0LatnCA\0LatnUS\0LatnKE\0LatnBR\0LatnKE\0LatnBF\0LatnSB\0LatnNG\0LatnID\0LatnVU\0LatnTV\0LatnID\0MymrMM\0LatnID\0LatnKE\0LatnIN\0LatnCM\0LatnID\0LatnTW\0LatnUS\0LatnPH\0LatnNL\0LatnID\0LatnUS\0LatnID\0LatnVN\0LatnMZ\0DevaIN\0LatnCM\0LatnBW\0LatnPG\0LatnNE\0LatnMX\0LatnBR\0LatnID\0LatnPG\0LatnMZ\0LatnID\0LatnMY\0LatnID\0TangCN\0LatnBR\0LatnNG\0La
tnID\0LatnID\0TotoIN\0LatnID\0LatnID\0LatnID\0LatnBR\0LatnMY\0LatnMG\0LatnPF\0LatnPG\0LatnNG\0LatnVN\0LatnCG\0LatnVN\0LatnVN\0LatnID\0LatnAU\0TavtVN\0LatnVN\0LatnVN\0LatnBW\0CyrlRU\0LatnCG\0LatnNG\0LatnVN\0LatnMX\0LatnGT\0Latn001LatnMA\0LatnID\0LatnMX\0LatnPG\0LatnBR\0LatnPG\0LatnNG\0LatnTD\0LatnPH\0LatnPG\0LatnPG\0LatnNG\0CyrlRU\0MlymIN\0AghbRU\0LatnID\0LatnCM\0CyrlRU\0LatnSD\0LatnID\0LatnPG\0UgarSY\0LatnAU\0LatnSB\0CyrlRU\0ThaiTH\0LatnNG\0LatnID\0LatnPG\0LatnCM\0LatnNG\0LatnID\0LatnPG\0LatnCF\0OryaIN\0LatnMM\0LatnNG\0LatnNG\0LatnNG\0LatnSS\0LatnNG\0LatnAU\0LatnNG\0LatnNG\0CyrlRU\0LatnAR\0LatnID\0LatnFM\0LatnAU\0LatnID\0LatnPG\0LatnID\0LatnNI\0LatnUS\0LatnAO\0LatnAU\0LatnAU\0LatnMY\0LatnNG\0LatnMM\0LatnBR\0LatnAU\0LatnAU\0LatnID\0LatnPG\0LatnNG\0LatnAU\0LatnPG\0LatnBR\0LatnUS\0LatnAU\0BengIN\0LatnPG\0BengIN\0LatnID\0LatnTW\0LatnPG\0LatnVU\0LatnPE\0LatnBR\0LatnAU\0LatnBO\0LatnAU\0LatnPG\0LatnNG\0LatnPG\0ThaiTH\0LatnPG\0LatnID\0LatnPG\0LatnBR\0LatnVU\0LatnPG\0LatnBR\0LatnPG\0LatnPG\0LatnPG\0LatnID\0LatnBR\0LatnPG\0ArabPK\0LatnBD\0LatnCM\0LatnGT\0LatnNG\0LatnPG\0LatnNG\0LatnUS\0LatnNG\0LatnSB\0LatnNG\0LatnPG\0GrekGE\0LatnVU\0LatnNC\0LatnPG\0LatnPG\0LatnAU\0LatnNG\0ArabAF\0TamlIN\0LatnCF\0ArabIR\0LatnGH\0DevaIN\0VaiiLR\0LatnNA\0LatnPG\0LatnPG\0LatnPG\0LatnVU\0LatnIN\0LatnMX\0DevaIN\0LatnCD\0DevaIN\0DevaNP\0LatnID\0LatnPH\0LatnZA\0LatnNG\0LatnUS\0LatnRU\0LatnNG\0ArabPK\0LatnSX\0LatnTZ\0LatnCG\0LatnBF\0LatnAR\0LatnTZ\0LatnNG\0LatnPG\0LatnAU\0LatnTD\0LatnID\0LatnID\0LatnBR\0LatnNG\0LatnID\0LatnIN\0LatnID\0LatnAU\0LatnNG\0LatnVU\0LatnBE\0LatnAU\0LatnAU\0LatnMX\0KndaIN\0LatnID\0LatnDE\0LatnPG\0ArabIR\0LatnAU\0LatnMX\0LatnMZ\0LatnAU\0LatnMX\0LatnMX\0LatnMX\0LatnMZ\0LatnID\0LatnAU\0LatnMX\0LatnMX\0LatnMX\0LatnSB\0LatnVU\0LatnVU\0Latn001LatnNG\0LatnRU\0LatnVU\0LatnEE\0LatnSB\0LatnVU\0LatnID\0LatnGA\0LatnTZ\0LatnCM\0LatnCN\0LatnBE\0LatnUS\0LatnPG\0LatnUS\0LatnID\0LatnCH\0LatnBR\0LatnPG\0LatnID\0LatnID\0LatnPG\0EthiET\0LatnUS\0LatnCI\0LatnGY\0LatnAU\0LatnPH\0LatnUS\0LatnPG\0Latn
BR\0LatnNG\0LatnBR\0LatnPG\0LatnSR\0LatnPG\0LatnVE\0LatnID\0LatnID\0LatnBF\0LatnTZ\0LatnTZ\0LatnTZ\0ArabAF\0LatnPK\0LatnCN\0LatnAU\0TeluIN\0DevaIN\0LatnAU\0LatnAU\0LatnID\0LatnBR\0LatnTG\0LatnGA\0LatnPG\0LatnAU\0LatnAU\0LatnCA\0LatnAU\0LatnAU\0LatnCI\0LatnPG\0LatnAU\0LatnCM\0LatnPG\0LatnBJ\0LatnID\0LatnDE\0LatnPG\0LatnCM\0LatnID\0LatnMM\0LatnID\0LatnID\0LatnAU\0LatnPG\0LatnAU\0LatnPG\0LatnID\0LatnAU\0LatnAU\0LatnID\0LatnPG\0LatnID\0LatnID\0LatnBF\0LatnUS\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnPG\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnUS\0LatnBR\0LatnPG\0LatnPG\0LatnUS\0LatnNG\0LatnNG\0LatnTZ\0LatnID\0LatnAU\0LatnAU\0LatnAU\0LatnPG\0LatnAU\0LatnTL\0LatnID\0LatnGB\0ArabID\0LatnVU\0LatnWF\0LatnAU\0LatnAR\0LatnID\0LatnGH\0LatnNG\0LatnAU\0LatnPG\0LatnBR\0DevaNP\0LatnTL\0LatnAU\0LatnID\0LatnNC\0LatnPG\0LatnID\0LatnAU\0LatnMZ\0LatnPG\0LatnPG\0LatnPG\0LatnAU\0ArabPK\0LatnID\0ArabKM\0LatnID\0LatnAU\0LatnAU\0LatnID\0LatnPG\0LatnPG\0LatnUS\0LatnAU\0LatnAU\0LatnCI\0LatnPG\0LatnID\0LatnFM\0LatnGM\0LatnPG\0LatnID\0LatnCM\0LatnNG\0LatnCD\0LatnID\0LatnID\0LatnPG\0LatnID\0LatnVE\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnID\0LatnAU\0LatnPG\0LatnID\0LatnPG\0LatnAU\0LatnID\0LatnAU\0LatnID\0GongIN\0LatnVU\0LatnPG\0LatnPG\0LatnGH\0LatnBR\0ArabAF\0LatnPG\0LatnAU\0LatnET\0LatnPG\0DevaIN\0LatnID\0LatnAU\0LatnAU\0LatnTG\0LatnID\0LatnGA\0LatnTZ\0LatnAU\0LatnPG\0HansCN\0LatnPG\0LatnAU\0LatnID\0LatnBJ\0LatnAU\0LatnVU\0LatnAU\0LatnCM\0LatnAU\0LatnAU\0LatnAU\0LatnPL\0LatnUS\0LatnBR\0LatnFJ\0LatnES\0LatnNG\0LatnBR\0LatnBR\0LatnVE\0CyrlRU\0LatnZA\0EthiET\0LatnVN\0LatnPG\0CyrlRU\0LatnBR\0LatnID\0LatnBR\0LatnUS\0LatnID\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnPG\0LatnAU\0LatnFR\0LatnMY\0LatnAU\0LatnID\0LatnBR\0LatnAU\0LatnUS\0ChrsUZ\0CariTR\0LatnAU\0LatnAU\0LatnAO\0CyrlRU\0LatnID\0LatnCM\0LatnZA\0LatnID\0LatnBR\0LatnPG\0LatnBR\0LatnPG\0LatnCI\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0ArabPK\0KhmrKH\0LatnVN\0LatnZA\0LatnGT\0LatnBR\0OryaIN\0LatnBR\0LatnAU\0LatnAU\0ArabPK\0Latn
BJ\0ArabIR\0LatnID\0LatnID\0LatnML\0ArabIR\0LatnID\0LatnID\0ArabIR\0LatnID\0LatnBR\0LatnID\0LatnGH\0LatnCG\0LatnBW\0LatnID\0LatnPG\0LatnMY\0LatnBT\0LatnPG\0LyciTR\0LydiTR\0ElymIR\0LatnSO\0LatnCM\0LatnMZ\0LatnCM\0GeorGE\0LatnCM\0LatnAU\0LatnCM\0LatnID\0ManiCN\0LatnBR\0LatnAU\0LatnAU\0MercSD\0LatnID\0LatnAU\0LatnMG\0LatnMG\0LatnID\0LatnAU\0LatnID\0NarbSA\0LatnTW\0LatnAU\0LatnTZ\0LatnAU\0LatnAU\0LatnPH\0LatnMZ\0LatnUS\0LatnAU\0LatnAU\0LatnEG\0LatnNG\0LatnID\0LatnUG\0LatnPG\0LatnBR\0LatnSD\0LatnGH\0LatnBR\0LatnPG\0LatnBR\0LatnPG\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0GrekTR\0LatnAU\0OgamGB\0LatnAU\0LatnBR\0LatnAU\0CyrlRU\0LatnBR\0LatnMX\0LatnUS\0PrtiIR\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnBR\0LatnBF\0LatnAU\0LatnBR\0LatnAU\0LatnBR\0CyrlRU\0CyrlRU\0LatnIT\0LatnAU\0LatnPG\0SarbYE\0LatnPH\0LatnID\0LatnNG\0LatnPG\0LatnGH\0LatnNG\0LatnPG\0LatnMZ\0DevaNP\0CyrlRU\0LatnVE\0LatnTW\0LatnMX\0LatnMX\0LatnSD\0LatnMX\0LatnID\0LatnAU\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnAU\0LatnBR\0LatnMX\0TamlIN\0LatnAU\0TamlIN\0LatnAU\0LatnIT\0LatnAU\0LatnTD\0LatnAU\0LatnNA\0ItalIT\0ArabAF\0LatnES\0LatnIT\0LatnIT\0LatnBR\0LatnAU\0LatnBJ\0LatnAU\0LatnAU\0LatnBJ\0CyrlRU\0LatnID\0LatnAU\0LatnAU\0LatnGH\0LatnID\0LatnAU\0LatnBR\0LatnID\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnBR\0LatnAU\0LatnAU\0MarcCN\0LatnMX\0LatnPE\0LatnBR\0LatnID\0LatnPE\0LatnVE\0LatnCD\0LatnCL\0CyrlTJ\0LatnCF\0LatnUS\0LatnGN\0LatnCM\0LatnNI\0LatnMZ\0LatnFM\0LatnMX\0LatnVE\0LatnCM\0LatnCM\0LatnVE\0LatnCM\0LatnBR\0LatnAO\0LatnNG\0LatnNG\0LatnNG\0LatnCM\0LatnCN\0DevaNP\0DevaNP\0LatnNG\0LatnNG\0LatnPG\0LatnBR\0LatnPG\0LatnPG\0LatnPG\0LatnCN\0LatnCO\0LatnAU\0LatnPG\0ArabPK\0LatnPG\0MlymIN\0LatnDE\0LatnPG\0LatnCM\0GrekIL\0LatnCD\0LatnNG\0LatnNG\0LatnID\0TeluIN\0LatnPG\0LatnBW\0LatnAU\0LatnAU\0LatnPG\0LatnPG\0PlrdCN\0LatnPG\0LatnAU\0LatnPG\0HebrIL\0HebrUA\0LatnAU\0YiiiCN\0HebrDE\0LatnAU\0LatnAU\0LatnAU\0LatnIN\0LatnID\0LatnPG\0YiiiCN\0LatnPH\0CyrlRU\0LatnID\0LatnPG\0LatnPG\0LatnCM\0LatnPG\0LatnCF\0Latn
PG\0LatnPG\0LatnPG\0LatnPG\0LatnID\0LatnPG\0LatnAU\0LatnPG\0LatnNC\0LatnPG\0LatnPE\0LatnCD\0LatnMZ\0LatnPG\0LatnSO\0LatnID\0LatnPG\0LatnPG\0PlrdCN\0LatnAU\0LatnCD\0CyrlRU\0LatnPG\0LatnNG\0LatnCD\0LatnCO\0LatnPG\0LatnPH\0JpanJP\0LatnUS\0LatnGB\0LatnCD\0LatnPG\0LatnNG\0ThaiTH\0LatnPG\0LatnPG\0LatnCI\0CyrlRU\0LatnAU\0LatnBR\0LatnID\0LatnPG\0LatnAU\0YiiiCN\0YiiiCN\0YiiiCN\0CyrlRU\0LatnPG\0PlrdCN\0LatnPG\0LatnAU\0LatnMX\0LatnAU\0LatnUS\0HebrIL\0LatnUS\0CyrlRU\0LatnCO\0LatnPG\0LatnCF\0LatnUS\0LatnNG\0LatnCO\0LatnBO\0LatnUS\0LatnPG\0LatnPG\0CyrlRU\0LatnBO\0LatnID\0LatnVE\0LatnPG\0LatnAU\0LatnBR\0PlrdCN\0LatnAU\0PlrdCN\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnPG\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnSD\0LatnNG\0LatnTZ\0LatnTZ\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0TibtIN\0LatnMX\0LatnMX\0LatnMX\0LatnET\0LatnNG\0Arab001LatnMY\0LatnMY\0LatnID\0LatnNG\0LatnMY\0LatnMX\0HaniCN\0ArabKM\0LatnNL\0LatnPG\0HaniCN\0TfngMR\0LatnTZ\0HaniCN\0TfngMA\0HaniCN\0HaniCN\0LatnPG\0HaniCN\0LatnNG\0LatnCN\0LatnCM\0NshuCN\0LatnPG\0LatnPG\0LatnGN\0LatnTD\0LatnTZ\0LatnTZ\0LatnNG\0LatnID\0CyrlRU\0LatnMM\0CyrlRU\0LatnBR\0KitsCN\0LatnAU\0CyrlRU\0LatnCD\0HaniCN\0LatnTG\0HaniCN\0HaniCN\0LatnAU\0LatnCD\0LatnAU\0LatnAU\0LatnAU\0LatnCD\0LatnAU\0LatnPG\0LatnMY\0LatnAU\0LatnAU\0LatnAU\0LatnAU\0LatnGA\0LatnSD\0LatnCD\0LatnCD\0LatnAU\0LatnCD\0LatnAU\0LatnAU\0LatnAU\0LatnCD\0LatnCG\0LatnAU\0LatnCD\0LatnTD\0LatnCD\0LatnVN\0LatnAU\0LatnNG\0LatnMX\0LatnMX\0LatnIN\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0HaniCN\0LatnTD\0LatnEC\0HebrFR\0LatnID\0LatnPG\0LatnMX\0LatnPG\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnMX\0LatnNG\0LatnPG\0ArabOM\0LatnUS\0LatnCM\0HaniCN\0LatnCN\0HaniCN\0LatnMM\0LatnTR\0HaniCN\0") })
+ },
+ script_region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabKH\0ArabMM\0ArabTG\0ArabTH\0ArabTR\0ArabYT\0CyrlGE\0CyrlTR\0DevaFJ\0DevaPK\0GrekTR\0HebrSE\0HebrUA\0HebrUS\0LaooAU\0LaooCN\0LaooFR\0LaooGF\0LaooMM\0LaooSR\0LaooTH\0LaooUS\0LaooVN\0LatnAS\0LatnBI\0LatnBQ\0LatnCW\0LatnDJ\0LatnGH\0LatnGL\0LatnHT\0LatnLS\0LatnMG\0LatnML\0LatnMM\0LatnMU\0LatnNO\0LatnPG\0LatnPW\0LatnPY\0LatnRU\0LatnRW\0LatnSJ\0LatnSL\0LatnTK\0LatnTV\0LatnTW\0LatnVU\0LatnWS\0LatnZM\0LatnZW\0MymrIN\0MymrTH\0ThaiCN\0ThaiKH\0ThaiLA\0TibtBT\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"cjarhgapdmfaapcswbab\0kbdhifbtvbgxyi\0yi\0yi\0hnjhnjhnjhnjhnjhnjhnjhnjhnjsm\0rn\0pappapaa\0ak\0kl\0ht\0st\0mg\0bm\0kacmfenb\0tpipaugn\0krlrw\0nb\0kritkltvltrvbi\0sm\0bemsn\0khtmnwlcpkdtkdtdz\0") })
+ },
+ script: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"AghbAhomArmiAvstBaliBamuBassBatkBrahBugiBuhdCakmCansCariChamChrsCoptCprtDiakEgypElymGlagGongGonmGothHanoHluwHmngHmnpItalKaliKawiKharKitsLanaLepcLimbLinaLinbLisuLyciLydiMakaMandManiMarcMedfMendMercMeroMrooMultNagmNarbNbatNewaNkooNshuOgamOrkhOsgePalmPaucPermPhagPhliPhlpPhnxPlrdPrtiRjngRohgRunrSamrSarbSaurSgnwSogdSogoSoraSoyoSyloTagbTaleTaluTangTavtTfngThaaTibtTnsaTotoUgarVaiiWaraWchoXpeoXsuxYiiiZanb") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"udiRU\0ahoIN\0arcIR\0ae\0IR\0banID\0baxCM\0bsqLR\0bbcID\0pkaIN\0bugID\0bkuPH\0ccpBD\0iu\0CA\0xcrTR\0cjmVN\0xcoUZ\0copEG\0grcCY\0dv\0MV\0egyEG\0arcIR\0cu\0BG\0wsgIN\0esgIN\0gotUA\0hnnPH\0hluTR\0hnjUS\0hnjUS\0ettIT\0ekyMM\0kawID\0praPK\0zktCN\0nodTH\0lepIN\0lifIN\0labGR\0grcGR\0lisCN\0xlcTR\0xldTR\0makID\0myzIR\0xmnCN\0bo\0CN\0dmfNG\0menSL\0xmrSD\0xmrSD\0mroBD\0skrPK\0unrIN\0xnaSA\0arcJO\0newNP\0manGN\0zhxCN\0sgaIE\0otkMN\0osaUS\0arcSY\0ctdMM\0kv\0RU\0lzhCN\0palIR\0palCN\0phnLB\0hmdCN\0xprIR\0rejID\0rhgMM\0nonSE\0smpIL\0xsaYE\0sazIN\0aseUS\0sogUZ\0sogUZ\0srbIN\0cmgMN\0sylBD\0tbwPH\0tddCN\0khbCN\0txgCN\0bltVN\0zghMA\0dv\0MV\0bo\0CN\0nstIN\0txoIN\0ugaSY\0vaiLR\0hocIN\0nnpIN\0peoIR\0akkIQ\0ii\0CN\0cmgMN\0") })
+ },
+ region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"AS\0BI\0BQ\0BT\0CW\0DJ\0GH\0GL\0HT\0LS\0MG\0ML\0MU\0MV\0NO\0PG\0PW\0PY\0RW\0SJ\0SL\0TK\0TV\0VU\0WS\0ZM\0ZW\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"sm\0Latnrn\0LatnpapLatndz\0TibtpapLatnaa\0Latnak\0Latnkl\0Latnht\0Latnst\0Latnmg\0Latnbm\0LatnmfeLatndv\0Thaanb\0LatntpiLatnpauLatngn\0Latnrw\0Latnnb\0LatnkriLatntklLatntvlLatnbi\0Latnsm\0LatnbemLatnsn\0Latn") })
+ },
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::LikelySubtagsExtendedV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::LikelySubtagsExtendedV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::LikelySubtagsExtendedV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_l_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_l_v1.rs.data
new file mode 100644
index 0000000000..7ea72eeb9f
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_l_v1.rs.data
@@ -0,0 +1,41 @@
+// @generated
+/// Implement `DataProvider<LikelySubtagsForLanguageV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_locid_transform_likelysubtags_l_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1: &'static <icu::locid_transform::provider::LikelySubtagsForLanguageV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::LikelySubtagsForLanguageV1 {
+ language_script: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0Araben\0Shawff\0Adlmkk\0Arabku\0Arabku\0Yeziky\0Arabky\0Latnmn\0Mongpa\0Arabsd\0Devasd\0Khojsd\0Sindtg\0Arabug\0Cyrluz\0ArabyueHanszh\0Bopozh\0Hanbzh\0Hant") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"IR\0GB\0GN\0CN\0IQ\0GE\0CN\0TR\0CN\0PK\0IN\0IN\0IN\0PK\0KZ\0AF\0CN\0TW\0TW\0TW\0") })
+ },
+ language_region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"az\0IQ\0az\0IR\0az\0RU\0ha\0CM\0ha\0SD\0kk\0AF\0kk\0CN\0kk\0IR\0kk\0MN\0ku\0LB\0ky\0CN\0ky\0TR\0mn\0CN\0ms\0CC\0pa\0PK\0sd\0IN\0sr\0ME\0sr\0RO\0sr\0RU\0sr\0TR\0tg\0PK\0ug\0KZ\0ug\0MN\0uz\0AF\0uz\0CN\0yueCN\0zh\0AU\0zh\0BN\0zh\0GB\0zh\0GF\0zh\0HK\0zh\0ID\0zh\0MO\0zh\0PA\0zh\0PF\0zh\0PH\0zh\0SR\0zh\0TH\0zh\0TW\0zh\0US\0zh\0VN\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabArabCyrlArabArabArabArabArabArabArabArabLatnMongArabArabDevaLatnLatnLatnLatnArabCyrlCyrlArabCyrlHansHantHantHantHantHantHantHantHantHantHantHantHantHantHantHant") })
+ },
+ language: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"af\0am\0ar\0as\0astaz\0be\0bg\0bgcbhoblobn\0br\0brxbs\0ca\0cebchrcs\0cswcv\0cy\0da\0de\0doidsbel\0en\0eo\0es\0et\0eu\0fa\0ff\0fi\0filfo\0fr\0fy\0ga\0gd\0gl\0gu\0ha\0he\0hi\0hr\0hsbhu\0hy\0ia\0id\0ie\0ig\0is\0it\0ja\0jv\0ka\0keakgpkk\0km\0kn\0ko\0kokks\0ku\0kxvky\0lb\0lijlmolo\0lt\0lv\0maimi\0mk\0ml\0mn\0mnimr\0ms\0mt\0my\0ndsne\0nl\0nn\0no\0nqooc\0or\0pa\0pcmpl\0prgps\0pt\0qu\0rajrm\0ro\0ru\0sa\0sahsatsc\0sd\0si\0sk\0sl\0so\0sq\0sr\0su\0sv\0sw\0syrszlta\0te\0tg\0th\0ti\0tk\0to\0toktr\0tt\0ug\0uk\0ur\0uz\0vecvi\0vmwwo\0xh\0xnryo\0yrlyueza\0zh\0zu\0") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"LatnZA\0EthiET\0ArabEG\0BengIN\0LatnES\0LatnAZ\0CyrlBY\0CyrlBG\0DevaIN\0DevaIN\0LatnBJ\0BengBD\0LatnFR\0DevaIN\0LatnBA\0LatnES\0LatnPH\0CherUS\0LatnCZ\0CansCA\0CyrlRU\0LatnGB\0LatnDK\0LatnDE\0DevaIN\0LatnDE\0GrekGR\0LatnUS\0Latn001LatnES\0LatnEE\0LatnES\0ArabIR\0LatnSN\0LatnFI\0LatnPH\0LatnFO\0LatnFR\0LatnNL\0LatnIE\0LatnGB\0LatnES\0GujrIN\0LatnNG\0HebrIL\0DevaIN\0LatnHR\0LatnDE\0LatnHU\0ArmnAM\0Latn001LatnID\0LatnEE\0LatnNG\0LatnIS\0LatnIT\0JpanJP\0LatnID\0GeorGE\0LatnCV\0LatnBR\0CyrlKZ\0KhmrKH\0KndaIN\0KoreKR\0DevaIN\0ArabIN\0LatnTR\0LatnIN\0CyrlKG\0LatnLU\0LatnIT\0LatnIT\0LaooLA\0LatnLT\0LatnLV\0DevaIN\0LatnNZ\0CyrlMK\0MlymIN\0CyrlMN\0BengIN\0DevaIN\0LatnMY\0LatnMT\0MymrMM\0LatnDE\0DevaNP\0LatnNL\0LatnNO\0LatnNO\0NkooGN\0LatnFR\0OryaIN\0GuruIN\0LatnNG\0LatnPL\0LatnPL\0ArabAF\0LatnBR\0LatnPE\0DevaIN\0LatnCH\0LatnRO\0CyrlRU\0DevaIN\0CyrlRU\0OlckIN\0LatnIT\0ArabPK\0SinhLK\0LatnSK\0LatnSI\0LatnSO\0LatnAL\0CyrlRS\0LatnID\0LatnSE\0LatnTZ\0SyrcIQ\0LatnPL\0TamlIN\0TeluIN\0CyrlTJ\0ThaiTH\0EthiET\0LatnTM\0LatnTO\0Latn001LatnTR\0CyrlRU\0ArabCN\0CyrlUA\0ArabPK\0LatnUZ\0LatnIT\0LatnVN\0LatnMZ\0LatnSN\0LatnZA\0DevaIN\0LatnNG\0LatnBR\0HantHK\0LatnCN\0HansCN\0LatnZA\0") })
+ },
+ und: (icu::locid::subtags::language!("en"), icu::locid::subtags::script!("Latn"), icu::locid::subtags::region!("US")),
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::LikelySubtagsForLanguageV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::LikelySubtagsForLanguageV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::LikelySubtagsForLanguageV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_sr_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_sr_v1.rs.data
new file mode 100644
index 0000000000..0c3bd3cb27
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_likelysubtags_sr_v1.rs.data
@@ -0,0 +1,40 @@
+// @generated
+/// Implement `DataProvider<LikelySubtagsForScriptRegionV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_locid_transform_likelysubtags_sr_v1 {
+ ($ provider : ty) => {
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1: &'static <icu::locid_transform::provider::LikelySubtagsForScriptRegionV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::LikelySubtagsForScriptRegionV1 {
+ script_region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ArabAF\0ArabCC\0ArabCN\0ArabGB\0ArabID\0ArabIN\0ArabIR\0ArabMN\0ArabMU\0ArabNG\0ArabPK\0ArabTJ\0CyrlAL\0CyrlBA\0CyrlBG\0CyrlBY\0CyrlGR\0CyrlKG\0CyrlMD\0CyrlMK\0CyrlMN\0CyrlRO\0CyrlRS\0CyrlSK\0CyrlTJ\0CyrlUA\0CyrlXK\0DevaBT\0DevaMU\0DevaNP\0EthiER\0HantCA\0Latn419LatnAD\0LatnAF\0LatnAL\0LatnAM\0LatnAO\0LatnAR\0LatnAT\0LatnAW\0LatnAX\0LatnAZ\0LatnBA\0LatnBE\0LatnBF\0LatnBJ\0LatnBL\0LatnBN\0LatnBO\0LatnBR\0LatnCD\0LatnCF\0LatnCG\0LatnCH\0LatnCI\0LatnCL\0LatnCM\0LatnCN\0LatnCO\0LatnCR\0LatnCU\0LatnCV\0LatnCY\0LatnCZ\0LatnDE\0LatnDK\0LatnDO\0LatnDZ\0LatnEA\0LatnEC\0LatnEE\0LatnES\0LatnET\0LatnFI\0LatnFO\0LatnFR\0LatnGA\0LatnGE\0LatnGF\0LatnGN\0LatnGP\0LatnGQ\0LatnGT\0LatnGW\0LatnHN\0LatnHR\0LatnHU\0LatnIC\0LatnID\0LatnIR\0LatnIS\0LatnIT\0LatnKE\0LatnKM\0LatnLI\0LatnLT\0LatnLU\0LatnLV\0LatnMA\0LatnMC\0LatnMD\0LatnME\0LatnMF\0LatnMK\0LatnMO\0LatnMQ\0LatnMR\0LatnMT\0LatnMX\0LatnMY\0LatnMZ\0LatnNA\0LatnNC\0LatnNE\0LatnNI\0LatnNL\0LatnPA\0LatnPE\0LatnPF\0LatnPH\0LatnPL\0LatnPM\0LatnPR\0LatnPT\0LatnRE\0LatnRO\0LatnRS\0LatnSC\0LatnSE\0LatnSI\0LatnSK\0LatnSM\0LatnSN\0LatnSO\0LatnSR\0LatnST\0LatnSV\0LatnSY\0LatnTD\0LatnTF\0LatnTG\0LatnTL\0LatnTM\0LatnTN\0LatnTO\0LatnTR\0LatnTZ\0LatnUA\0LatnUG\0LatnUY\0LatnUZ\0LatnVA\0LatnVE\0LatnVN\0LatnWF\0LatnXK\0LatnYT\0") }, unsafe { 
zerovec::ZeroVec::from_bytes_unchecked(b"fa\0ms\0ug\0ur\0ms\0ur\0fa\0kk\0ur\0ha\0ur\0fa\0mk\0sr\0bg\0be\0mk\0ky\0uk\0mk\0mn\0bg\0sr\0uk\0tg\0uk\0sr\0ne\0bhone\0ti\0yuees\0ca\0tk\0sq\0ku\0pt\0es\0de\0nl\0sv\0az\0bs\0nl\0fr\0fr\0fr\0ms\0es\0pt\0sw\0fr\0fr\0de\0fr\0es\0fr\0za\0es\0es\0es\0pt\0tr\0cs\0de\0da\0es\0fr\0es\0es\0et\0es\0en\0fi\0fo\0fr\0fr\0ku\0fr\0fr\0fr\0es\0es\0pt\0es\0hr\0hu\0es\0id\0tk\0is\0it\0sw\0fr\0de\0lt\0fr\0lv\0fr\0fr\0ro\0sr\0fr\0sq\0pt\0fr\0fr\0mt\0es\0ms\0pt\0af\0fr\0ha\0es\0nl\0es\0es\0fr\0filpl\0fr\0es\0pt\0fr\0ro\0sr\0fr\0sv\0sl\0sk\0it\0fr\0so\0nl\0pt\0es\0fr\0fr\0fr\0fr\0pt\0tk\0fr\0to\0tr\0sw\0pl\0sw\0es\0uz\0it\0es\0vi\0fr\0sq\0fr\0") })
+ },
+ script: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"AdlmArabArmnBengBhksBopoBraiCherCyrlDevaDogrDuplElbaEthiGeorGranGrekGujrGuruHanbHangHaniHansHantHebrHiraHungJamoJavaJpanKanaKhmrKhojKndaKoreKthiLaooLatnMahjMlymModiMongMteiMymrNandOlckOryaOsmaShawShrdSiddSindSinhSundSyrcTakrTamlTeluTglgThaiTirhVithYezi") }, unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"ff\0GN\0ar\0EG\0hy\0AM\0bn\0BD\0sa\0IN\0zh\0TW\0fr\0FR\0chrUS\0ru\0RU\0hi\0IN\0doiIN\0fr\0FR\0sq\0AL\0am\0ET\0ka\0GE\0sa\0IN\0el\0GR\0gu\0IN\0pa\0IN\0zh\0TW\0ko\0KR\0zh\0CN\0zh\0CN\0zh\0TW\0he\0IL\0ja\0JP\0hu\0HU\0ko\0KR\0jv\0ID\0ja\0JP\0ja\0JP\0km\0KH\0sd\0IN\0kn\0IN\0ko\0KR\0bhoIN\0lo\0LA\0en\0US\0hi\0IN\0ml\0IN\0mr\0IN\0mn\0CN\0mniIN\0my\0MM\0sa\0IN\0satIN\0or\0IN\0so\0SO\0en\0GB\0sa\0IN\0sa\0IN\0sd\0IN\0si\0LK\0su\0ID\0syrIQ\0doiIN\0ta\0IN\0te\0IN\0filPH\0th\0TH\0maiIN\0sq\0AL\0ku\0GE\0") })
+ },
+ region: unsafe {
+ #[allow(unused_unsafe)]
+ zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"419AC\0AD\0AE\0AF\0AG\0AI\0AL\0AM\0AO\0AQ\0AR\0AT\0AU\0AW\0AX\0AZ\0BA\0BB\0BD\0BE\0BF\0BG\0BH\0BJ\0BL\0BM\0BN\0BO\0BR\0BS\0BV\0BW\0BY\0BZ\0CA\0CC\0CD\0CF\0CG\0CH\0CI\0CK\0CL\0CM\0CN\0CO\0CP\0CQ\0CR\0CU\0CV\0CX\0CY\0CZ\0DE\0DG\0DK\0DM\0DO\0DZ\0EA\0EC\0EE\0EG\0EH\0ER\0ES\0ET\0FI\0FJ\0FK\0FM\0FO\0FR\0GA\0GB\0GD\0GE\0GF\0GG\0GI\0GM\0GN\0GP\0GQ\0GR\0GS\0GT\0GU\0GW\0GY\0HK\0HM\0HN\0HR\0HU\0IC\0ID\0IE\0IL\0IM\0IN\0IO\0IQ\0IR\0IS\0IT\0JE\0JM\0JO\0JP\0KE\0KG\0KH\0KI\0KM\0KN\0KP\0KR\0KW\0KY\0KZ\0LA\0LB\0LC\0LI\0LK\0LR\0LT\0LU\0LV\0LY\0MA\0MC\0MD\0ME\0MF\0MH\0MK\0MM\0MN\0MO\0MP\0MQ\0MR\0MS\0MT\0MW\0MX\0MY\0MZ\0NA\0NC\0NE\0NF\0NG\0NI\0NL\0NP\0NR\0NU\0NZ\0OM\0PA\0PE\0PF\0PH\0PK\0PL\0PM\0PN\0PR\0PS\0PT\0QA\0RE\0RO\0RS\0RU\0SA\0SB\0SC\0SD\0SE\0SG\0SH\0SI\0SK\0SM\0SN\0SO\0SR\0SS\0ST\0SV\0SX\0SY\0SZ\0TA\0TC\0TD\0TF\0TG\0TH\0TJ\0TL\0TM\0TN\0TO\0TR\0TT\0TW\0TZ\0UA\0UG\0UM\0US\0UY\0UZ\0VA\0VC\0VE\0VG\0VI\0VN\0WF\0XK\0YE\0YT\0ZA\0") }, unsafe { 
zerovec::ZeroVec::from_bytes_unchecked(b"es\0Latnen\0Latnca\0Latnar\0Arabfa\0Araben\0Latnen\0Latnsq\0Latnhy\0Armnpt\0Latnen\0Latnes\0Latnde\0Latnen\0Latnnl\0Latnsv\0Latnaz\0Latnbs\0Latnen\0Latnbn\0Bengnl\0Latnfr\0Latnbg\0Cyrlar\0Arabfr\0Latnfr\0Latnen\0Latnms\0Latnes\0Latnpt\0Latnen\0Latnen\0Latnen\0Latnbe\0Cyrlen\0Latnen\0Latnms\0Arabsw\0Latnfr\0Latnfr\0Latnde\0Latnfr\0Latnen\0Latnes\0Latnfr\0Latnzh\0Hanses\0Latnen\0Latnen\0Latnes\0Latnes\0Latnpt\0Latnen\0Latnel\0Grekcs\0Latnde\0Latnen\0Latnda\0Latnen\0Latnes\0Latnar\0Arabes\0Latnes\0Latnet\0Latnar\0Arabar\0Arabti\0Ethies\0Latnam\0Ethifi\0Latnen\0Latnen\0Latnen\0Latnfo\0Latnfr\0Latnfr\0Latnen\0Latnen\0Latnka\0Georfr\0Latnen\0Latnen\0Latnen\0Latnfr\0Latnfr\0Latnes\0Latnel\0Greken\0Latnes\0Latnen\0Latnpt\0Latnen\0Latnzh\0Hanten\0Latnes\0Latnhr\0Latnhu\0Latnes\0Latnid\0Latnen\0Latnhe\0Hebren\0Latnhi\0Devaen\0Latnar\0Arabfa\0Arabis\0Latnit\0Latnen\0Latnen\0Latnar\0Arabja\0Jpansw\0Latnky\0Cyrlkm\0Khmren\0Latnar\0Araben\0Latnko\0Koreko\0Korear\0Araben\0Latnru\0Cyrllo\0Laooar\0Araben\0Latnde\0Latnsi\0Sinhen\0Latnlt\0Latnfr\0Latnlv\0Latnar\0Arabar\0Arabfr\0Latnro\0Latnsr\0Latnfr\0Latnen\0Latnmk\0Cyrlmy\0Mymrmn\0Cyrlzh\0Hanten\0Latnfr\0Latnar\0Araben\0Latnmt\0Latnen\0Latnes\0Latnms\0Latnpt\0Latnaf\0Latnfr\0Latnha\0Latnen\0Latnen\0Latnes\0Latnnl\0Latnne\0Devaen\0Latnen\0Latnen\0Latnar\0Arabes\0Latnes\0Latnfr\0LatnfilLatnur\0Arabpl\0Latnfr\0Latnen\0Latnes\0Latnar\0Arabpt\0Latnar\0Arabfr\0Latnro\0Latnsr\0Cyrlru\0Cyrlar\0Araben\0Latnfr\0Latnar\0Arabsv\0Latnen\0Latnen\0Latnsl\0Latnsk\0Latnit\0Latnfr\0Latnso\0Latnnl\0Latnar\0Arabpt\0Latnes\0Latnen\0Latnar\0Araben\0Latnen\0Latnen\0Latnfr\0Latnfr\0Latnfr\0Latnth\0Thaitg\0Cyrlpt\0Latntk\0Latnar\0Arabto\0Latntr\0Latnen\0Latnzh\0Hantsw\0Latnuk\0Cyrlsw\0Latnen\0Latnen\0Latnes\0Latnuz\0Latnit\0Latnen\0Latnes\0Latnen\0Latnen\0Latnvi\0Latnfr\0Latnsq\0Latnar\0Arabfr\0Latnen\0Latn") })
+ },
+ };
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::LikelySubtagsForScriptRegionV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::LikelySubtagsForScriptRegionV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() {
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1)), metadata: Default::default() })
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::LikelySubtagsForScriptRegionV1Marker as icu_provider::KeyedDataMarker>::KEY, req))
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_script_dir_v1.rs.data b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_script_dir_v1.rs.data
new file mode 100644
index 0000000000..a21c7d298c
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/data/macros/locid_transform_script_dir_v1.rs.data
@@ -0,0 +1,27 @@
+// @generated
+/// Implement `DataProvider<ScriptDirectionV1Marker>` on the given struct using the data
+/// hardcoded in this file. This allows the struct to be used with
+/// `icu`'s `_unstable` constructors.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __impl_locid_transform_script_dir_v1 {
+ ($ provider : ty) => { // single arm: `$provider` is the type that receives the impls below
+ #[clippy::msrv = "1.67"]
+ const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO; // compile-time guard: expansion only builds if the provider type has this associated const (presumably supplied by a provider-construction macro; confirm)
+ #[clippy::msrv = "1.67"]
+ impl $provider {
+ #[doc(hidden)]
+ pub const SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1: &'static <icu::locid_transform::provider::ScriptDirectionV1Marker as icu_provider::DataMarker>::Yokeable = &icu::locid_transform::provider::ScriptDirectionV1 { rtl: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"AdlmArabArmiAvstChrsCprtElymHatrHebrHungKharLydiMandManiMendMercMeroNarbNbatNkooOrkhOugrPalmPhliPhlpPhnxPrtiRohgSamrSarbSogdSogoSyrcThaaYezi") }, ltr: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"AghbAhomArmnBaliBamuBassBatkBengBhksBopoBrahBugiBuhdCakmCansCariChamCherCoptCpmnCyrlDevaDiakDogrDsrtDuplEgypElbaEthiGeorGlagGongGonmGothGranGrekGujrGuruHanbHangHaniHanoHansHantHiraHluwHmngHmnpItalJamoJavaJpanKaliKanaKawiKhmrKhojKitsKndaKoreKthiLanaLaooLatnLepcLimbLinaLinbLisuLyciMahjMakaMarcMedfMlymModiMongMrooMteiMultMymrNagmNandNewaNshuOgamOlckOryaOsgeOsmaPaucPermPhagPlrdRjngRunrSaurSgnwShawShrdSiddSindSinhSoraSoyoSundSyloTagbTakrTaleTaluTamlTangTavtTeluTfngTglgThaiTibtTirhTnsaTotoUgarVaiiVithWaraWchoXpeoXsuxYiiiZanb") } }; // NOTE(review): neither `unsafe` block has a SAFETY comment; both literals read as back-to-back 4-letter script codes in ascending order, presumably the layout `from_bytes_unchecked` relies on (confirm against the zerovec docs)
+ }
+ #[clippy::msrv = "1.67"]
+ impl icu_provider::DataProvider<icu::locid_transform::provider::ScriptDirectionV1Marker> for $provider {
+ fn load(&self, req: icu_provider::DataRequest) -> Result<icu_provider::DataResponse<icu::locid_transform::provider::ScriptDirectionV1Marker>, icu_provider::DataError> {
+ if req.locale.is_empty() { // only a request with an empty locale is answered
+ Ok(icu_provider::DataResponse { payload: Some(icu_provider::DataPayload::from_static_ref(Self::SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1)), metadata: Default::default() }) // payload borrows the constant above; metadata is left at its default
+ } else {
+ Err(icu_provider::DataErrorKind::ExtraneousLocale.with_req(<icu::locid_transform::provider::ScriptDirectionV1Marker as icu_provider::KeyedDataMarker>::KEY, req)) // any non-empty locale is rejected, with this marker KEY and the request attached to the error
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/icu_locid_transform_data/src/lib.rs b/third_party/rust/icu_locid_transform_data/src/lib.rs
new file mode 100644
index 0000000000..148784b2ab
--- /dev/null
+++ b/third_party/rust/icu_locid_transform_data/src/lib.rs
@@ -0,0 +1,12 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Data for the icu_locid_transform crate
+
+#![no_std]
+
+#[cfg(icu4x_custom_data)] // custom-data builds: take `macros.rs` from the directory named by the compile-time env var `ICU4X_DATA_DIR`
+include!(concat!(core::env!("ICU4X_DATA_DIR"), "/macros.rs"));
+#[cfg(not(icu4x_custom_data))] // all other builds: take `macros.rs` from `../data`, resolved relative to this source file
+include!("../data/macros.rs");