summaryrefslogtreecommitdiffstats
path: root/vendor/icu_locid
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_locid')
-rw-r--r--vendor/icu_locid/.cargo-checksum.json1
-rw-r--r--vendor/icu_locid/Cargo.lock725
-rw-r--r--vendor/icu_locid/Cargo.toml136
-rw-r--r--vendor/icu_locid/LICENSE51
-rw-r--r--vendor/icu_locid/README.md63
-rw-r--r--vendor/icu_locid/benches/fixtures/langid.json48
-rw-r--r--vendor/icu_locid/benches/fixtures/locale.json26
-rw-r--r--vendor/icu_locid/benches/fixtures/mod.rs25
-rw-r--r--vendor/icu_locid/benches/fixtures/subtags.json18
-rw-r--r--vendor/icu_locid/benches/helpers/macros.rs110
-rw-r--r--vendor/icu_locid/benches/helpers/mod.rs17
-rw-r--r--vendor/icu_locid/benches/iai_langid.rs118
-rw-r--r--vendor/icu_locid/benches/langid.rs93
-rw-r--r--vendor/icu_locid/benches/locale.rs87
-rw-r--r--vendor/icu_locid/benches/subtags.rs39
-rw-r--r--vendor/icu_locid/examples/filter_langids.rs66
-rw-r--r--vendor/icu_locid/examples/syntatically_canonicalize_locales.rs54
-rw-r--r--vendor/icu_locid/src/extensions/mod.rs313
-rw-r--r--vendor/icu_locid/src/extensions/other/mod.rs157
-rw-r--r--vendor/icu_locid/src/extensions/other/subtag.rs37
-rw-r--r--vendor/icu_locid/src/extensions/private/mod.rs167
-rw-r--r--vendor/icu_locid/src/extensions/private/other.rs31
-rw-r--r--vendor/icu_locid/src/extensions/transform/fields.rs228
-rw-r--r--vendor/icu_locid/src/extensions/transform/key.rs31
-rw-r--r--vendor/icu_locid/src/extensions/transform/mod.rs236
-rw-r--r--vendor/icu_locid/src/extensions/transform/value.rs119
-rw-r--r--vendor/icu_locid/src/extensions/unicode/attribute.rs34
-rw-r--r--vendor/icu_locid/src/extensions/unicode/attributes.rs115
-rw-r--r--vendor/icu_locid/src/extensions/unicode/key.rs31
-rw-r--r--vendor/icu_locid/src/extensions/unicode/keywords.rs404
-rw-r--r--vendor/icu_locid/src/extensions/unicode/mod.rs233
-rw-r--r--vendor/icu_locid/src/extensions/unicode/value.rs199
-rw-r--r--vendor/icu_locid/src/helpers.rs648
-rw-r--r--vendor/icu_locid/src/langid.rs523
-rw-r--r--vendor/icu_locid/src/lib.rs101
-rw-r--r--vendor/icu_locid/src/locale.rs528
-rw-r--r--vendor/icu_locid/src/macros.rs191
-rw-r--r--vendor/icu_locid/src/ordering.rs61
-rw-r--r--vendor/icu_locid/src/parser/errors.rs54
-rw-r--r--vendor/icu_locid/src/parser/langid.rs269
-rw-r--r--vendor/icu_locid/src/parser/locale.rs42
-rw-r--r--vendor/icu_locid/src/parser/mod.rs98
-rw-r--r--vendor/icu_locid/src/serde.rs135
-rw-r--r--vendor/icu_locid/src/subtags/language.rs107
-rw-r--r--vendor/icu_locid/src/subtags/mod.rs58
-rw-r--r--vendor/icu_locid/src/subtags/region.rs61
-rw-r--r--vendor/icu_locid/src/subtags/script.rs32
-rw-r--r--vendor/icu_locid/src/subtags/variant.rs34
-rw-r--r--vendor/icu_locid/src/subtags/variants.rs134
-rw-r--r--vendor/icu_locid/src/zerovec.rs132
-rw-r--r--vendor/icu_locid/tests/fixtures/canonicalize.json18
-rw-r--r--vendor/icu_locid/tests/fixtures/invalid-extensions.json112
-rw-r--r--vendor/icu_locid/tests/fixtures/invalid.json85
-rw-r--r--vendor/icu_locid/tests/fixtures/langid.json167
-rw-r--r--vendor/icu_locid/tests/fixtures/locale.json298
-rw-r--r--vendor/icu_locid/tests/fixtures/mod.rs260
-rw-r--r--vendor/icu_locid/tests/helpers/mod.rs15
-rw-r--r--vendor/icu_locid/tests/langid.rs157
-rw-r--r--vendor/icu_locid/tests/locale.rs122
59 files changed, 8454 insertions, 0 deletions
diff --git a/vendor/icu_locid/.cargo-checksum.json b/vendor/icu_locid/.cargo-checksum.json
new file mode 100644
index 000000000..1f1097bd7
--- /dev/null
+++ b/vendor/icu_locid/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.lock":"6bf9c8304a3fe9f99d7189f9a082be2c7859ea164976975069f8fd2f7f80bbbd","Cargo.toml":"44c6bcdc448226df67e425cb00bf02596c96d0a0bfcb3951d3a5d0998afaa60d","LICENSE":"4ad7541d66a407234e2c84902124cef325c29f3e966353efdb800bedb8b8da21","README.md":"d0e5ced27519cf715a66dc4fece18c8cacece8dbb81eb1e03ab82dd57f0bc7f5","benches/fixtures/langid.json":"373c11527653c63c685c9e229a8de5ae2b557c25b686a9d891c59e1f603232d8","benches/fixtures/locale.json":"669b19db933094290a45bf856559920f4e92401072e364ac82c482119dc9233a","benches/fixtures/mod.rs":"9a9671eddcf38a6faa10cb814949f8abc15d89f5e70f3ad6f684f1bc3ffe72ea","benches/fixtures/subtags.json":"28be3a639e452d713e807d5779b6819e06277e2dbbf67801ef34964fb9b074b6","benches/helpers/macros.rs":"bba0945a826bc083156bc302507c48c0c99c4d965e2a84352644d768591b0339","benches/helpers/mod.rs":"c98167d866fdb7f66c8cab41e8d57b5aab9e9707dfc66c37ef136e088dac6fef","benches/iai_langid.rs":"675ab67edc2820894e1179e97e3aad6037957084efa07e494c17c40f3c0bbe35","benches/langid.rs":"4e3d307d48fd9071308a567a0ef927b229814978abd2ba29f57c65edd51f38e4","benches/locale.rs":"b8d5b1e3f8b5578c549a5149229656fb60de26b76a1bf66b6c1abce75042d674","benches/subtags.rs":"e7e80dabaf31bf031779456614f139cafcdadb805986e71b49133ac964928432","examples/filter_langids.rs":"28bea5b7dc715d6c00694437c3f12a72cf68dc984bb13acbb7b1ce5f97c5726a","examples/syntatically_canonicalize_locales.rs":"de97579c82f1670629d077a6216ecce06761da28715387f46250f81b8172ae6b","src/extensions/mod.rs":"76efffe1c99da3ef61a93f8174267e4b0b63abc3283ec8e0c5170ebc582263fe","src/extensions/other/mod.rs":"4216cd8a4dcef13105b48e507659920feaaa3fa3aebc2ba8d7702b40bbec2881","src/extensions/other/subtag.rs":"cb52ec1acec55e4c0e1d37cc5a552d11010051d827786202702257c8fcd96c49","src/extensions/private/mod.rs":"961bfb455114ad7166beb5acb36a1b182d2e81d99cccbfd3b3bf68853cae490d","src/extensions/private/other.rs":"586fd24398e78c5fda0afdb98de28a6467afd2d702683daf5dfab2a6c45af1e9","src/extensions/transform/fields.rs":"376ae5862329709d54b262a6d91be97bb02fc5e0198f30be8a2f4b0adc420c8b","src/extensions/transform/key.rs":"53e8c9ce13f00f678c2322855cc1d90afd91cd33a2af3758d098b7bbcc7090e5","src/extensions/transform/mod.rs":"c932d7e4484ac3bf3c9fe0c63b17847d8cb29f8874d71cd17070e63b8bca5998","src/extensions/transform/value.rs":"153c4edeb987e052dafe0790bcda560da4dcfa6897e5aaf3f62ae772b0605009","src/extensions/unicode/attribute.rs":"d558a193b72f54cdb03afe8e023a145ac74832c8416ca55401cd417ebba2431c","src/extensions/unicode/attributes.rs":"f2f13714750035ff805455b43ba665710978d13b90a53358314e798662c436b6","src/extensions/unicode/key.rs":"6c8694527079c5dd5f03f8e85f23ae6b5aa4b47899d1047036960e8400dca7de","src/extensions/unicode/keywords.rs":"58a2eca7c5e6ac6ad6812538a5b8118e35274c6b5de8029d55cbe1b4cd0a4abb","src/extensions/unicode/mod.rs":"e81db13fdb2db8d6cf7cfcd7c0d926b929fceca500894e688768b3494d02d0c3","src/extensions/unicode/value.rs":"02876ed95059d21d09ff2b986776d6bf0cb14c698237a86a9be24886ffd7a1cd","src/helpers.rs":"a6b8c22ef40a57339e4051fad54e724320851f827bc6f888187f30371024d04a","src/langid.rs":"b3258b1be6566dc117295a525dcb04237f0049c59dc91f460d939cd162ef8b39","src/lib.rs":"6f6248e20709be74b9e186b45810a4963ffa91c680be4ad78be9a6af5a10da5c","src/locale.rs":"a1ff7500d17581fe06524f6d70d59f0765c5d5ca89cb64b42953b286b20727b4","src/macros.rs":"f7154fc103ea1120a55bb5898540b20df80de6eec42e70ce15f339d997f2bf52","src/ordering.rs":"c70aa4e33d5cbab8d75d2833641141b71984a93648634cfc57fc25c3f79a5f58","src/parser/errors.rs":"ccea5e49c109db3766a71ac4aab1d759e2351c4cd31816b6abdca166699c7f3e","src/parser/langid.rs":"ef5c3dc233a5cea1953688e69152c601a3260196caa9327dd07edc7b6be7b0b8","src/parser/locale.rs":"b7d4cd4ed80b0acae9e77046a3b4943ee19e4aec395e36951750da32366b9a8e","src/parser/mod.rs":"c65268221fc67a692a2a647b08dd81b244a9186c04f5ab0837383dcaa983b740","src/serde.rs":"06e940e4f2d15f02d313b4e2b233aea3e74c93c6c43076f5ffe52d49c133608f","src/subtags/language.rs":"e9dc6de6c6aebb6d8bf6e55f1ae9fab41844a52e681b4309e625a5076c02f9f3","src/subtags/mod.rs":"0257f746ed368ea3fa675054c9e7e40d972ec31cd7cc525be655a16a83c9d17b","src/subtags/region.rs":"4f4120f4910d0a4496f29c193d00313e71be4c646867d97ebd0e9a7438693847","src/subtags/script.rs":"6b1a68783cb90409bdd39b0184dfb2cb1c302fdee7202e3b6f7c7c8941bc7dfe","src/subtags/variant.rs":"956f1ea3d98172b6ead333411f010cf4e84404584a3051cb775d148d79beb4f8","src/subtags/variants.rs":"7740d1b20f596b04f509db917e9c2fffba80a013ffc42f0046cdc2d32b088aeb","src/zerovec.rs":"9d01a235d18296fbf0c2e89d188459e9446df0e63aaedc7e150165604af885b9","tests/fixtures/canonicalize.json":"9f2b7cbef72c24944cd4dc50de368c6e3ef69949f29c9ce1aa8807de767a4d0a","tests/fixtures/invalid-extensions.json":"0af95f38e458c8f52760f76c6540993beb9ba9421a3967df0cd6abb9fe2ce21a","tests/fixtures/invalid.json":"1f1ae207f1ce886b3f57cfcdfb2525aa3e58d538f997b2bda4088062de7aa68d","tests/fixtures/langid.json":"960fd01722217ef1ea9077e2e0821d7089fe318a241bd7fb7918f50bf8f3f5c3","tests/fixtures/locale.json":"8606e0569fc6ea0e50a1fecb9295b911fbef7d8dbfde3c585476284a751baccf","tests/fixtures/mod.rs":"28dec3e5c9d766e148adbff6857dce884d9ff94f7ef8aee17fde0084cc78a7ee","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/langid.rs":"2e21d576a6eaba000fbe88d52362384f460ba350cac1e7034a1661302000ac58","tests/locale.rs":"91af0a738ca5def89fdb4d7f8d3504ad7b757e1d7c8e4d24dc246de610b46a04"},"package":"34b3de5d99a0e275fe6193b9586dbf37364daebc0d39c89b5cf8376a53b789e8"} \ No newline at end of file
diff --git a/vendor/icu_locid/Cargo.lock b/vendor/icu_locid/Cargo.lock
new file mode 100644
index 000000000..9940858d2
--- /dev/null
+++ b/vendor/icu_locid/Cargo.lock
@@ -0,0 +1,725 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bstr"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
+dependencies = [
+ "lazy_static",
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "clap"
+version = "2.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
+dependencies = [
+ "bitflags",
+ "textwrap",
+ "unicode-width",
+]
+
+[[package]]
+name = "cobs"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+
+[[package]]
+name = "criterion"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f"
+dependencies = [
+ "atty",
+ "cast",
+ "clap",
+ "criterion-plot",
+ "csv",
+ "itertools",
+ "lazy_static",
+ "num-traits",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_cbor",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
+dependencies = [
+ "cfg-if",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "crossbeam-utils",
+ "memoffset",
+ "once_cell",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+]
+
+[[package]]
+name = "csv"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
+dependencies = [
+ "bstr",
+ "csv-core",
+ "itoa 0.4.8",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "databake"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c87777d6d7bde863ba217aa87521dc857239de1f36d66aac46fd173fb0495858"
+dependencies = [
+ "databake-derive",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "databake-derive"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "905c7a060fc0c84c0452d97473b1177dd7a5cbc7670cfbae4a7fe22e42f6432e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "displaydoc"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "either"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
+
+[[package]]
+name = "half"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "iai"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678"
+
+[[package]]
+name = "icu_benchmark_macros"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c867656f2d9c90b13709ac88e710a9d6afe33998c1dfa22384bab8804e8b3d4"
+
+[[package]]
+name = "icu_locid"
+version = "1.0.0"
+dependencies = [
+ "criterion",
+ "databake",
+ "displaydoc",
+ "iai",
+ "icu_benchmark_macros",
+ "litemap",
+ "postcard",
+ "serde",
+ "serde_json",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
+
+[[package]]
+name = "itoa"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
+
+[[package]]
+name = "js-sys"
+version = "0.3.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.133"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966"
+
+[[package]]
+name = "litemap"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f34a3f4798fac63fb48cf277eefa38f94d3443baff555bb98e4f56bc9092368e"
+
+[[package]]
+name = "log"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
+name = "memoffset"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
+
+[[package]]
+name = "oorandom"
+version = "11.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
+
+[[package]]
+name = "plotters"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "postcard"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c2b180dc0bade59f03fd005cb967d3f1e5f69b13922dad0cd6e047cb8af2363"
+dependencies = [
+ "cobs",
+ "serde",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rayon"
+version = "1.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d"
+dependencies = [
+ "autocfg",
+ "crossbeam-deque",
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-deque",
+ "crossbeam-utils",
+ "num_cpus",
+]
+
+[[package]]
+name = "regex"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
+dependencies = [
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
+
+[[package]]
+name = "ryu"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+
+[[package]]
+name = "serde"
+version = "1.0.145"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_cbor"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
+dependencies = [
+ "half",
+ "serde",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.145"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.85"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
+dependencies = [
+ "itoa 1.0.3",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.101"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "unicode-xid",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "tinystr"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8aeafdfd935e4a7fe16a91ab711fa52d54df84f9c8f7ca5837a9d1d902ef4c2"
+dependencies = [
+ "displaydoc",
+ "serde",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
+
+[[package]]
+name = "walkdir"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
+dependencies = [
+ "same-file",
+ "winapi",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
+
+[[package]]
+name = "web-sys"
+version = "0.3.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "writeable"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8e6ab4f5da1b24daf2c590cfac801bacb27b15b4f050e84eb60149ea726f06b"
+
+[[package]]
+name = "zerofrom"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79e9355fccf72b04b7deaa99ce7a0f6630530acf34045391b74460fcd714de54"
+
+[[package]]
+name = "zerovec"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d919a74c17749ccb17beaf6405562e413cd94e98ba52ca1e64bbe7eefbd8b8"
+dependencies = [
+ "zerofrom",
+]
diff --git a/vendor/icu_locid/Cargo.toml b/vendor/icu_locid/Cargo.toml
new file mode 100644
index 000000000..3ce7066e7
--- /dev/null
+++ b/vendor/icu_locid/Cargo.toml
@@ -0,0 +1,136 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "icu_locid"
+version = "1.0.0"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "API for managing Unicode Language and Locale Identifiers"
+readme = "README.md"
+categories = ["internationalization"]
+license = "Unicode-DFS-2016"
+repository = "https://github.com/unicode-org/icu4x"
+resolver = "2"
+
+[package.metadata.cargo-all-features]
+skip_optional_dependencies = true
+denylist = ["bench"]
+extra_features = ["serde"]
+
+[package.metadata.docs.rs]
+all-features = true
+
+[lib]
+path = "src/lib.rs"
+bench = false
+
+[[example]]
+name = "filter_langids"
+test = true
+
+[[example]]
+name = "syntatically_canonicalize_locales"
+test = true
+
+[[bench]]
+name = "subtags"
+harness = false
+required-features = ["bench"]
+
+[[bench]]
+name = "langid"
+harness = false
+
+[[bench]]
+name = "locale"
+harness = false
+
+[[bench]]
+name = "iai_langid"
+harness = false
+required-features = ["bench"]
+
+[dependencies.databake]
+version = "0.1.0"
+features = ["derive"]
+optional = true
+
+[dependencies.displaydoc]
+version = "0.2.3"
+default-features = false
+
+[dependencies.litemap]
+version = "0.6"
+
+[dependencies.serde]
+version = "1.0"
+features = [
+ "alloc",
+ "derive",
+]
+optional = true
+default-features = false
+
+[dependencies.tinystr]
+version = "0.7"
+features = ["alloc"]
+default-features = false
+
+[dependencies.writeable]
+version = "0.5"
+
+[dependencies.zerovec]
+version = "0.9"
+optional = true
+
+[dev-dependencies.criterion]
+version = "0.3.3"
+
+[dev-dependencies.iai]
+version = "0.1.1"
+
+[dev-dependencies.icu_benchmark_macros]
+version = "0.7"
+
+[dev-dependencies.litemap]
+version = "0.6"
+features = ["testing"]
+
+[dev-dependencies.postcard]
+version = "1.0.0"
+features = ["use-std"]
+default-features = false
+
+[dev-dependencies.serde]
+version = "1.0"
+features = ["derive"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+
+[features]
+bench = ["serde"]
+default = []
+serde = [
+ "dep:serde",
+ "tinystr/serde",
+]
+std = []
diff --git a/vendor/icu_locid/LICENSE b/vendor/icu_locid/LICENSE
new file mode 100644
index 000000000..9858d01ab
--- /dev/null
+++ b/vendor/icu_locid/LICENSE
@@ -0,0 +1,51 @@
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+
+See Terms of Use <https://www.unicode.org/copyright.html>
+for definitions of Unicode Inc.’s Data Files and Software.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2022 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/vendor/icu_locid/README.md b/vendor/icu_locid/README.md
new file mode 100644
index 000000000..cc2a0b023
--- /dev/null
+++ b/vendor/icu_locid/README.md
@@ -0,0 +1,63 @@
+# icu_locid [![crates.io](https://img.shields.io/crates/v/icu_locid)](https://crates.io/crates/icu_locid)
+
+Parsing, manipulating, and serializing Unicode Language and Locale Identifiers.
+
+This module is published as its own crate ([`icu_locid`](https://docs.rs/icu_locid/latest/icu_locid/))
+and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+
+The module provides algorithms for parsing a string into a well-formed language or locale identifier
+as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`].
+
+[`Locale`] is the most common structure to use for storing information about a language,
+script, region, variants and extensions. In almost all cases, this struct should be used as the
+base unit for all locale management operations.
+
+[`LanguageIdentifier`] is a strict subset of [`Locale`] which can be useful in a narrow range of
+cases where [`Unicode Extensions`] are not relevant.
+
+If in doubt, use [`Locale`].
+
+## Examples
+
+```rust
+use icu::locid::subtags::{Language, Region};
+use icu::locid::Locale;
+
+let mut loc: Locale = "en-US".parse().expect("Parsing failed.");
+
+let lang: Language = "en".parse().expect("Parsing failed.");
+let region: Region = "US".parse().expect("Parsing failed.");
+
+assert_eq!(loc.id.language, lang);
+assert_eq!(loc.id.script, None);
+assert_eq!(loc.id.region, Some(region));
+assert_eq!(loc.id.variants.len(), 0);
+
+let region: Region = "GB".parse().expect("Parsing failed.");
+loc.id.region = Some(region);
+
+assert_eq!(loc.to_string(), "en-GB");
+```
+
+### Macros
+
+```rust
+use icu::locid::{
+ langid, subtags_language as language, subtags_region as region,
+};
+
+let lid = langid!("EN_US");
+
+assert_eq!(lid.language, language!("en"));
+assert_eq!(lid.region, Some(region!("US")));
+```
+
+For more details, see [`Locale`] and [`LanguageIdentifier`].
+
+[`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]: https://unicode.org/reports/tr35/tr35.html#Unicode_Language_and_Locale_Identifiers
+[`ICU4X`]: ../icu/index.html
+[`Unicode Extensions`]: extensions
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/vendor/icu_locid/benches/fixtures/langid.json b/vendor/icu_locid/benches/fixtures/langid.json
new file mode 100644
index 000000000..43c56d5a2
--- /dev/null
+++ b/vendor/icu_locid/benches/fixtures/langid.json
@@ -0,0 +1,48 @@
+{
+ "canonicalized": [
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "it",
+ "zh-Hans-CN",
+ "de-AT",
+ "pl",
+ "fr-FR",
+ "de-AT",
+ "sr-Cyrl-SR",
+ "nb-NO",
+ "fr-FR",
+ "mk",
+ "uk",
+ "en-US",
+ "en-GB",
+ "es-AR",
+ "th",
+ "de",
+ "zh-Cyrl-HN",
+ "en-Latn-US"
+ ],
+ "casing": [
+ "En_uS",
+ "EN-GB",
+ "ES-aR",
+ "iT",
+ "zH_HaNs_cN",
+ "dE-aT",
+ "Pl",
+ "FR-FR",
+ "de_AT",
+ "sR-CyrL_sr",
+ "NB-NO",
+ "fr_fr",
+ "Mk",
+ "uK",
+ "en-us",
+ "en_gb",
+ "ES-AR",
+ "tH",
+ "DE",
+ "ZH_cyrl_hN",
+ "eN-lAtN-uS"
+ ]
+}
diff --git a/vendor/icu_locid/benches/fixtures/locale.json b/vendor/icu_locid/benches/fixtures/locale.json
new file mode 100644
index 000000000..f974a166f
--- /dev/null
+++ b/vendor/icu_locid/benches/fixtures/locale.json
@@ -0,0 +1,26 @@
+{
+ "canonicalized": [
+ "en-US-u-hc-h12",
+ "en-GB-u-ca-gregory-hc-h12",
+ "es-AR-x-private",
+ "th-u-ca-buddhist",
+ "de-u-co-phonebk-ka-shifted",
+ "ar-u-nu-native",
+ "ar-u-nu-latn",
+ "ja-t-it",
+ "ja-Kana-t-it",
+ "und-Latn-t-und-cyrl"
+ ],
+ "casing": [
+ "en-US-U-hc-h12",
+ "en-GB-u-CA-gregory-hc-h12",
+ "es-AR-x-Private",
+ "th-u-ca-buDDhist",
+ "de-u-co-phonebk-KA-shifted",
+ "AR_U-NU-native",
+ "ar-u-nu-LaTN",
+ "jA-T-it",
+ "ja-kanA-T-IT",
+ "unD-Latn-T-und-cyrl"
+ ]
+}
diff --git a/vendor/icu_locid/benches/fixtures/mod.rs b/vendor/icu_locid/benches/fixtures/mod.rs
new file mode 100644
index 000000000..006b22312
--- /dev/null
+++ b/vendor/icu_locid/benches/fixtures/mod.rs
@@ -0,0 +1,25 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use serde::Deserialize;
+
+#[derive(Deserialize)]
+pub struct SubtagData {
+ pub valid: Vec<String>,
+ pub invalid: Vec<String>,
+}
+
+#[derive(Deserialize)]
+pub struct Subtags {
+ pub language: SubtagData,
+ pub script: SubtagData,
+ pub region: SubtagData,
+ pub variant: SubtagData,
+}
+
+#[derive(Deserialize)]
+pub struct LocaleList {
+ pub canonicalized: Vec<String>,
+ pub casing: Vec<String>,
+}
diff --git a/vendor/icu_locid/benches/fixtures/subtags.json b/vendor/icu_locid/benches/fixtures/subtags.json
new file mode 100644
index 000000000..cf8419cc9
--- /dev/null
+++ b/vendor/icu_locid/benches/fixtures/subtags.json
@@ -0,0 +1,18 @@
+{
+ "language": {
+ "valid": ["en", "it", "pl", "de", "fr", "cs", "csb", "und", "ru", "nb", "NB", "UK", "pL", "Zh", "ES"],
+ "invalid": ["", "1", "$", "a1", "1211", "as_sa^a", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ },
+ "script": {
+ "valid": ["Latn", "latn", "Arab", "xxxx", "Flan", "fAlA", "oOoO", "pPlQ", "esta", "RUSS"],
+ "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ },
+ "region": {
+ "valid": ["DE", "321", "zh", "IA", "fN", "rU", "ru", "RU", "Ru", "CN", "AR"],
+ "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ },
+ "variant": {
+ "valid": ["macos", "MaCoS", "windows", "posix", "POSIX", "Posix", "linux", "lINUX", "mAcOs", "testing", "WWWWWW"],
+ "invalid": ["", "1", "$", "a1", "a211", "ass__aa", "-0we", "3e3", "kk$$22", "testingaverylongstring"]
+ }
+}
diff --git a/vendor/icu_locid/benches/helpers/macros.rs b/vendor/icu_locid/benches/helpers/macros.rs
new file mode 100644
index 000000000..848a360c4
--- /dev/null
+++ b/vendor/icu_locid/benches/helpers/macros.rs
@@ -0,0 +1,110 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#[macro_export]
+macro_rules! overview {
+ ($c:expr, $struct:ident, $data_str:expr, $compare:expr) => {
+ $c.bench_function("overview", |b| {
+ b.iter(|| {
+ let mut values = vec![];
+ for s in $data_str {
+ let value: Result<$struct, _> = black_box(s).parse();
+ values.push(value.expect("Parsing failed"));
+ }
+ let _ = values
+ .iter()
+ .filter(|&v| v.normalizing_eq($compare))
+ .count();
+
+ values
+ .iter()
+ .map(|v| v.to_string())
+ .collect::<Vec<String>>()
+ })
+ });
+ };
+}
+
+#[macro_export]
+macro_rules! construct {
+ ($c:expr, $struct:ident, $struct_name:expr, $data_str:expr) => {
+ $c.bench_function($struct_name, |b| {
+ b.iter(|| {
+ for s in $data_str {
+ let _: Result<$struct, _> = black_box(s).parse();
+ }
+ })
+ });
+ };
+}
+
+#[macro_export]
+macro_rules! to_string {
+ ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => {
+ $c.bench_function($struct_name, |b| {
+ b.iter(|| {
+ for s in $data {
+ let _ = black_box(s).to_string();
+ }
+ })
+ });
+ $c.bench_function(std::concat!($struct_name, "/writeable"), |b| {
+ use writeable::Writeable;
+ b.iter(|| {
+ for s in $data {
+ let _ = black_box(s).write_to_string();
+ }
+ })
+ });
+ };
+}
+
+#[macro_export]
+macro_rules! compare_struct {
+ ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => {
+ $c.bench_function(BenchmarkId::new("struct", $struct_name), |b| {
+ b.iter(|| {
+ for (lid1, lid2) in $data1.iter().zip($data2.iter()) {
+ let _ = black_box(lid1) == black_box(lid2);
+ }
+ })
+ });
+ };
+}
+
+#[macro_export]
+macro_rules! compare_str {
+ ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => {
+ $c.bench_function(BenchmarkId::new("str", $struct_name), |b| {
+ b.iter(|| {
+ for (lid, s) in $data1.iter().zip($data2.iter()) {
+ let _ = black_box(lid).normalizing_eq(&black_box(s));
+ }
+ })
+ });
+ $c.bench_function(BenchmarkId::new("strict_cmp", $struct_name), |b| {
+ b.iter(|| {
+ for (lid, s) in $data1.iter().zip($data2.iter()) {
+ let _ = black_box(lid).strict_cmp(&black_box(s).as_str().as_bytes());
+ }
+ })
+ });
+ };
+}
+
+#[macro_export]
+macro_rules! canonicalize {
+ ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => {
+ $c.bench_function($struct_name, |b| {
+ b.iter(|| {
+ for s in $data {
+ let _ = black_box(s).to_string();
+ }
+ for s in $data {
+ let _ = $struct::canonicalize(black_box(s));
+ }
+ })
+ });
+ };
+}
diff --git a/vendor/icu_locid/benches/helpers/mod.rs b/vendor/icu_locid/benches/helpers/mod.rs
new file mode 100644
index 000000000..27e455f7b
--- /dev/null
+++ b/vendor/icu_locid/benches/helpers/mod.rs
@@ -0,0 +1,17 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod macros;
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+pub fn read_fixture<T>(path: &str) -> Result<T, Error>
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?;
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?)
+}
diff --git a/vendor/icu_locid/benches/iai_langid.rs b/vendor/icu_locid/benches/iai_langid.rs
new file mode 100644
index 000000000..f964d1462
--- /dev/null
+++ b/vendor/icu_locid/benches/iai_langid.rs
@@ -0,0 +1,118 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use icu_locid::{
+ langid, subtags_language as language, subtags_region as region, LanguageIdentifier,
+};
+
+const LIDS: &[LanguageIdentifier] = &[
+ langid!("en"),
+ langid!("pl"),
+ langid!("fr-CA"),
+ langid!("zh-Hans"),
+ langid!("en-US"),
+ langid!("en-Latn-US"),
+ langid!("sr-Cyrl-BA"),
+];
+
+const LIDS_STR: &[&str] = &[
+ "en",
+ "pl",
+ "fr-CA",
+ "zh-Hans",
+ "en-US",
+ "en-Latn-US",
+ "sr-Cyrl-BA",
+];
+
+fn bench_langid_constr() {
+ // Tests the instructions required to construct a LID from an str.
+
+ let _: Vec<LanguageIdentifier> = LIDS_STR
+ .iter()
+ .map(|l| l.parse().expect("Failed to parse"))
+ .collect();
+}
+
+fn bench_langid_compare_components() {
+ // Tests the cost of comparing LID components.
+
+ let result = LIDS
+ .iter()
+ .filter(|l| l.language == language!("en") && l.region == Some(region!("US")))
+ .count();
+
+ assert_eq!(result, 2);
+}
+
+fn bench_langid_compare_components_str() {
+ // Tests the cost of comparing LID components to str.
+
+ let result = LIDS
+ .iter()
+ .filter(|l| {
+ l.language == language!("en") && l.region.map(|r| r == region!("US")).unwrap_or(false)
+ })
+ .count();
+
+ assert_eq!(result, 2);
+}
+
+fn bench_langid_strict_cmp() {
+ // Tests the cost of comparing a langid against byte strings.
+ use core::cmp::Ordering;
+
+ let lid = langid!("en_us");
+
+ let result = LIDS_STR
+ .iter()
+ .filter(|s| lid.strict_cmp(s.as_bytes()) == Ordering::Equal)
+ .count();
+
+ assert_eq!(result, 1);
+}
+
+fn bench_langid_matching() {
+ // Tests matching a LID against other LIDs.
+
+ let lid = langid!("en_us");
+
+ let count = LIDS.iter().filter(|l| lid == **l).count();
+ assert_eq!(count, 1);
+}
+
+fn bench_langid_matching_str() {
+ // Tests matching a LID against list of str.
+
+ let lid = langid!("en_us");
+
+ let count = LIDS_STR.iter().filter(|&l| lid.normalizing_eq(l)).count();
+ assert_eq!(count, 1);
+}
+
+fn bench_langid_serialize() {
+ // Tests serialization of LIDs.
+
+ let _: Vec<String> = LIDS.iter().map(|l| l.to_string()).collect();
+}
+
+fn bench_langid_canonicalize() {
+ // Tests canonicalization of strings.
+
+ let _: Vec<String> = LIDS_STR
+ .iter()
+ .map(|l| LanguageIdentifier::canonicalize(l).expect("Canonicalization failed"))
+ .collect();
+}
+
+iai::main!(
+ bench_langid_constr,
+ bench_langid_compare_components,
+ bench_langid_compare_components_str,
+ bench_langid_strict_cmp,
+ bench_langid_matching,
+ bench_langid_matching_str,
+ bench_langid_serialize,
+ bench_langid_canonicalize,
+);
diff --git a/vendor/icu_locid/benches/langid.rs b/vendor/icu_locid/benches/langid.rs
new file mode 100644
index 000000000..e5c9b6734
--- /dev/null
+++ b/vendor/icu_locid/benches/langid.rs
@@ -0,0 +1,93 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use icu_locid::LanguageIdentifier;
+
+fn langid_benches(c: &mut Criterion) {
+ let path = "./benches/fixtures/langid.json";
+ let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ // Overview
+ {
+ let mut group = c.benchmark_group("langid");
+
+ overview!(group, LanguageIdentifier, &data.canonicalized, "en-US");
+
+ group.finish();
+ }
+
+ #[cfg(feature = "bench")]
+ {
+ use criterion::BenchmarkId;
+
+ // Construct
+ {
+ let mut group = c.benchmark_group("langid/construct");
+
+ construct!(group, LanguageIdentifier, "langid", &data.canonicalized);
+
+ group.finish();
+ }
+
+ // Stringify
+ {
+ let mut group = c.benchmark_group("langid/to_string");
+
+ let langids: Vec<LanguageIdentifier> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ to_string!(group, LanguageIdentifier, "langid", &langids);
+
+ group.finish();
+ }
+
+ // Compare
+ {
+ let mut group = c.benchmark_group("langid/compare");
+
+ let langids: Vec<LanguageIdentifier> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+ let langids2: Vec<LanguageIdentifier> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ compare_struct!(group, LanguageIdentifier, "langid", &langids, &langids2);
+
+ compare_str!(
+ group,
+ LanguageIdentifier,
+ "langid",
+ &langids,
+ &data.canonicalized
+ );
+
+ group.finish();
+ }
+
+ // Canonicalize
+ {
+ let mut group = c.benchmark_group("langid/canonicalize");
+
+ canonicalize!(group, LanguageIdentifier, "langid", &data.casing);
+
+ group.finish();
+ }
+ }
+}
+
+criterion_group!(benches, langid_benches,);
+criterion_main!(benches);
diff --git a/vendor/icu_locid/benches/locale.rs b/vendor/icu_locid/benches/locale.rs
new file mode 100644
index 000000000..948fbb5e8
--- /dev/null
+++ b/vendor/icu_locid/benches/locale.rs
@@ -0,0 +1,87 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use icu_locid::Locale;
+
+fn locale_benches(c: &mut Criterion) {
+ let path = "./benches/fixtures/locale.json";
+ let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ // Overview
+ {
+ let mut group = c.benchmark_group("locale");
+
+ overview!(group, Locale, &data.canonicalized, "en-US");
+
+ group.finish();
+ }
+
+ #[cfg(feature = "bench")]
+ {
+ use criterion::BenchmarkId;
+
+ // Construct
+ {
+ let mut group = c.benchmark_group("locale/construct");
+
+ construct!(group, Locale, "locale", &data.canonicalized);
+
+ group.finish();
+ }
+
+ // Stringify
+ {
+ let mut group = c.benchmark_group("locale/to_string");
+
+ let locales: Vec<Locale> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ to_string!(group, Locale, "locale", &locales);
+
+ group.finish();
+ }
+
+ // Compare
+ {
+ let mut group = c.benchmark_group("locale/compare");
+
+ let locales: Vec<Locale> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+ let locales2: Vec<Locale> = data
+ .canonicalized
+ .iter()
+ .map(|s| s.parse().unwrap())
+ .collect();
+
+ compare_struct!(group, Locale, "locale", &locales, &locales2);
+
+ compare_str!(group, Locale, "locale", &locales, &data.canonicalized);
+
+ group.finish();
+ }
+
+ // Canonicalize
+ {
+ let mut group = c.benchmark_group("locale/canonicalize");
+
+ canonicalize!(group, Locale, "locale", &data.casing);
+
+ group.finish();
+ }
+ }
+}
+
+criterion_group!(benches, locale_benches,);
+criterion_main!(benches);
diff --git a/vendor/icu_locid/benches/subtags.rs b/vendor/icu_locid/benches/subtags.rs
new file mode 100644
index 000000000..4f81b71d2
--- /dev/null
+++ b/vendor/icu_locid/benches/subtags.rs
@@ -0,0 +1,39 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use icu_locid::subtags::{Language, Region, Script, Variant};
+use icu_locid::ParserError;
+
+macro_rules! subtag_bench {
+ ($c:expr, $name:expr, $subtag:ident, $data:expr) => {
+ $c.bench_function(&format!("subtags/{}/parse", $name), |b| {
+ b.iter(|| {
+ for s in &$data.valid {
+ let _: $subtag = black_box(s).parse().unwrap();
+ }
+ for s in &$data.invalid {
+ let _: ParserError = black_box(s).parse::<$subtag>().unwrap_err();
+ }
+ })
+ });
+ };
+}
+
+fn subtags_bench(c: &mut Criterion) {
+ let path = "./benches/fixtures/subtags.json";
+ let data: fixtures::Subtags = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ subtag_bench!(c, "language", Language, data.language);
+ subtag_bench!(c, "script", Script, data.script);
+ subtag_bench!(c, "region", Region, data.region);
+ subtag_bench!(c, "variant", Variant, data.variant);
+}
+
+criterion_group!(benches, subtags_bench,);
+criterion_main!(benches);
diff --git a/vendor/icu_locid/examples/filter_langids.rs b/vendor/icu_locid/examples/filter_langids.rs
new file mode 100644
index 000000000..9e5b54e39
--- /dev/null
+++ b/vendor/icu_locid/examples/filter_langids.rs
@@ -0,0 +1,66 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma separated list of language identifiers,
+// filters out identifiers with language subtags different than `en` and serializes
+// the list back into a comma separated list in canonical syntax.
+//
+// Note: This is an example of the API use, and is not a good base for language matching.
+// For language matching, please consider algorithms such as Locale Matcher.
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::{subtags, LanguageIdentifier};
+
+const DEFAULT_INPUT: &str =
+ "de, en-us, zh-hant, sr-cyrl, fr-ca, es-cl, pl, en-latn-us, ca-valencia, und-arab";
+
+fn filter_input(input: &str) -> String {
+ // 1. Parse the input string into a list of language identifiers.
+ let langids = input.split(',').filter_map(|s| s.trim().parse().ok());
+
+ // 2. Filter for LanguageIdentifiers with Language subtag `en`.
+ let en_lang: subtags::Language = "en".parse().expect("Failed to parse language subtag.");
+
+ let en_langids = langids.filter(|langid: &LanguageIdentifier| langid.language == en_lang);
+
+ // 3. Serialize the output.
+ let en_strs: Vec<String> = en_langids.map(|langid| langid.to_string()).collect();
+
+ en_strs.join(", ")
+}
+
+#[no_mangle]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+ icu_benchmark_macros::main_setup!();
+ let args: Vec<String> = env::args().collect();
+
+ let input = if let Some(input) = args.get(1) {
+ input.as_str()
+ } else {
+ DEFAULT_INPUT
+ };
+ let _output = filter_input(input);
+
+ #[cfg(debug_assertions)]
+ println!("\nInput: {}\nOutput: {}", input, _output);
+
+ 0
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const DEFAULT_OUTPUT: &str = "en-US, en-Latn-US";
+
+ #[test]
+ fn ensure_default_output() {
+ assert_eq!(filter_input(DEFAULT_INPUT), DEFAULT_OUTPUT);
+ }
+}
diff --git a/vendor/icu_locid/examples/syntatically_canonicalize_locales.rs b/vendor/icu_locid/examples/syntatically_canonicalize_locales.rs
new file mode 100644
index 000000000..659e8eff0
--- /dev/null
+++ b/vendor/icu_locid/examples/syntatically_canonicalize_locales.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// A sample application which takes a comma separated list of locales,
+// makes them syntatically canonical and serializes the list back into a comma separated list.
+
+icu_benchmark_macros::static_setup!();
+
+use std::env;
+
+use icu_locid::Locale;
+
+const DEFAULT_INPUT: &str = "sr-cyrL-rS, es-mx, und-arab-u-ca-Buddhist";
+
+fn syntatically_canonicalize_locales(input: &str) -> String {
+ // Split input string and canonicalize each locale identifier.
+ let canonical_locales: Vec<String> = input
+ .split(',')
+ .filter_map(|s| Locale::canonicalize(s.trim()).ok())
+ .collect();
+
+ canonical_locales.join(", ")
+}
+
+fn main() {
+ icu_benchmark_macros::main_setup!();
+ let args: Vec<String> = env::args().collect();
+
+ let input = if let Some(input) = args.get(1) {
+ input.as_str()
+ } else {
+ DEFAULT_INPUT
+ };
+ let _output = syntatically_canonicalize_locales(input);
+
+ #[cfg(debug_assertions)]
+ println!("\nInput: {}\nOutput: {}", input, _output);
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ const DEFAULT_OUTPUT: &str = "sr-Cyrl-RS, es-MX, und-Arab-u-ca-buddhist";
+
+ #[test]
+ fn ensure_default_output() {
+ assert_eq!(
+ syntatically_canonicalize_locales(DEFAULT_INPUT),
+ DEFAULT_OUTPUT
+ );
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/mod.rs b/vendor/icu_locid/src/extensions/mod.rs
new file mode 100644
index 000000000..42bfcd3c9
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/mod.rs
@@ -0,0 +1,313 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Unicode Extensions provide a mechanism to extend the [`LanguageIdentifier`] with
+//! additional bits of information - a combination of a [`LanguageIdentifier`] and [`Extensions`]
+//! is called [`Locale`].
+//!
+//! There are four types of extensions:
+//!
+//! * [`Unicode Extensions`] - marked as `u`.
+//! * [`Transform Extensions`] - marked as `t`.
+//! * [`Private Use Extensions`] - marked as `x`.
+//! * [`Other Extensions`] - marked as any `a-z` except of `u`, `t` and `x`.
+//!
+//! One can think of extensions as a bag of extra information on top of basic 4 [`subtags`].
+//!
+//! Notice: `Other` extension type is currently not supported.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::unicode::{Key, Value};
+//! use icu::locid::Locale;
+//!
+//! let loc: Locale = "en-US-u-ca-buddhist-t-en-US-h0-hybrid-x-foo"
+//! .parse()
+//! .expect("Failed to parse.");
+//!
+//! assert_eq!(loc.id.language, "en".parse().unwrap());
+//! assert_eq!(loc.id.script, None);
+//! assert_eq!(loc.id.region, Some("US".parse().unwrap()));
+//! assert_eq!(loc.id.variants.len(), 0);
+//!
+//! let key: Key = "ca".parse().expect("Parsing key failed.");
+//! let value: Value = "buddhist".parse().expect("Parsing value failed.");
+//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+//! ```
+//!
+//! [`LanguageIdentifier`]: super::LanguageIdentifier
+//! [`Locale`]: super::Locale
+//! [`subtags`]: super::subtags
+//! [`Other Extensions`]: other
+//! [`Private Use Extensions`]: private
+//! [`Transform Extensions`]: transform
+//! [`Unicode Extensions`]: unicode
+pub mod other;
+pub mod private;
+pub mod transform;
+pub mod unicode;
+
+use other::Other;
+use private::Private;
+use transform::Transform;
+use unicode::Unicode;
+
+use alloc::vec::Vec;
+
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+
+/// Defines the type of extension.
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
+#[non_exhaustive]
+pub enum ExtensionType {
+ /// Transform Extension Type marked as `t`.
+ Transform,
+ /// Unicode Extension Type marked as `u`.
+ Unicode,
+ /// Private Extension Type marked as `x`.
+ Private,
+ /// All other extension types.
+ Other(u8),
+}
+
+impl ExtensionType {
+ pub(crate) const fn try_from_byte(key: u8) -> Result<Self, ParserError> {
+ let key = key.to_ascii_lowercase();
+ match key {
+ b'u' => Ok(Self::Unicode),
+ b't' => Ok(Self::Transform),
+ b'x' => Ok(Self::Private),
+ b'a'..=b'z' => Ok(Self::Other(key)),
+ _ => Err(ParserError::InvalidExtension),
+ }
+ }
+
+ pub(crate) const fn try_from_bytes_manual_slice(
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ ) -> Result<Self, ParserError> {
+ if end - start != 1 {
+ return Err(ParserError::InvalidExtension);
+ }
+ #[allow(clippy::indexing_slicing)]
+ Self::try_from_byte(bytes[start])
+ }
+}
+
+/// A map of extensions associated with a given [`Locale`](crate::Locale).
+#[derive(Debug, Default, PartialEq, Eq, Clone, Hash)]
+#[non_exhaustive]
+pub struct Extensions {
+ /// A representation of the data for a Unicode extension, when present in the locale identifer.
+ pub unicode: Unicode,
+ /// A representation of the data for a transform extension, when present in the locale identifer.
+ pub transform: Transform,
+ /// A representation of the data for a private-use extension, when present in the locale identifer.
+ pub private: Private,
+ /// A sequence of any other extensions that are present in the locale identifier but are not formally
+ /// [defined](https://unicode.org/reports/tr35/) and represented explicitly as [`Unicode`], [`Transform`],
+ /// and [`Private`] are.
+ pub other: Vec<Other>,
+}
+
+impl Extensions {
+ /// Returns a new empty map of extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::Extensions;
+ ///
+ /// assert_eq!(Extensions::new(), Extensions::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self {
+ unicode: Unicode::new(),
+ transform: Transform::new(),
+ private: Private::new(),
+ other: Vec::new(),
+ }
+ }
+
+ /// Function to create a new map of extensions containing exactly one unicode extension, callable in `const`
+ /// context.
+ #[inline]
+ pub const fn from_unicode(unicode: Unicode) -> Self {
+ Self {
+ unicode,
+ transform: Transform::new(),
+ private: Private::new(),
+ other: Vec::new(),
+ }
+ }
+
+ /// Returns whether there are no extensions present.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
+ ///
+ /// assert!(!loc.extensions.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.unicode.is_empty()
+ && self.transform.is_empty()
+ && self.private.is_empty()
+ && self.other.is_empty()
+ }
+
+ /// Retains the specified extension types, clearing all others.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::ExtensionType;
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale =
+ /// "und-a-hello-t-mul-u-world-z-zzz-x-extra".parse().unwrap();
+ ///
+ /// let mut only_unicode = loc.clone();
+ /// only_unicode
+ /// .extensions
+ /// .retain_by_type(|t| t == ExtensionType::Unicode);
+ /// assert_eq!(only_unicode, "und-u-world".parse().unwrap());
+ ///
+ /// let mut only_t_z = loc.clone();
+ /// only_t_z.extensions.retain_by_type(|t| {
+ /// t == ExtensionType::Transform || t == ExtensionType::Other(b'z')
+ /// });
+ /// assert_eq!(only_t_z, "und-t-mul-z-zzz".parse().unwrap());
+ /// ```
+ pub fn retain_by_type<F>(&mut self, mut predicate: F)
+ where
+ F: FnMut(ExtensionType) -> bool,
+ {
+ if !predicate(ExtensionType::Unicode) {
+ self.unicode.clear();
+ }
+ if !predicate(ExtensionType::Transform) {
+ self.transform.clear();
+ }
+ if !predicate(ExtensionType::Private) {
+ self.private.clear();
+ }
+ self.other
+ .retain(|o| predicate(ExtensionType::Other(o.get_ext_byte())));
+ }
+
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ let mut unicode = None;
+ let mut transform = None;
+ let mut private = None;
+ let mut other = Vec::new();
+
+ let mut st = iter.next();
+ while let Some(subtag) = st {
+ match subtag.get(0).map(|b| ExtensionType::try_from_byte(*b)) {
+ Some(Ok(ExtensionType::Unicode)) => {
+ unicode = Some(Unicode::try_from_iter(iter)?);
+ }
+ Some(Ok(ExtensionType::Transform)) => {
+ transform = Some(Transform::try_from_iter(iter)?);
+ }
+ Some(Ok(ExtensionType::Private)) => {
+ private = Some(Private::try_from_iter(iter)?);
+ }
+ Some(Ok(ExtensionType::Other(ext))) => {
+ let parsed = Other::try_from_iter(ext, iter)?;
+ if let Err(idx) = other.binary_search(&parsed) {
+ other.insert(idx, parsed);
+ } else {
+ return Err(ParserError::InvalidExtension);
+ }
+ }
+ None => {}
+ _ => return Err(ParserError::InvalidExtension),
+ }
+
+ st = iter.next();
+ }
+
+ Ok(Self {
+ unicode: unicode.unwrap_or_default(),
+ transform: transform.unwrap_or_default(),
+ private: private.unwrap_or_default(),
+ other,
+ })
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ let mut wrote_tu = false;
+ // Alphabetic by singleton
+ self.other.iter().try_for_each(|other| {
+ if other.get_ext() > 't' && !wrote_tu {
+ // Since 't' and 'u' are next to each other in alphabetical
+ // order, write both now.
+ self.transform.for_each_subtag_str(f)?;
+ self.unicode.for_each_subtag_str(f)?;
+ wrote_tu = true;
+ }
+ other.for_each_subtag_str(f)?;
+ Ok(())
+ })?;
+
+ if !wrote_tu {
+ self.transform.for_each_subtag_str(f)?;
+ self.unicode.for_each_subtag_str(f)?;
+ }
+
+ // Private must be written last, since it allows single character
+ // keys. Extensions must also be written in alphabetical order,
+ // which would seem to imply that other extensions `y` and `z` are
+ // invalid, but this is not specified.
+ self.private.for_each_subtag_str(f)?;
+ Ok(())
+ }
+}
+
+impl_writeable_for_each_subtag_str_no_test!(Extensions);
+
+#[test]
+fn test_writeable() {
+ use crate::Locale;
+ use writeable::assert_writeable_eq;
+ assert_writeable_eq!(Extensions::new(), "",);
+ assert_writeable_eq!(
+ "my-t-my-d0-zawgyi".parse::<Locale>().unwrap().extensions,
+ "t-my-d0-zawgyi",
+ );
+ assert_writeable_eq!(
+ "ar-SA-u-ca-islamic-civil"
+ .parse::<Locale>()
+ .unwrap()
+ .extensions,
+ "u-ca-islamic-civil",
+ );
+ assert_writeable_eq!(
+ "en-001-x-foo-bar".parse::<Locale>().unwrap().extensions,
+ "x-foo-bar",
+ );
+ assert_writeable_eq!(
+ "und-t-m0-true".parse::<Locale>().unwrap().extensions,
+ "t-m0-true",
+ );
+ assert_writeable_eq!(
+ "und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo"
+ .parse::<Locale>()
+ .unwrap()
+ .extensions,
+ "a-foo-t-foo-u-foo-w-foo-z-foo-x-foo",
+ );
+}
diff --git a/vendor/icu_locid/src/extensions/other/mod.rs b/vendor/icu_locid/src/extensions/other/mod.rs
new file mode 100644
index 000000000..36dbc49b6
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/other/mod.rs
@@ -0,0 +1,157 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Other Use Extensions is a list of extensions other than unicode,
+//! transform or private.
+//!
+//! Those extensions are treated as a pass-through, and no Unicode related
+//! behavior depends on them.
+//!
+//! The main struct for this extension is [`Other`] which is a list of [`Subtag`]s.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::other::Other;
+//! use icu::locid::Locale;
+//!
+//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed.");
+//! ```
+
+mod subtag;
+
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+use alloc::vec::Vec;
+pub use subtag::Subtag;
+
+/// A list of [`Other Use Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Those extensions are treated as a pass-through, and no Unicode related
+/// behavior depends on them.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::other::{Other, Subtag};
+///
+/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+///
+/// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]);
+/// assert_eq!(&other.to_string(), "-a-foo-bar");
+/// ```
+///
+/// [`Other Use Extensions`]: https://unicode.org/reports/tr35/#other_extensions
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Other((u8, Vec<Subtag>));
+
+impl Other {
+ /// A constructor which takes a pre-sorted list of [`Subtag`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if `ext` is not ASCII alphabetic.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::other::{Other, Subtag};
+ ///
+ /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+ /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+ ///
+ /// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]);
+ /// assert_eq!(&other.to_string(), "-a-foo-bar");
+ /// ```
+ pub fn from_vec_unchecked(ext: u8, input: Vec<Subtag>) -> Self {
+ assert!(ext.is_ascii_alphabetic());
+ Self((ext, input))
+ }
+
+ pub(crate) fn try_from_iter(ext: u8, iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ debug_assert!(ext.is_ascii_alphabetic());
+
+ let mut keys = Vec::new();
+ while let Some(subtag) = iter.peek() {
+ if !Subtag::valid_key(subtag) {
+ break;
+ }
+ if let Ok(key) = Subtag::try_from_bytes(subtag) {
+ keys.push(key);
+ }
+ iter.next();
+ }
+
+ Ok(Self::from_vec_unchecked(ext, keys))
+ }
+
+ /// Gets the tag character for this extension as a char.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "und-a-hello-world".parse().unwrap();
+ /// let other_ext = &loc.extensions.other[0];
+ /// assert_eq!(other_ext.get_ext(), 'a');
+ /// ```
+ pub fn get_ext(&self) -> char {
+ self.get_ext_byte() as char
+ }
+
+ /// Gets the tag character for this extension as a byte.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "und-a-hello-world".parse().unwrap();
+ /// let other_ext = &loc.extensions.other[0];
+ /// assert_eq!(other_ext.get_ext_byte(), b'a');
+ /// ```
+ pub fn get_ext_byte(&self) -> u8 {
+ self.0 .0
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ let (ext, keys) = &self.0;
+ debug_assert!(ext.is_ascii_alphabetic());
+ // Safety: ext is ascii_alphabetic, so it is valid UTF-8
+ let ext_str = unsafe { core::str::from_utf8_unchecked(core::slice::from_ref(ext)) };
+ f(ext_str)?;
+ keys.iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+writeable::impl_display_with_writeable!(Other);
+
+impl writeable::Writeable for Other {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ let (ext, keys) = &self.0;
+ sink.write_char('-')?;
+ sink.write_char(*ext as char)?;
+ for key in keys.iter() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(key, sink)?;
+ }
+
+ Ok(())
+ }
+
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ let mut result = writeable::LengthHint::exact(2);
+ for key in self.0 .1.iter() {
+ result += writeable::Writeable::writeable_length_hint(key) + 1;
+ }
+ result
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/other/subtag.rs b/vendor/icu_locid/src/extensions/other/subtag.rs
new file mode 100644
index 000000000..60995c395
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/other/subtag.rs
@@ -0,0 +1,37 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A single item used in a list of [`Other`](super::Other) extensions.
+ ///
+ /// The subtag has to be an ASCII alphanumerical string no shorter than
+ /// two characters and no longer than eight.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::other::Subtag;
+ ///
+ /// let subtag: Subtag = "Foo".parse().expect("Failed to parse a Subtag.");
+ ///
+ /// assert_eq!(subtag.as_str(), "foo");
+ /// ```
+ Subtag,
+ extensions::other::Subtag,
+ extensions_other_subtag,
+ 2..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["y", "toolooong"],
+);
+
+impl Subtag {
+ pub(crate) const fn valid_key(v: &[u8]) -> bool {
+ 2 <= v.len() && v.len() <= 8
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/private/mod.rs b/vendor/icu_locid/src/extensions/private/mod.rs
new file mode 100644
index 000000000..13090c94a
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/private/mod.rs
@@ -0,0 +1,167 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Private Use Extensions is a list of extensions intended for
+//! private use.
+//!
+//! Those extensions are treated as a pass-through, and no Unicode related
+//! behavior depends on them.
+//!
+//! The main struct for this extension is [`Private`] which is a list of [`Subtag`]s.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::private::{Private, Subtag};
+//! use icu::locid::Locale;
+//!
+//! let mut loc: Locale = "en-US-x-foo-faa".parse().expect("Parsing failed.");
+//!
+//! let subtag: Subtag = "foo".parse().expect("Parsing subtag failed.");
+//! assert!(loc.extensions.private.contains(&subtag));
+//! assert_eq!(loc.extensions.private.iter().next(), Some(&subtag));
+//! loc.extensions.private.clear();
+//! assert_eq!(loc.to_string(), "en-US");
+//! ```
+
+mod other;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+pub use other::Subtag;
+
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+
+/// A list of [`Private Use Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Those extensions are treated as a pass-through, and no Unicode related
+/// behavior depends on them.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::private::{Private, Subtag};
+///
+/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+///
+/// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]);
+/// assert_eq!(&private.to_string(), "-x-foo-bar");
+/// ```
+///
+/// [`Private Use Extensions`]: https://unicode.org/reports/tr35/#pu_extensions
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Private(Vec<Subtag>);
+
+impl Private {
+ /// Returns a new empty list of private-use extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::Private;
+ ///
+ /// assert_eq!(Private::new(), Private::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(Vec::new())
+ }
+
+ /// A constructor which takes a pre-sorted list of [`Subtag`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::{Private, Subtag};
+ ///
+ /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+ /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+ ///
+ /// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]);
+ /// assert_eq!(&private.to_string(), "-x-foo-bar");
+ /// ```
+ pub fn from_vec_unchecked(input: Vec<Subtag>) -> Self {
+ Self(input)
+ }
+
+ /// Empties the [`Private`] list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::{Private, Subtag};
+ ///
+ /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag.");
+ /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag.");
+ /// let mut private = Private::from_vec_unchecked(vec![subtag1, subtag2]);
+ ///
+ /// assert_eq!(&private.to_string(), "-x-foo-bar");
+ ///
+ /// private.clear();
+ ///
+ /// assert_eq!(&private.to_string(), "");
+ /// ```
+ pub fn clear(&mut self) {
+ self.0.clear();
+ }
+
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ let keys = iter
+ .map(Subtag::try_from_bytes)
+ .collect::<Result<Vec<_>, _>>()?;
+
+ Ok(Self::from_vec_unchecked(keys))
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.is_empty() {
+ return Ok(());
+ }
+ f("x")?;
+ self.deref().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+writeable::impl_display_with_writeable!(Private);
+
+impl writeable::Writeable for Private {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ if self.is_empty() {
+ return Ok(());
+ }
+ sink.write_str("-x")?;
+ for key in self.iter() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(key, sink)?;
+ }
+ Ok(())
+ }
+
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ if self.is_empty() {
+ return writeable::LengthHint::exact(0);
+ }
+ let mut result = writeable::LengthHint::exact(2);
+ for key in self.iter() {
+ result += writeable::Writeable::writeable_length_hint(key) + 1;
+ }
+ result
+ }
+}
+
+impl Deref for Private {
+ type Target = [Subtag];
+
+ fn deref(&self) -> &Self::Target {
+ self.0.deref()
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/private/other.rs b/vendor/icu_locid/src/extensions/private/other.rs
new file mode 100644
index 000000000..a91e12855
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/private/other.rs
@@ -0,0 +1,31 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A single item used in a list of [`Private`](super::Private) extensions.
+ ///
+ /// The subtag has to be an ASCII alphanumerical string no shorter than
+ /// one character and no longer than eight.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::private::Subtag;
+ ///
+ /// let subtag1: Subtag = "Foo".parse().expect("Failed to parse a Subtag.");
+ ///
+ /// assert_eq!(subtag1.as_str(), "foo");
+ /// ```
+ Subtag,
+ extensions::private::Subtag,
+ extensions_private_subtag,
+ 1..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["toolooong"],
+);
diff --git a/vendor/icu_locid/src/extensions/transform/fields.rs b/vendor/icu_locid/src/extensions/transform/fields.rs
new file mode 100644
index 000000000..ca10000a7
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/transform/fields.rs
@@ -0,0 +1,228 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::borrow::Borrow;
+use core::iter::FromIterator;
+use litemap::LiteMap;
+
+use super::Key;
+use super::Value;
+
+/// A list of [`Key`]-[`Value`] pairs representing functional information
+/// about content transformations.
+///
+/// Here are examples of fields used in Unicode:
+/// - `s0`, `d0` - Transform source/destination
+/// - `t0` - Machine Translation
+/// - `h0` - Hybrid Locale Identifiers
+///
+/// You can find the full list in [`Unicode BCP 47 T Extension`] section of LDML.
+///
+/// [`Unicode BCP 47 T Extension`]: https://unicode.org/reports/tr35/tr35.html#BCP47_T_Extension
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::transform::{Fields, Key, Value};
+///
+/// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+/// let value: Value = "hybrid".parse().expect("Failed to parse a Value.");
+/// let fields: Fields = vec![(key, value)].into_iter().collect();
+///
+/// assert_eq!(&fields.to_string(), "h0-hybrid");
+/// ```
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Fields(LiteMap<Key, Value>);
+
+impl Fields {
+ /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Fields;
+ ///
+ /// assert_eq!(Fields::new(), Fields::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(LiteMap::new())
+ }
+
+ /// Returns `true` if there are no fields.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Fields;
+ /// use icu::locid::locale;
+ /// use icu::locid::Locale;
+ ///
+ /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
+ /// let loc2 = locale!("und-u-ca-buddhist");
+ ///
+ /// assert!(!loc1.extensions.transform.fields.is_empty());
+ /// assert!(loc2.extensions.transform.fields.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Empties the [`Fields`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::{Fields, Key, Value};
+ ///
+ /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "hybrid".parse().expect("Failed to parse a Value.");
+ /// let mut fields: Fields = vec![(key, value)].into_iter().collect();
+ ///
+ /// assert_eq!(&fields.to_string(), "h0-hybrid");
+ ///
+ /// fields.clear();
+ ///
+ /// assert_eq!(&fields.to_string(), "");
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::{Fields, Key, Value};
+ ///
+ /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "hybrid".parse().expect("Failed to parse a Value.");
+ /// let mut fields: Fields = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+ /// assert!(&fields.contains_key(&key));
+ /// ```
+ pub fn contains_key<Q>(&self, key: &Q) -> bool
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.contains_key(key)
+ }
+
+ /// Returns a reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::{Fields, Key, Value};
+ ///
+ /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "hybrid".parse().expect("Failed to parse a Value.");
+ /// let mut fields: Fields = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "h0".parse().expect("Failed to parse a Key.");
+ /// assert_eq!(
+ /// fields.get(&key).map(|v| v.to_string()),
+ /// Some("hybrid".to_string())
+ /// );
+ /// ```
+ pub fn get<Q>(&self, key: &Q) -> Option<&Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get(key)
+ }
+
+ /// Sets the specified keyword, returning the old value if it already existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Key;
+ /// use icu::locid::extensions::transform::Value;
+ /// use icu::locid::extensions_transform_key as key;
+ /// use icu::locid::Locale;
+ ///
+ /// let lower = "lower".parse::<Value>().expect("valid extension subtag");
+ /// let casefold = "casefold".parse::<Value>().expect("valid extension subtag");
+ ///
+ /// let mut loc: Locale = "en-t-hi-d0-casefold"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// let old_value = loc.extensions.transform.fields.set(key!("d0"), lower);
+ ///
+ /// assert_eq!(old_value, Some(casefold));
+ /// assert_eq!(loc, "en-t-hi-d0-lower".parse().unwrap());
+ /// ```
+ pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
+ self.0.insert(key, value)
+ }
+
+ /// Retains a subset of fields as specified by the predicate function.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions_transform_key as key;
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-t-h0-hybrid-d0-hex-m0-xml".parse().unwrap();
+ ///
+ /// loc.extensions
+ /// .transform
+ /// .fields
+ /// .retain_by_key(|&k| k == key!("h0"));
+ /// assert_eq!(loc, "und-t-h0-hybrid".parse().unwrap());
+ ///
+ /// loc.extensions
+ /// .transform
+ /// .fields
+ /// .retain_by_key(|&k| k == key!("d0"));
+ /// assert_eq!(loc, Locale::UND);
+ /// ```
+ pub fn retain_by_key<F>(&mut self, mut predicate: F)
+ where
+ F: FnMut(&Key) -> bool,
+ {
+ self.0.retain(|k, _| predicate(k))
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ for (k, v) in self.0.iter() {
+ f(k.as_str())?;
+ v.for_each_subtag_str(f)?;
+ }
+ Ok(())
+ }
+
+ /// This needs to be its own method to help with type inference in helpers.rs
+ #[cfg(test)]
+ pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
+ v.into_iter().collect()
+ }
+}
+
+impl From<LiteMap<Key, Value>> for Fields {
+ fn from(map: LiteMap<Key, Value>) -> Self {
+ Self(map)
+ }
+}
+
+impl FromIterator<(Key, Value)> for Fields {
+ fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
+ LiteMap::from_iter(iter).into()
+ }
+}
+
+impl_writeable_for_key_value!(Fields, "h0", "hybrid", "m0", "m0-true");
diff --git a/vendor/icu_locid/src/extensions/transform/key.rs b/vendor/icu_locid/src/extensions/transform/key.rs
new file mode 100644
index 000000000..5400988a1
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/transform/key.rs
@@ -0,0 +1,31 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A key used in a list of [`Fields`](super::Fields).
+ ///
+ /// The key has to be a two ASCII characters long, with the first
+ /// character being alphabetic, and the second being a number.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Key;
+ ///
+ /// let key1: Key = "k0".parse().expect("Failed to parse a Key.");
+ ///
+ /// assert_eq!(key1.as_str(), "k0");
+ /// ```
+ Key,
+ extensions::transform::Key,
+ extensions_transform_key,
+ 2..=2,
+ s,
+ s.all_bytes()[0].is_ascii_alphabetic() && s.all_bytes()[1].is_ascii_digit(),
+ s.to_ascii_lowercase(),
+ s.all_bytes()[0].is_ascii_lowercase() && s.all_bytes()[1].is_ascii_digit(),
+ InvalidExtension,
+ ["k0"],
+ ["", "k", "0k", "k12"],
+);
diff --git a/vendor/icu_locid/src/extensions/transform/mod.rs b/vendor/icu_locid/src/extensions/transform/mod.rs
new file mode 100644
index 000000000..a8c605146
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/transform/mod.rs
@@ -0,0 +1,236 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Transform Extensions provide information on content transformations in a given locale.
+//!
+//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
+//! optional [`LanguageIdentifier`].
+//!
+//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::transform::{Fields, Key, Transform, Value};
+//! use icu::locid::{LanguageIdentifier, Locale};
+//!
+//! let mut loc: Locale =
+//! "en-US-t-es-AR-h0-hybrid".parse().expect("Parsing failed.");
+//!
+//! let lang: LanguageIdentifier =
+//! "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
+//!
+//! let key: Key = "h0".parse().expect("Parsing key failed.");
+//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
+//!
+//! assert_eq!(loc.extensions.transform.lang, Some(lang));
+//! assert!(loc.extensions.transform.fields.contains_key(&key));
+//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
+//!
+//! assert_eq!(&loc.extensions.transform.to_string(), "-t-es-AR-h0-hybrid");
+//! ```
+mod fields;
+mod key;
+mod value;
+
+pub use fields::Fields;
+pub use key::Key;
+pub use value::Value;
+
+use crate::parser::SubtagIterator;
+use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode};
+use crate::subtags::Language;
+use crate::LanguageIdentifier;
+use alloc::vec;
+use litemap::LiteMap;
+
+/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Transform extension carries information about source language or script of
+/// transformed content, including content that has been transliterated, transcribed,
+/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::transform::{Key, Value};
+/// use icu::locid::{LanguageIdentifier, Locale};
+///
+/// let mut loc: Locale =
+/// "de-t-en-US-h0-hybrid".parse().expect("Parsing failed.");
+///
+/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
+///
+/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
+/// let key: Key = "h0".parse().expect("Parsing key failed.");
+/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
+/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
+/// ```
+/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
+/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
+#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
+pub struct Transform {
+ /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
+ pub lang: Option<LanguageIdentifier>,
+ /// The key-value pairs present in this locale extension, with each extension key subtag
+ /// associated to its provided value subtag.
+ pub fields: Fields,
+}
+
+impl Transform {
+ /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Transform;
+ ///
+ /// assert_eq!(Transform::new(), Transform::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self {
+ lang: None,
+ fields: Fields::new(),
+ }
+ }
+
+ /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "en-US-t-es-AR".parse().expect("Parsing failed.");
+ ///
+ /// assert!(!loc.extensions.transform.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.lang.is_none() && self.fields.is_empty()
+ }
+
+ /// Clears the transform extension, effectively removing it from the locale.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "en-US-t-es-AR".parse().unwrap();
+ /// loc.extensions.transform.clear();
+ /// assert_eq!(loc, "en-US".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) {
+ self.lang = None;
+ self.fields.clear();
+ }
+
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ let mut tlang = None;
+ let mut tfields = LiteMap::new();
+
+ if let Some(subtag) = iter.peek() {
+ if Language::try_from_bytes(subtag).is_ok() {
+ tlang = Some(parse_language_identifier_from_iter(
+ iter,
+ ParserMode::Partial,
+ )?);
+ }
+ }
+
+ let mut current_tkey = None;
+ let mut current_tvalue = vec![];
+
+ while let Some(subtag) = iter.peek() {
+ if let Some(tkey) = current_tkey {
+ if let Ok(val) = Value::parse_subtag(subtag) {
+ current_tvalue.push(val);
+ } else {
+ if current_tvalue.is_empty() {
+ return Err(ParserError::InvalidExtension);
+ }
+ tfields.try_insert(
+ tkey,
+ Value::from_vec_unchecked(current_tvalue.drain(..).flatten().collect()),
+ );
+ current_tkey = None;
+ continue;
+ }
+ } else if let Ok(tkey) = Key::try_from_bytes(subtag) {
+ current_tkey = Some(tkey);
+ } else {
+ break;
+ }
+
+ iter.next();
+ }
+
+ if let Some(tkey) = current_tkey {
+ if current_tvalue.is_empty() {
+ return Err(ParserError::InvalidExtension);
+ }
+ tfields.try_insert(
+ tkey,
+ Value::from_vec_unchecked(current_tvalue.into_iter().flatten().collect()),
+ );
+ }
+
+ Ok(Self {
+ lang: tlang,
+ fields: tfields.into(),
+ })
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.is_empty() {
+ return Ok(());
+ }
+ f("t")?;
+ if let Some(lang) = &self.lang {
+ lang.for_each_subtag_str(f)?;
+ }
+ self.fields.for_each_subtag_str(f)
+ }
+}
+
+writeable::impl_display_with_writeable!(Transform);
+
+impl writeable::Writeable for Transform {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ if self.is_empty() {
+ return Ok(());
+ }
+ sink.write_str("-t")?;
+ if let Some(lang) = &self.lang {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(lang, sink)?;
+ }
+ if !self.fields.is_empty() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(&self.fields, sink)?;
+ }
+ Ok(())
+ }
+
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ if self.is_empty() {
+ return writeable::LengthHint::exact(0);
+ }
+ let mut result = writeable::LengthHint::exact(2);
+ if let Some(lang) = &self.lang {
+ result += writeable::Writeable::writeable_length_hint(lang) + 1;
+ }
+ if !self.fields.is_empty() {
+ result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
+ }
+ result
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/transform/value.rs b/vendor/icu_locid/src/extensions/transform/value.rs
new file mode 100644
index 000000000..84468361a
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/transform/value.rs
@@ -0,0 +1,119 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::parser::{get_subtag_iterator, ParserError};
+use alloc::vec;
+use alloc::vec::Vec;
+use core::ops::RangeInclusive;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A value used in a list of [`Fields`](super::Fields).
+///
+/// The value has to be a sequence of one or more alphanumerical strings
+/// separated by `-`.
+/// Each part of the sequence has to be no shorter than three characters and no
+/// longer than 8.
+///
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::transform::Value;
+///
+/// let value1: Value = "hybrid".parse().expect("Failed to parse a Value.");
+/// let value2: Value =
+/// "hybrid-foobar".parse().expect("Failed to parse a Value.");
+///
+/// assert_eq!(&value1.to_string(), "hybrid");
+/// assert_eq!(&value2.to_string(), "hybrid-foobar");
+/// ```
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Value(Vec<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>);
+
+const TYPE_LENGTH: RangeInclusive<usize> = 3..=8;
+const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
+
+impl Value {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`Value`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::transform::Value;
+ ///
+ /// let value = Value::try_from_bytes(b"hybrid").expect("Parsing failed.");
+ ///
+ /// assert_eq!(&value.to_string(), "hybrid");
+ /// ```
+ pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
+ let mut v = vec![];
+ let mut has_value = false;
+
+ for subtag in get_subtag_iterator(input) {
+ if !Self::is_type_subtag(subtag) {
+ return Err(ParserError::InvalidExtension);
+ }
+ has_value = true;
+ let val =
+ TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?;
+ if val != TRUE_TVALUE {
+ v.push(val);
+ }
+ }
+
+ if !has_value {
+ return Err(ParserError::InvalidExtension);
+ }
+ Ok(Self(v))
+ }
+
+ pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>) -> Self {
+ Self(input)
+ }
+
+ pub(crate) fn is_type_subtag(t: &[u8]) -> bool {
+ TYPE_LENGTH.contains(&t.len()) && !t.iter().any(|c: &u8| !c.is_ascii_alphanumeric())
+ }
+
+ pub(crate) fn parse_subtag(
+ t: &[u8],
+ ) -> Result<Option<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>, ParserError> {
+ let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;
+ if !TYPE_LENGTH.contains(&t.len()) || !s.is_ascii_alphanumeric() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ let s = s.to_ascii_lowercase();
+
+ if s == TRUE_TVALUE {
+ Ok(None)
+ } else {
+ Ok(Some(s))
+ }
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.0.is_empty() {
+ f("true")?;
+ } else {
+ self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)?;
+ }
+ Ok(())
+ }
+}
+
+impl FromStr for Value {
+ type Err = ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+}
+
+impl_writeable_for_tinystr_list!(Value, "true", "hybrid", "foobar");
diff --git a/vendor/icu_locid/src/extensions/unicode/attribute.rs b/vendor/icu_locid/src/extensions/unicode/attribute.rs
new file mode 100644
index 000000000..ba4b70924
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/attribute.rs
@@ -0,0 +1,34 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// An attribute used in a set of [`Attributes`](super::Attributes).
+ ///
+ /// An attribute has to be a sequence of alphanumerical characters no
+ /// shorter than three and no longer than eight characters.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Attribute;
+ /// use icu::locid::extensions_unicode_attribute as attribute;
+ ///
+ /// let attr: Attribute =
+ /// "buddhist".parse().expect("Failed to parse an Attribute.");
+ ///
+ /// assert_eq!(attr, attribute!("buddhist"));
+ /// ```
+ Attribute,
+ extensions::unicode::Attribute,
+ extensions_unicode_attribute,
+ 3..=8,
+ s,
+ s.is_ascii_alphanumeric(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
+ InvalidExtension,
+ ["foo12"],
+ ["no", "toolooong"],
+);
diff --git a/vendor/icu_locid/src/extensions/unicode/attributes.rs b/vendor/icu_locid/src/extensions/unicode/attributes.rs
new file mode 100644
index 000000000..1f9536bfa
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/attributes.rs
@@ -0,0 +1,115 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::Attribute;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// A set of [`Attribute`] elements as defined in [`Unicode Extension Attributes`].
+///
+/// [`Unicode Extension Attributes`]: https://unicode.org/reports/tr35/tr35.html#u_Extension
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Attribute, Attributes};
+///
+/// let attribute1: Attribute =
+/// "foobar".parse().expect("Failed to parse a variant subtag.");
+///
+/// let attribute2: Attribute = "testing"
+/// .parse()
+/// .expect("Failed to parse a variant subtag.");
+/// let mut v = vec![attribute1, attribute2];
+/// v.sort();
+/// v.dedup();
+///
+/// let attributes: Attributes = Attributes::from_vec_unchecked(v);
+/// assert_eq!(attributes.to_string(), "foobar-testing");
+/// ```
+#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Attributes(Vec<Attribute>);
+
+impl Attributes {
+ /// Returns a new empty set of attributes. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Attributes;
+ ///
+ /// assert_eq!(Attributes::new(), Attributes::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(Vec::new())
+ }
+
+ /// A constructor which takes a pre-sorted list of [`Attribute`] elements.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Attribute, Attributes};
+ ///
+ /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed.");
+ /// let attribute2: Attribute = "testing".parse().expect("Parsing failed.");
+ /// let mut v = vec![attribute1, attribute2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let attributes = Attributes::from_vec_unchecked(v);
+ /// ```
+ ///
+ /// Notice: For performance- and memory-constrained environments, it is recommended
+ /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
+ /// and [`dedup`](Vec::dedup()).
+ pub fn from_vec_unchecked(input: Vec<Attribute>) -> Self {
+ Self(input)
+ }
+
+ /// Empties the [`Attributes`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Attribute, Attributes};
+ ///
+ /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed.");
+ /// let attribute2: Attribute = "testing".parse().expect("Parsing failed.");
+ /// let mut v = vec![attribute1, attribute2];
+ ///
+ /// let mut attributes: Attributes = Attributes::from_vec_unchecked(v);
+ ///
+ /// assert_eq!(attributes.to_string(), "foobar-testing");
+ ///
+ /// attributes.clear();
+ ///
+ /// assert_eq!(attributes.to_string(), "");
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.deref().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+impl_writeable_for_subtag_list!(Attributes, "foobar", "testing");
+
+impl Deref for Attributes {
+ type Target = [Attribute];
+
+ fn deref(&self) -> &[Attribute] {
+ self.0.deref()
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/unicode/key.rs b/vendor/icu_locid/src/extensions/unicode/key.rs
new file mode 100644
index 000000000..bdfdd4e5c
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/key.rs
@@ -0,0 +1,31 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A key used in a list of [`Keywords`](super::Keywords).
+ ///
+ /// The key has to be a two ASCII alphanumerical characters long, with the first
+ /// character being alphanumeric, and the second being alphabetic.
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ ///
+ /// assert!("ca".parse::<Key>().is_ok());
+ /// ```
+ Key,
+ extensions::unicode::Key,
+ extensions_unicode_key,
+ 2..=2,
+ s,
+ s.all_bytes()[0].is_ascii_alphanumeric() && s.all_bytes()[1].is_ascii_alphabetic(),
+ s.to_ascii_lowercase(),
+ (s.all_bytes()[0].is_ascii_lowercase() || s.all_bytes()[0].is_ascii_digit())
+ && s.all_bytes()[1].is_ascii_lowercase(),
+ InvalidExtension,
+ ["ca", "8a"],
+ ["a", "a8", "abc"],
+);
diff --git a/vendor/icu_locid/src/extensions/unicode/keywords.rs b/vendor/icu_locid/src/extensions/unicode/keywords.rs
new file mode 100644
index 000000000..dc9a15921
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/keywords.rs
@@ -0,0 +1,404 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::borrow::Borrow;
+use core::cmp::Ordering;
+use core::iter::FromIterator;
+use litemap::LiteMap;
+
+use super::Key;
+use super::Value;
+use crate::helpers::ShortVec;
+use crate::ordering::SubtagOrderingResult;
+
+/// A list of [`Key`]-[`Value`] pairs representing functional information
+/// about locale's internationnalization preferences.
+///
+/// Here are examples of fields used in Unicode:
+/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
+/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
+/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
+///
+/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML.
+///
+/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
+///
+/// # Examples
+///
+/// Manually build up a [`Keywords`] object:
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+///
+/// let key: Key = "hc".parse().expect("Failed to parse a Key.");
+/// let value: Value = "h23".parse().expect("Failed to parse a Value.");
+/// let keywords: Keywords = vec![(key, value)].into_iter().collect();
+///
+/// assert_eq!(&keywords.to_string(), "hc-h23");
+/// ```
+///
+/// Access a [`Keywords`] object from a [`Locale`]:
+///
+/// ```
+/// use icu::locid::{
+/// extensions_unicode_key as key, extensions_unicode_value as value,
+/// Locale,
+/// };
+///
+/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
+///
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("hc")),
+/// Some(&value!("h23"))
+/// );
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("kc")),
+/// Some(&value!("true"))
+/// );
+///
+/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
+/// ```
+///
+/// [`Locale`]: crate::Locale
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+pub struct Keywords(LiteMap<Key, Value, ShortVec<(Key, Value)>>);
+
+impl Keywords {
+ /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ ///
+ /// assert_eq!(Keywords::new(), Keywords::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(LiteMap::new())
+ }
+
+ /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
+ #[inline]
+ pub const fn new_single(key: Key, value: Value) -> Self {
+ Self(LiteMap::from_sorted_store_unchecked(ShortVec::new_single(
+ (key, value),
+ )))
+ }
+
+ /// Returns `true` if there are no keywords.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::locale;
+ /// use icu::locid::Locale;
+ ///
+ /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
+ /// let loc2 = locale!("und-u-ca-buddhist");
+ ///
+ /// assert!(loc1.extensions.unicode.keywords.is_empty());
+ /// assert!(!loc2.extensions.unicode.keywords.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+
+ /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+ /// use litemap::LiteMap;
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "gregory".parse().expect("Failed to parse a Value.");
+ /// let keywords: Keywords = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// assert!(&keywords.contains_key(&key));
+ /// ```
+ pub fn contains_key<Q>(&self, key: &Q) -> bool
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.contains_key(key)
+ }
+
+ /// Returns a reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "buddhist".parse().expect("Failed to parse a Value.");
+ /// let keywords: Keywords = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// assert_eq!(
+ /// keywords.get(&key).map(|v| v.to_string()),
+ /// Some("buddhist".to_string())
+ /// );
+ /// ```
+ pub fn get<Q>(&self, key: &Q) -> Option<&Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get(key)
+ }
+
+ /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`].
+ ///
+ /// Returns `None` if the key doesn't exist or if the key has no value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::{Key, Keywords, Value};
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// let value: Value = "buddhist".parse().expect("Failed to parse a Value.");
+ /// let mut keywords: Keywords = vec![(key, value)].into_iter().collect();
+ ///
+ /// let key: Key = "ca".parse().expect("Failed to parse a Key.");
+ /// if let Some(value) = keywords.get_mut(&key) {
+ /// *value = "gregory".parse().expect("Failed to parse a Value.");
+ /// }
+ /// assert_eq!(
+ /// keywords.get(&key).map(|v| v.to_string()),
+ /// Some("gregory".to_string())
+ /// );
+ /// ```
+ pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
+ where
+ Key: Borrow<Q>,
+ Q: Ord,
+ {
+ self.0.get_mut(key)
+ }
+
+ /// Sets the specified keyword, returning the old value if it already existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::extensions::unicode::Value;
+ /// use icu::locid::Locale;
+ /// use icu::locid::{
+ /// extensions_unicode_key as key, extensions_unicode_value as value,
+ /// };
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// let old_value = loc
+ /// .extensions
+ /// .unicode
+ /// .keywords
+ /// .set(key!("ca"), value!("japanese"));
+ ///
+ /// assert_eq!(old_value, Some(value!("buddhist")));
+ /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
+ /// ```
+ pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
+ self.0.insert(key, value)
+ }
+
+ /// Removes the specified keyword, returning the old value if it existed.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::extensions_unicode_key as key;
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
+ /// .parse()
+ /// .expect("valid BCP-47 identifier");
+ /// loc.extensions.unicode.keywords.remove(key!("ca"));
+ /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
+ /// ```
+ pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
+ self.0.remove(key.borrow())
+ }
+
+ /// Clears all Unicode extension keywords, leaving Unicode attributes.
+ ///
+ /// Returns the old Unicode extension keywords.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
+ /// loc.extensions.unicode.keywords.clear();
+ /// assert_eq!(loc, "und-u-hello".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ /// Retains a subset of keywords as specified by the predicate function.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions_unicode_key as key;
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
+ ///
+ /// loc.extensions
+ /// .unicode
+ /// .keywords
+ /// .retain_by_key(|&k| k == key!("hc"));
+ /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
+ ///
+ /// loc.extensions
+ /// .unicode
+ /// .keywords
+ /// .retain_by_key(|&k| k == key!("ms"));
+ /// assert_eq!(loc, Locale::UND);
+ /// ```
+ pub fn retain_by_key<F>(&mut self, mut predicate: F)
+ where
+ F: FnMut(&Key) -> bool,
+ {
+ self.0.retain(|k, _| predicate(k))
+ }
+
+ /// Compare this [`Keywords`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] =
+ /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_kwds = format!("und-u-{}", a)
+ /// .parse::<Locale>()
+ /// .unwrap()
+ /// .extensions
+ /// .unicode
+ /// .keywords;
+ /// assert_eq!(a, a_kwds.to_string());
+ /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+ self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+ }
+
+ /// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Keywords;
+ /// use icu::locid::locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"];
+ ///
+ /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let kwds = locale!("und").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords;
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+ I: Iterator<Item = &'l [u8]>,
+ {
+ let r = self.for_each_subtag_str(&mut |subtag| {
+ if let Some(other) = subtags.next() {
+ match subtag.as_bytes().cmp(other) {
+ Ordering::Equal => Ok(()),
+ not_equal => Err(not_equal),
+ }
+ } else {
+ Err(Ordering::Greater)
+ }
+ });
+ match r {
+ Ok(_) => SubtagOrderingResult::Subtags(subtags),
+ Err(o) => SubtagOrderingResult::Ordering(o),
+ }
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ for (k, v) in self.0.iter() {
+ f(k.as_str())?;
+ v.for_each_subtag_str(f)?;
+ }
+ Ok(())
+ }
+
+ /// This needs to be its own method to help with type inference in helpers.rs
+ #[cfg(test)]
+ pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
+ v.into_iter().collect()
+ }
+}
+
+impl From<LiteMap<Key, Value, ShortVec<(Key, Value)>>> for Keywords {
+ fn from(map: LiteMap<Key, Value, ShortVec<(Key, Value)>>) -> Self {
+ Self(map)
+ }
+}
+
+impl FromIterator<(Key, Value)> for Keywords {
+ fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
+ LiteMap::from_iter(iter).into()
+ }
+}
+
+impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
diff --git a/vendor/icu_locid/src/extensions/unicode/mod.rs b/vendor/icu_locid/src/extensions/unicode/mod.rs
new file mode 100644
index 000000000..fabf1036c
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/mod.rs
@@ -0,0 +1,233 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Unicode Extensions provide information about user preferences in a given locale.
+//!
+//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
+//! [`Attributes`].
+//!
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::extensions::unicode::{Attribute, Key, Unicode, Value};
+//! use icu::locid::{LanguageIdentifier, Locale};
+//!
+//! let mut loc: Locale =
+//! "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
+//!
+//! let key: Key = "hc".parse().expect("Parsing key failed.");
+//! let value: Value = "h12".parse().expect("Parsing value failed.");
+//! let attribute: Attribute =
+//! "foobar".parse().expect("Parsing attribute failed.");
+//!
+//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+//! assert!(loc.extensions.unicode.attributes.contains(&attribute));
+//!
+//! assert_eq!(&loc.extensions.unicode.to_string(), "-u-foobar-hc-h12");
+//! ```
+mod attribute;
+mod attributes;
+mod key;
+mod keywords;
+mod value;
+
+use alloc::vec;
+pub use attribute::Attribute;
+pub use attributes::Attributes;
+pub use key::Key;
+pub use keywords::Keywords;
+pub use value::Value;
+
+use crate::parser::ParserError;
+use crate::parser::SubtagIterator;
+use litemap::LiteMap;
+
+/// Unicode Extensions provide information about user preferences in a given locale.
+///
+/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
+/// Identifier`] specification.
+///
+/// Unicode extensions provide subtags that specify language and/or locale-based behavior
+/// or refinements to language tags, according to work done by the Unicode Consortium.
+/// (See [`RFC 6067`] for details).
+///
+/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
+/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Key, Value};
+/// use icu::locid::Locale;
+///
+/// let mut loc: Locale =
+/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
+///
+/// let key: Key = "ca".parse().expect("Parsing key failed.");
+/// let value: Value = "buddhist".parse().expect("Parsing value failed.");
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+/// ```
+#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
+pub struct Unicode {
+ /// The key-value pairs present in this locale extension, with each extension key subtag
+ /// associated to its provided value subtag.
+ pub keywords: Keywords,
+ /// A canonically ordered sequence of single standalone subtags for this locale extension.
+ pub attributes: Attributes,
+}
+
+impl Unicode {
+ /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Unicode;
+ ///
+ /// assert_eq!(Unicode::new(), Unicode::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self {
+ keywords: Keywords::new(),
+ attributes: Attributes::new(),
+ }
+ }
+
+ /// Returns [`true`] if there list of keywords and attributes is empty.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
+ ///
+ /// assert!(!loc.extensions.unicode.is_empty());
+ /// ```
+ pub fn is_empty(&self) -> bool {
+ self.keywords.is_empty() && self.attributes.is_empty()
+ }
+
+ /// Clears all Unicode extension keywords and attributes, effectively removing
+ /// the Unicode extension.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let mut loc: Locale =
+ /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
+ /// loc.extensions.unicode.clear();
+ /// assert_eq!(loc, "und-t-mul".parse().unwrap());
+ /// ```
+ pub fn clear(&mut self) {
+ self.keywords.clear();
+ self.attributes.clear();
+ }
+
+ pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
+ let mut attributes = vec![];
+ let mut keywords = LiteMap::new();
+
+ let mut current_keyword = None;
+ let mut current_type = vec![];
+
+ while let Some(subtag) = iter.peek() {
+ if let Ok(attr) = Attribute::try_from_bytes(subtag) {
+ if let Err(idx) = attributes.binary_search(&attr) {
+ attributes.insert(idx, attr);
+ }
+ } else {
+ break;
+ }
+ iter.next();
+ }
+
+ while let Some(subtag) = iter.peek() {
+ let slen = subtag.len();
+ if slen == 2 {
+ if let Some(kw) = current_keyword.take() {
+ keywords.try_insert(kw, Value::from_vec_unchecked(current_type));
+ current_type = vec![];
+ }
+ current_keyword = Some(Key::try_from_bytes(subtag)?);
+ } else if current_keyword.is_some() {
+ match Value::parse_subtag(subtag) {
+ Ok(Some(t)) => current_type.push(t),
+ Ok(None) => {}
+ Err(_) => break,
+ }
+ } else {
+ break;
+ }
+ iter.next();
+ }
+
+ if let Some(kw) = current_keyword.take() {
+ keywords.try_insert(kw, Value::from_vec_unchecked(current_type));
+ }
+
+ // Ensure we've defined at least one attribute or keyword
+ if attributes.is_empty() && keywords.is_empty() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ Ok(Self {
+ keywords: keywords.into(),
+ attributes: Attributes::from_vec_unchecked(attributes),
+ })
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ if self.is_empty() {
+ return Ok(());
+ }
+ f("u")?;
+ self.attributes.for_each_subtag_str(f)?;
+ self.keywords.for_each_subtag_str(f)?;
+ Ok(())
+ }
+}
+
+writeable::impl_display_with_writeable!(Unicode);
+
+impl writeable::Writeable for Unicode {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ if self.is_empty() {
+ return Ok(());
+ }
+ sink.write_str("-u")?;
+ if !self.attributes.is_empty() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(&self.attributes, sink)?;
+ }
+ if !self.keywords.is_empty() {
+ sink.write_char('-')?;
+ writeable::Writeable::write_to(&self.keywords, sink)?;
+ }
+ Ok(())
+ }
+
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ if self.is_empty() {
+ return writeable::LengthHint::exact(0);
+ }
+ let mut result = writeable::LengthHint::exact(2);
+ if !self.attributes.is_empty() {
+ result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
+ }
+ if !self.keywords.is_empty() {
+ result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
+ }
+ result
+ }
+}
diff --git a/vendor/icu_locid/src/extensions/unicode/value.rs b/vendor/icu_locid/src/extensions/unicode/value.rs
new file mode 100644
index 000000000..ce9982a4c
--- /dev/null
+++ b/vendor/icu_locid/src/extensions/unicode/value.rs
@@ -0,0 +1,199 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::helpers::ShortVec;
+use crate::parser::{get_subtag_iterator, ParserError};
+use alloc::vec::Vec;
+use core::ops::RangeInclusive;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A value used in a list of [`Keywords`](super::Keywords).
+///
+/// The value has to be a sequence of one or more alphanumerical strings
+/// separated by `-`.
+/// Each part of the sequence has to be no shorter than three characters and no
+/// longer than 8.
+///
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::Value;
+///
+/// let value1: Value = "gregory".parse().expect("Failed to parse a Value.");
+/// let value2: Value =
+/// "islamic-civil".parse().expect("Failed to parse a Value.");
+/// let value3: Value = "true".parse().expect("Failed to parse a Value.");
+///
+/// assert_eq!(&value1.to_string(), "gregory");
+/// assert_eq!(&value2.to_string(), "islamic-civil");
+///
+/// // The value "true" is special-cased to an empty value
+/// assert_eq!(&value3.to_string(), "");
+/// ```
+#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Value(ShortVec<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
+
+const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
+const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
+
+impl Value {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`Value`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Value;
+ ///
+ /// let value = Value::try_from_bytes(b"buddhist").expect("Parsing failed.");
+ ///
+ /// assert_eq!(&value.to_string(), "buddhist");
+ /// ```
+ pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
+ let mut v = ShortVec::new();
+
+ if !input.is_empty() {
+ for subtag in get_subtag_iterator(input) {
+ let val = Self::subtag_from_bytes(subtag)?;
+ if let Some(val) = val {
+ v.push(val);
+ }
+ }
+ }
+ Ok(Self(v))
+ }
+
+ /// Const constructor for when the value contains only a single subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Value;
+ ///
+ /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
+ /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
+ /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
+ /// ```
+ pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
+ match Self::subtag_from_bytes(subtag) {
+ Err(_) => Err(ParserError::InvalidExtension),
+ Ok(option) => Ok(Self::from_tinystr(option)),
+ }
+ }
+
+ #[doc(hidden)]
+ pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] {
+ self.0.as_slice()
+ }
+
+ #[doc(hidden)]
+ pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> {
+ self.0.single()
+ }
+
+ #[doc(hidden)]
+ pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
+ match subtag {
+ None => Self(ShortVec::new()),
+ Some(val) => {
+ debug_assert!(val.is_ascii_alphanumeric());
+ debug_assert!(!matches!(val, TRUE_VALUE));
+ Self(ShortVec::new_single(val))
+ }
+ }
+ }
+
+ pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self {
+ Self(input.into())
+ }
+
+ #[doc(hidden)]
+ pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+ Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
+ }
+
+ pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+ Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
+ }
+
+ pub(crate) const fn parse_subtag_from_bytes_manual_slice(
+ bytes: &[u8],
+ start: usize,
+ end: usize,
+ ) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
+ let slice_len = end - start;
+ if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
+ return Err(ParserError::InvalidExtension);
+ }
+
+ match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
+ Ok(TRUE_VALUE) => Ok(None),
+ Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())),
+ Ok(_) => Err(ParserError::InvalidExtension),
+ Err(_) => Err(ParserError::InvalidSubtag),
+ }
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+impl FromStr for Value {
+ type Err = ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+}
+
+impl_writeable_for_tinystr_list!(Value, "", "islamic", "civil");
+
+/// A macro allowing for compile-time construction of valid Unicode [`Value`] subtag.
+///
+/// The macro only supports single-subtag values.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{
+/// extensions_unicode_key as key, extensions_unicode_value as value,
+/// };
+///
+/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
+///
+/// assert_eq!(
+/// loc.extensions.unicode.keywords.get(&key!("ca")),
+/// Some(&value!("buddhist"))
+/// );
+/// ```
+///
+/// [`Value`]: crate::extensions::unicode::Value
+#[macro_export]
+macro_rules! extensions_unicode_value {
+ ($value:literal) => {{
+ // What we want:
+ // const R: $crate::extensions::unicode::Value =
+ // match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
+ // Ok(r) => r,
+ // #[allow(clippy::panic)] // const context
+ // _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
+ // };
+ // Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
+ const R: $crate::extensions::unicode::Value =
+ $crate::extensions::unicode::Value::from_tinystr(
+ match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
+ Ok(r) => r,
+ _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
+ },
+ );
+ R
+ }};
+}
diff --git a/vendor/icu_locid/src/helpers.rs b/vendor/icu_locid/src/helpers.rs
new file mode 100644
index 000000000..e617ded5d
--- /dev/null
+++ b/vendor/icu_locid/src/helpers.rs
@@ -0,0 +1,648 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::iter::FromIterator;
+
+use alloc::vec;
+use alloc::vec::Vec;
+use litemap::store::*;
+
+/// Internal: A vector that supports no-allocation, constant values if length 0 or 1.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) enum ShortVec<T> {
+ Empty,
+ Single(T),
+ Multi(Vec<T>),
+}
+
+impl<T> ShortVec<T> {
+ #[inline]
+ pub const fn new() -> Self {
+ Self::Empty
+ }
+
+ #[inline]
+ pub const fn new_single(item: T) -> Self {
+ Self::Single(item)
+ }
+
+ pub fn push(&mut self, item: T) {
+ *self = match core::mem::replace(self, Self::Empty) {
+ ShortVec::Empty => ShortVec::Single(item),
+ ShortVec::Single(prev_item) => ShortVec::Multi(vec![prev_item, item]),
+ ShortVec::Multi(mut items) => {
+ items.push(item);
+ ShortVec::Multi(items)
+ }
+ };
+ }
+
+ #[inline]
+ pub fn as_slice(&self) -> &[T] {
+ match self {
+ ShortVec::Empty => &[],
+ ShortVec::Single(v) => core::slice::from_ref(v),
+ ShortVec::Multi(v) => v.as_slice(),
+ }
+ }
+
+ #[inline]
+ pub fn as_mut_slice(&mut self) -> &mut [T] {
+ match self {
+ ShortVec::Empty => &mut [],
+ ShortVec::Single(v) => core::slice::from_mut(v),
+ ShortVec::Multi(v) => v.as_mut_slice(),
+ }
+ }
+
+ #[inline]
+ pub const fn single(&self) -> Option<&T> {
+ match self {
+ ShortVec::Single(v) => Some(v),
+ _ => None,
+ }
+ }
+
+ #[inline]
+ pub fn len(&self) -> usize {
+ match self {
+ ShortVec::Empty => 0,
+ ShortVec::Single(_) => 1,
+ ShortVec::Multi(ref v) => v.len(),
+ }
+ }
+
+ pub fn insert(&mut self, index: usize, elt: T) {
+ assert!(
+ index <= self.len(),
+ "insertion index (is {}) should be <= len (is {})",
+ index,
+ self.len()
+ );
+
+ *self = match core::mem::replace(self, ShortVec::Empty) {
+ ShortVec::Empty => ShortVec::Single(elt),
+ ShortVec::Single(item) => {
+ let items = if index == 0 {
+ vec![elt, item]
+ } else {
+ vec![item, elt]
+ };
+ ShortVec::Multi(items)
+ }
+ ShortVec::Multi(mut items) => {
+ items.insert(index, elt);
+ ShortVec::Multi(items)
+ }
+ }
+ }
+
+ pub fn remove(&mut self, index: usize) -> T {
+ assert!(
+ index < self.len(),
+ "removal index (is {}) should be < len (is {})",
+ index,
+ self.len()
+ );
+
+ let (replaced, removed_item) = match core::mem::replace(self, ShortVec::Empty) {
+ ShortVec::Empty => unreachable!(),
+ ShortVec::Single(v) => (ShortVec::Empty, v),
+ ShortVec::Multi(mut v) => {
+ let removed_item = v.remove(index);
+ match v.len() {
+ #[allow(clippy::unwrap_used)]
+ // we know that the vec has exactly one element left
+ 1 => (ShortVec::Single(v.pop().unwrap()), removed_item),
+ // v has atleast 2 elements, create a Multi variant
+ _ => (ShortVec::Multi(v), removed_item),
+ }
+ }
+ };
+ *self = replaced;
+ removed_item
+ }
+
+ #[inline]
+ pub fn clear(&mut self) {
+ let _ = core::mem::replace(self, ShortVec::Empty);
+ }
+}
+
+impl<T> From<Vec<T>> for ShortVec<T> {
+ fn from(v: Vec<T>) -> Self {
+ match v.len() {
+ 0 => ShortVec::Empty,
+ #[allow(clippy::unwrap_used)] // we know that the vec is not empty
+ 1 => ShortVec::Single(v.into_iter().next().unwrap()),
+ _ => ShortVec::Multi(v),
+ }
+ }
+}
+
+impl<T> Default for ShortVec<T> {
+ fn default() -> Self {
+ ShortVec::Empty
+ }
+}
+
+impl<T> FromIterator<T> for ShortVec<T> {
+ fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
+ iter.into_iter().collect::<Vec<_>>().into()
+ }
+}
+
+impl<K, V> StoreConstEmpty<K, V> for ShortVec<(K, V)> {
+ const EMPTY: ShortVec<(K, V)> = ShortVec::Empty;
+}
+
+impl<K, V> Store<K, V> for ShortVec<(K, V)> {
+ #[inline]
+ fn lm_len(&self) -> usize {
+ self.len()
+ }
+
+ #[inline]
+ fn lm_is_empty(&self) -> bool {
+ matches!(self, ShortVec::Empty)
+ }
+
+ #[inline]
+ fn lm_get(&self, index: usize) -> Option<(&K, &V)> {
+ self.as_slice().get(index).map(|elt| (&elt.0, &elt.1))
+ }
+
+ #[inline]
+ fn lm_last(&self) -> Option<(&K, &V)> {
+ match self {
+ ShortVec::Empty => None,
+ ShortVec::Single(v) => Some(v),
+ ShortVec::Multi(v) => v.as_slice().last(),
+ }
+ .map(|elt| (&elt.0, &elt.1))
+ }
+
+ #[inline]
+ fn lm_binary_search_by<F>(&self, mut cmp: F) -> Result<usize, usize>
+ where
+ F: FnMut(&K) -> core::cmp::Ordering,
+ {
+ self.as_slice().binary_search_by(|(k, _)| cmp(k))
+ }
+}
+
+impl<K, V> StoreMut<K, V> for ShortVec<(K, V)> {
+ fn lm_with_capacity(_capacity: usize) -> Self {
+ ShortVec::Empty
+ }
+
+ // ShortVec supports reserving capacity for additional elements only if we have already allocated a vector
+ fn lm_reserve(&mut self, additional: usize) {
+ if let ShortVec::Multi(ref mut v) = self {
+ v.reserve(additional)
+ }
+ }
+
+ fn lm_get_mut(&mut self, index: usize) -> Option<(&K, &mut V)> {
+ self.as_mut_slice()
+ .get_mut(index)
+ .map(|elt| (&elt.0, &mut elt.1))
+ }
+
+ fn lm_push(&mut self, key: K, value: V) {
+ self.push((key, value))
+ }
+
+ fn lm_insert(&mut self, index: usize, key: K, value: V) {
+ self.insert(index, (key, value))
+ }
+
+ fn lm_remove(&mut self, index: usize) -> (K, V) {
+ self.remove(index)
+ }
+
+ fn lm_clear(&mut self) {
+ self.clear();
+ }
+}
+
+impl<'a, K: 'a, V: 'a> StoreIterable<'a, K, V> for ShortVec<(K, V)> {
+ type KeyValueIter =
+ core::iter::Map<core::slice::Iter<'a, (K, V)>, for<'r> fn(&'r (K, V)) -> (&'r K, &'r V)>;
+
+ fn lm_iter(&'a self) -> Self::KeyValueIter {
+ self.as_slice().iter().map(|elt| (&elt.0, &elt.1))
+ }
+}
+
+impl<K, V> StoreFromIterator<K, V> for ShortVec<(K, V)> {}
+
+#[test]
+fn test_shortvec_impl() {
+ litemap::testing::check_store::<ShortVec<(u32, u64)>>();
+}
+
+macro_rules! impl_tinystr_subtag {
+ (
+ $(#[$doc:meta])*
+ $name:ident,
+ $($full_name:ident)::+,
+ $macro_name:ident,
+ $len_start:literal..=$len_end:literal,
+ $tinystr_ident:ident,
+ $validate:expr,
+ $normalize:expr,
+ $is_normalized:expr,
+ $error:ident,
+ [$good_example:literal $(,$more_good_examples:literal)*],
+ [$bad_example:literal $(, $more_bad_examples:literal)*],
+ ) => {
+ #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
+ #[cfg_attr(feature = "serde", derive(serde::Serialize))]
+ #[repr(transparent)]
+ $(#[$doc])*
+ pub struct $name(tinystr::TinyAsciiStr<$len_end>);
+
+ impl $name {
+ /// A constructor which takes a UTF-8 slice, parses it and
+ #[doc = concat!("produces a well-formed [`", stringify!($name), "`].")]
+ ///
+ /// # Examples
+ ///
+ /// ```
+ #[doc = concat!("use icu_locid::", stringify!($($full_name)::+), ";")]
+ ///
+ #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($good_example), ").is_ok());")]
+ #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($bad_example), ").is_err());")]
+ /// ```
+ pub const fn try_from_bytes(v: &[u8]) -> Result<Self, crate::parser::errors::ParserError> {
+ Self::try_from_bytes_manual_slice(v, 0, v.len())
+ }
+
+ /// Equivalent to [`try_from_bytes(bytes[start..end])`](Self::try_from_bytes),
+ /// but callable in a `const` context (which range indexing is not).
+ pub const fn try_from_bytes_manual_slice(
+ v: &[u8],
+ start: usize,
+ end: usize,
+ ) -> Result<Self, crate::parser::errors::ParserError> {
+ let slen = end - start;
+
+ #[allow(clippy::double_comparisons)] // if len_start == len_end
+ if slen < $len_start || slen > $len_end {
+ return Err(crate::parser::errors::ParserError::$error);
+ }
+
+ match tinystr::TinyAsciiStr::from_bytes_manual_slice(v, start, end) {
+ Ok($tinystr_ident) if $validate => Ok(Self($normalize)),
+ _ => Err(crate::parser::errors::ParserError::$error),
+ }
+ }
+
+ #[doc = concat!("Safely creates a [`", stringify!($name), "`] from its raw format")]
+ /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`],
+ /// this constructor only takes normalized values.
+ pub const fn try_from_raw(
+ v: [u8; $len_end],
+ ) -> Result<Self, crate::parser::errors::ParserError> {
+ if let Ok($tinystr_ident) = tinystr::TinyAsciiStr::<$len_end>::try_from_raw(v) {
+ if $tinystr_ident.len() >= $len_start && $is_normalized {
+ Ok(Self($tinystr_ident))
+ } else {
+ Err(crate::parser::errors::ParserError::$error)
+ }
+ } else {
+ Err(crate::parser::errors::ParserError::$error)
+ }
+ }
+
+ #[doc = concat!("Unsafely creates a [`", stringify!($name), "`] from its raw format")]
+ /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`],
+ /// this constructor only takes normalized values.
+ ///
+ /// # Safety
+ ///
+ /// This function is safe iff [`Self::try_from_raw`] returns an `Ok`. This is the case
+ /// for inputs that are correctly normalized.
+ pub const unsafe fn from_raw_unchecked(v: [u8; $len_end]) -> Self {
+ Self(tinystr::TinyAsciiStr::from_bytes_unchecked(v))
+ }
+
+ /// Deconstructs into a raw format to be consumed by
+ /// [`from_raw_unchecked`](Self::from_raw_unchecked()) or
+ /// [`try_from_raw`](Self::try_from_raw()).
+ pub const fn into_raw(self) -> [u8; $len_end] {
+ *self.0.all_bytes()
+ }
+
+ #[inline]
+ /// A helper function for displaying as a `&str`.
+ pub const fn as_str(&self) -> &str {
+ self.0.as_str()
+ }
+
+ /// Compare with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted
+ /// `self` to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`](core::cmp::Ordering::Equal)
+ /// is `self.as_str().as_bytes()`.
+ #[inline]
+ pub fn strict_cmp(self, other: &[u8]) -> core::cmp::Ordering {
+ self.as_str().as_bytes().cmp(other)
+ }
+
+ /// Compare with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string and then performed a structural comparison.
+ ///
+ #[inline]
+ pub fn normalizing_eq(self, other: &str) -> bool {
+ self.as_str().eq_ignore_ascii_case(other)
+ }
+ }
+
+ impl core::str::FromStr for $name {
+ type Err = crate::parser::errors::ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+ }
+
+ impl<'l> From<&'l $name> for &'l str {
+ fn from(input: &'l $name) -> Self {
+ input.as_str()
+ }
+ }
+
+ impl From<$name> for tinystr::TinyAsciiStr<$len_end> {
+ fn from(input: $name) -> Self {
+ input.0
+ }
+ }
+
+ impl writeable::Writeable for $name {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ sink.write_str(self.as_str())
+ }
+ #[inline]
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ writeable::LengthHint::exact(self.0.len())
+ }
+ }
+
+ writeable::impl_display_with_writeable!($name);
+
+ #[doc = concat!("A macro allowing for compile-time construction of valid [`", stringify!($name), "`] subtags.")]
+ ///
+ /// # Examples
+ ///
+ /// Parsing errors don't have to be handled at runtime:
+ /// ```
+ /// assert_eq!(
+ #[doc = concat!(" icu_locid::", stringify!($macro_name), "!(", stringify!($good_example) ,"),")]
+ #[doc = concat!(" ", stringify!($good_example), ".parse::<icu_locid::", stringify!($($full_name)::+),">().unwrap()")]
+ /// );
+ /// ```
+ ///
+ /// Invalid input is a compile failure:
+ /// ```compile_fail,E0080
+ #[doc = concat!("icu_locid::", stringify!($macro_name), "!(", stringify!($bad_example) ,");")]
+ /// ```
+ ///
+ #[doc = concat!("[`", stringify!($name), "`]: crate::", stringify!($($full_name)::+))]
+ #[macro_export]
+ macro_rules! $macro_name {
+ ($string:literal) => {{
+ use $crate::$($full_name)::+;
+ const R: $name =
+ match $name::try_from_bytes($string.as_bytes()) {
+ Ok(r) => r,
+ #[allow(clippy::panic)] // const context
+ _ => panic!(concat!("Invalid ", stringify!($name), ": ", $string)),
+ };
+ R
+ }};
+ }
+
+ #[cfg(feature = "databake")]
+ impl databake::Bake for $name {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ env.insert("icu_locid");
+ let string = self.as_str();
+ databake::quote! {::icu_locid::$macro_name!(#string) }
+ }
+ }
+
+ #[test]
+ fn test_construction() {
+ let maybe = $name::try_from_bytes($good_example.as_bytes());
+ assert!(maybe.is_ok());
+ assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw()));
+ assert_eq!(maybe.unwrap().as_str(), $good_example);
+ $(
+ let maybe = $name::try_from_bytes($more_good_examples.as_bytes());
+ assert!(maybe.is_ok());
+ assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw()));
+ assert_eq!(maybe.unwrap().as_str(), $more_good_examples);
+ )*
+ assert!($name::try_from_bytes($bad_example.as_bytes()).is_err());
+ $(
+ assert!($name::try_from_bytes($more_bad_examples.as_bytes()).is_err());
+ )*
+ }
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(&$good_example.parse::<$name>().unwrap(), $good_example);
+ $(
+ writeable::assert_writeable_eq!($more_good_examples.parse::<$name>().unwrap(), $more_good_examples);
+ )*
+ }
+
+ #[cfg(feature = "serde")]
+ impl<'de> serde::Deserialize<'de> for $name {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::de::Deserializer<'de>,
+ {
+ struct Visitor;
+
+ impl<'de> serde::de::Visitor<'de> for Visitor {
+ type Value = $name;
+
+ fn expecting(
+ &self,
+ formatter: &mut core::fmt::Formatter<'_>,
+ ) -> core::fmt::Result {
+ write!(formatter, "a valid BCP-47 {}", stringify!($name))
+ }
+
+ fn visit_str<E: serde::de::Error>(self, s: &str) -> Result<Self::Value, E> {
+ s.parse().map_err(serde::de::Error::custom)
+ }
+ }
+
+ if deserializer.is_human_readable() {
+ deserializer.deserialize_string(Visitor)
+ } else {
+ Self::try_from_raw(serde::de::Deserialize::deserialize(deserializer)?)
+ .map_err(serde::de::Error::custom)
+ }
+ }
+ }
+
+ // Safety checklist for ULE:
+ //
+ // 1. Must not include any uninitialized or padding bytes (true since transparent over a ULE).
+ // 2. Must have an alignment of 1 byte (true since transparent over a ULE).
+ // 3. ULE::validate_byte_slice() checks that the given byte slice represents a valid slice.
+ // 4. ULE::validate_byte_slice() checks that the given byte slice has a valid length.
+ // 5. All other methods must be left with their default impl.
+ // 6. Byte equality is semantic equality.
+ #[cfg(feature = "zerovec")]
+ unsafe impl zerovec::ule::ULE for $name {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> {
+ let it = bytes.chunks_exact(core::mem::size_of::<Self>());
+ if !it.remainder().is_empty() {
+ return Err(zerovec::ZeroVecError::length::<Self>(bytes.len()));
+ }
+ for v in it {
+ // The following can be removed once `array_chunks` is stabilized.
+ let mut a = [0; core::mem::size_of::<Self>()];
+ a.copy_from_slice(v);
+ if Self::try_from_raw(a).is_err() {
+ return Err(zerovec::ZeroVecError::parse::<Self>());
+ }
+ }
+ Ok(())
+ }
+ }
+
+ #[cfg(feature = "zerovec")]
+ impl zerovec::ule::AsULE for $name {
+ type ULE = Self;
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+ }
+
+ #[cfg(feature = "zerovec")]
+ impl<'a> zerovec::maps::ZeroMapKV<'a> for $name {
+ type Container = zerovec::ZeroVec<'a, $name>;
+ type Slice = zerovec::ZeroSlice<$name>;
+ type GetType = $name;
+ type OwnedType = $name;
+ }
+ };
+}
+
+macro_rules! impl_writeable_for_each_subtag_str_no_test {
+ ($type:tt) => {
+ impl writeable::Writeable for $type {
+ fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+ let mut initial = true;
+ self.for_each_subtag_str(&mut |subtag| {
+ if initial {
+ initial = false;
+ } else {
+ sink.write_char('-')?;
+ }
+ sink.write_str(subtag)
+ })
+ }
+
+ #[inline]
+ fn writeable_length_hint(&self) -> writeable::LengthHint {
+ let mut result = writeable::LengthHint::exact(0);
+ let mut initial = true;
+ self.for_each_subtag_str::<core::convert::Infallible, _>(&mut |subtag| {
+ if initial {
+ initial = false;
+ } else {
+ result += 1;
+ }
+ result += subtag.len();
+ Ok(())
+ })
+ .expect("infallible");
+ result
+ }
+ }
+
+ writeable::impl_display_with_writeable!($type);
+ };
+}
+
+macro_rules! impl_writeable_for_subtag_list {
+ ($type:tt, $sample1:literal, $sample2:literal) => {
+ impl_writeable_for_each_subtag_str_no_test!($type);
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(&$type::default(), "");
+ writeable::assert_writeable_eq!(
+ &$type::from_vec_unchecked(alloc::vec![$sample1.parse().unwrap()]),
+ $sample1,
+ );
+ writeable::assert_writeable_eq!(
+ &$type::from_vec_unchecked(alloc::vec![
+ $sample1.parse().unwrap(),
+ $sample2.parse().unwrap()
+ ]),
+ core::concat!($sample1, "-", $sample2),
+ );
+ }
+ };
+}
+
+macro_rules! impl_writeable_for_tinystr_list {
+ ($type:tt, $if_empty:literal, $sample1:literal, $sample2:literal) => {
+ impl_writeable_for_each_subtag_str_no_test!($type);
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(
+ &$type::from_vec_unchecked(vec![$sample1.parse().unwrap()]),
+ $sample1,
+ );
+ writeable::assert_writeable_eq!(
+ &$type::from_vec_unchecked(vec![
+ $sample1.parse().unwrap(),
+ $sample2.parse().unwrap()
+ ]),
+ core::concat!($sample1, "-", $sample2),
+ );
+ }
+ };
+}
+
+macro_rules! impl_writeable_for_key_value {
+ ($type:tt, $key1:literal, $value1:literal, $key2:literal, $expected2:literal) => {
+ impl_writeable_for_each_subtag_str_no_test!($type);
+
+ #[test]
+ fn test_writeable() {
+ writeable::assert_writeable_eq!(&$type::default(), "");
+ writeable::assert_writeable_eq!(
+ &$type::from_tuple_vec(vec![($key1.parse().unwrap(), $value1.parse().unwrap())]),
+ core::concat!($key1, "-", $value1),
+ );
+ writeable::assert_writeable_eq!(
+ &$type::from_tuple_vec(vec![
+ ($key1.parse().unwrap(), $value1.parse().unwrap()),
+ ($key2.parse().unwrap(), "true".parse().unwrap())
+ ]),
+ core::concat!($key1, "-", $value1, "-", $expected2),
+ );
+ }
+ };
+}
diff --git a/vendor/icu_locid/src/langid.rs b/vendor/icu_locid/src/langid.rs
new file mode 100644
index 000000000..fc5435766
--- /dev/null
+++ b/vendor/icu_locid/src/langid.rs
@@ -0,0 +1,523 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::cmp::Ordering;
+use core::str::FromStr;
+
+use crate::ordering::SubtagOrderingResult;
+use crate::parser::{
+ get_subtag_iterator, parse_language_identifier, parse_language_identifier_with_single_variant,
+ ParserError, ParserMode,
+};
+use crate::subtags;
+use alloc::string::String;
+use alloc::string::ToString;
+
+/// A core struct representing a [`Unicode BCP47 Language Identifier`].
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{subtags::*, LanguageIdentifier};
+///
+/// let li: LanguageIdentifier = "en-US".parse().expect("Failed to parse.");
+///
+/// assert_eq!(li.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(li.script, None);
+/// assert_eq!(li.region.unwrap(), "US".parse::<Region>().unwrap());
+/// assert_eq!(li.variants.len(), 0);
+/// assert_eq!(li.to_string(), "en-US");
+/// ```
+///
+/// # Parsing
+///
+/// Unicode recognizes three levels of standard conformance for any language identifier:
+///
+/// * *well-formed* - syntactically correct
+/// * *valid* - well-formed and only uses registered language, region, script and variant subtags...
+/// * *canonical* - valid and no deprecated codes or structure.
+///
+/// At the moment parsing normalizes a well-formed language identifier converting
+/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
+///
+/// Any bogus subtags will cause the parsing to fail with an error.
+/// No subtag validation is performed.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{subtags::*, LanguageIdentifier};
+///
+/// let li: LanguageIdentifier =
+/// "eN_latn_Us-Valencia".parse().expect("Failed to parse.");
+///
+/// assert_eq!(li.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(li.script, "Latn".parse::<Script>().ok());
+/// assert_eq!(li.region, "US".parse::<Region>().ok());
+/// assert_eq!(
+/// li.variants.get(0),
+/// "valencia".parse::<Variant>().ok().as_ref()
+/// );
+/// ```
+///
+/// [`Unicode BCP47 Language Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_language_identifier
+#[derive(Default, PartialEq, Eq, Clone, Hash)]
+#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
+pub struct LanguageIdentifier {
+ /// Language subtag of the language identifier.
+ pub language: subtags::Language,
+ /// Script subtag of the language identifier.
+ pub script: Option<subtags::Script>,
+ /// Region subtag of the language identifier.
+ pub region: Option<subtags::Region>,
+ /// Variant subtags of the language identifier.
+ pub variants: subtags::Variants,
+}
+
+impl LanguageIdentifier {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`LanguageIdentifier`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// let li =
+ /// LanguageIdentifier::try_from_bytes(b"en-US").expect("Parsing failed.");
+ ///
+ /// assert_eq!(li.to_string(), "en-US");
+ /// ```
+ pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ parse_language_identifier(v, ParserMode::LanguageIdentifier)
+ }
+
+ #[doc(hidden)]
+ #[allow(clippy::type_complexity)]
+ // The return type should be `Result<Self, ParserError>` once the `const_precise_live_drops`
+ // is stabilized ([rust-lang#73255](https://github.com/rust-lang/rust/issues/73255)).
+ pub const fn try_from_bytes_with_single_variant(
+ v: &[u8],
+ ) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ ),
+ ParserError,
+ > {
+ parse_language_identifier_with_single_variant(v, ParserMode::LanguageIdentifier)
+ }
+
+ /// A constructor which takes a utf8 slice which may contain extension keys,
+ /// parses it and produces a well-formed [`LanguageIdentifier`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// let li = LanguageIdentifier::try_from_locale_bytes(b"en-US-x-posix")
+ /// .expect("Parsing failed.");
+ ///
+ /// assert_eq!(li.to_string(), "en-US");
+ /// ```
+ ///
+ /// This method should be used for input that may be a locale identifier.
+ /// All extensions will be lost.
+ pub fn try_from_locale_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ parse_language_identifier(v, ParserMode::Locale)
+ }
+
+ /// The default undefined language "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// assert_eq!(LanguageIdentifier::default(), LanguageIdentifier::UND);
+ /// assert_eq!("und", LanguageIdentifier::UND.to_string());
+ /// ```
+ pub const UND: Self = Self {
+ language: subtags::Language::UND,
+ script: None,
+ region: None,
+ variants: subtags::Variants::new(),
+ };
+
+ /// This is a best-effort operation that performs all available levels of canonicalization.
+ ///
+ /// At the moment the operation will normalize casing and the separator, but in the future
+ /// it may also validate and update from deprecated subtags to canonical ones.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ ///
+ /// assert_eq!(
+ /// LanguageIdentifier::canonicalize("pL_latn_pl"),
+ /// Ok("pl-Latn-PL".to_string())
+ /// );
+ /// ```
+ pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> {
+ let lang_id = Self::try_from_bytes(input.as_ref())?;
+ Ok(lang_id.to_string())
+ }
+
+ /// Compare this [`LanguageIdentifier`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`LanguageIdentifier`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-Latn-PL",
+ /// "und",
+ /// "und-Adlm",
+ /// "und-GB",
+ /// "und-ZA",
+ /// "und-fonipa",
+ /// "zh",
+ /// ];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_langid = a.parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(a, a_langid.to_string());
+ /// assert!(a_langid.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_langid.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+ self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+ }
+
+ /// Compare this [`LanguageIdentifier`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`LanguageIdentifier::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] = &[b"ca", b"ES", b"valencia"];
+ ///
+ /// let loc = "ca-ES-valencia".parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = "ca-ES".parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = "ca-ZA".parse::<LanguageIdentifier>().unwrap();
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+ I: Iterator<Item = &'l [u8]>,
+ {
+ let r = self.for_each_subtag_str(&mut |subtag| {
+ if let Some(other) = subtags.next() {
+ match subtag.as_bytes().cmp(other) {
+ Ordering::Equal => Ok(()),
+ not_equal => Err(not_equal),
+ }
+ } else {
+ Err(Ordering::Greater)
+ }
+ });
+ match r {
+ Ok(_) => SubtagOrderingResult::Subtags(subtags),
+ Err(o) => SubtagOrderingResult::Ordering(o),
+ }
+ }
+
+ /// Compare this `LanguageIdentifier` with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string to a `LanguageIdentifier` and then performed a structucal comparison.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::LanguageIdentifier;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-LaTn-pL",
+ /// "uNd",
+ /// "UnD-adlm",
+ /// "uNd-GB",
+ /// "UND-FONIPA",
+ /// "ZH",
+ /// ];
+ ///
+ /// for a in bcp47_strings {
+ /// assert!(a.parse::<LanguageIdentifier>().unwrap().normalizing_eq(a));
+ /// }
+ /// ```
+ pub fn normalizing_eq(&self, other: &str) -> bool {
+ macro_rules! subtag_matches {
+ ($T:ty, $iter:ident, $expected:expr) => {
+ $iter
+ .next()
+ .map(|b| <$T>::try_from_bytes(b) == Ok($expected))
+ .unwrap_or(false)
+ };
+ }
+
+ let mut iter = get_subtag_iterator(other.as_bytes());
+ if !subtag_matches!(subtags::Language, iter, self.language) {
+ return false;
+ }
+ if let Some(ref script) = self.script {
+ if !subtag_matches!(subtags::Script, iter, *script) {
+ return false;
+ }
+ }
+ if let Some(ref region) = self.region {
+ if !subtag_matches!(subtags::Region, iter, *region) {
+ return false;
+ }
+ }
+ for variant in self.variants.iter() {
+ if !subtag_matches!(subtags::Variant, iter, *variant) {
+ return false;
+ }
+ }
+ iter.next() == None
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ f(self.language.as_str())?;
+ if let Some(ref script) = self.script {
+ f(script.as_str())?;
+ }
+ if let Some(ref region) = self.region {
+ f(region.as_str())?;
+ }
+ for variant in self.variants.iter() {
+ f(variant.as_str())?;
+ }
+ Ok(())
+ }
+}
+
+impl AsRef<LanguageIdentifier> for LanguageIdentifier {
+ fn as_ref(&self) -> &Self {
+ self
+ }
+}
+
+impl AsMut<LanguageIdentifier> for LanguageIdentifier {
+ fn as_mut(&mut self) -> &mut Self {
+ self
+ }
+}
+
+impl core::fmt::Debug for LanguageIdentifier {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ core::fmt::Display::fmt(&self, f)
+ }
+}
+
+impl FromStr for LanguageIdentifier {
+ type Err = ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+}
+
+impl_writeable_for_each_subtag_str_no_test!(LanguageIdentifier);
+
+#[test]
+fn test_writeable() {
+ use writeable::assert_writeable_eq;
+ assert_writeable_eq!(LanguageIdentifier::UND, "und");
+ assert_writeable_eq!("und-001".parse::<LanguageIdentifier>().unwrap(), "und-001");
+ assert_writeable_eq!(
+ "und-Mymr".parse::<LanguageIdentifier>().unwrap(),
+ "und-Mymr",
+ );
+ assert_writeable_eq!(
+ "my-Mymr-MM".parse::<LanguageIdentifier>().unwrap(),
+ "my-Mymr-MM",
+ );
+ assert_writeable_eq!(
+ "my-Mymr-MM-posix".parse::<LanguageIdentifier>().unwrap(),
+ "my-Mymr-MM-posix",
+ );
+ assert_writeable_eq!(
+ "zh-macos-posix".parse::<LanguageIdentifier>().unwrap(),
+ "zh-macos-posix",
+ );
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_language as language;
+/// use icu::locid::LanguageIdentifier;
+///
+/// let language = language!("en");
+/// let li = LanguageIdentifier::from(language);
+///
+/// assert_eq!(li.language, language);
+/// assert_eq!(li.to_string(), "en");
+/// ```
+impl From<subtags::Language> for LanguageIdentifier {
+ fn from(language: subtags::Language) -> Self {
+ Self {
+ language,
+ ..Default::default()
+ }
+ }
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_script as script;
+/// use icu::locid::LanguageIdentifier;
+///
+/// let script = script!("latn");
+/// let li = LanguageIdentifier::from(Some(script));
+///
+/// assert_eq!(li.script.unwrap(), script);
+/// assert_eq!(li.to_string(), "und-Latn");
+/// ```
+impl From<Option<subtags::Script>> for LanguageIdentifier {
+ fn from(script: Option<subtags::Script>) -> Self {
+ Self {
+ script,
+ ..Default::default()
+ }
+ }
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_region as region;
+/// use icu::locid::LanguageIdentifier;
+///
+/// let region = region!("US");
+/// let li = LanguageIdentifier::from(Some(region));
+///
+/// assert_eq!(li.region.unwrap(), region);
+/// assert_eq!(li.to_string(), "und-US");
+/// ```
+impl From<Option<subtags::Region>> for LanguageIdentifier {
+ fn from(region: Option<subtags::Region>) -> Self {
+ Self {
+ region,
+ ..Default::default()
+ }
+ }
+}
+
+/// Convert from an LSR tuple to a [`LanguageIdentifier`].
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::LanguageIdentifier;
+/// use icu::locid::{
+/// subtags_language as language, subtags_region as region,
+/// subtags_script as script,
+/// };
+///
+/// let lang = language!("en");
+/// let script = script!("Latn");
+/// let region = region!("US");
+/// let li = LanguageIdentifier::from((lang, Some(script), Some(region)));
+///
+/// assert_eq!(li.language, lang);
+/// assert_eq!(li.script.unwrap(), script);
+/// assert_eq!(li.region.unwrap(), region);
+/// assert_eq!(li.variants.len(), 0);
+/// assert_eq!(li.to_string(), "en-Latn-US");
+/// ```
+impl
+ From<(
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ )> for LanguageIdentifier
+{
+ fn from(
+ lsr: (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ ),
+ ) -> Self {
+ Self {
+ language: lsr.0,
+ script: lsr.1,
+ region: lsr.2,
+ ..Default::default()
+ }
+ }
+}
+
+/// Convert from a [`LanguageIdentifier`] to an LSR tuple.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::LanguageIdentifier;
+/// use icu::locid::{
+/// langid, subtags_language as language, subtags_region as region,
+/// subtags_script as script,
+/// };
+///
+/// let lid = langid!("en-Latn-US");
+/// let (lang, script, region) = (&lid).into();
+///
+/// assert_eq!(lang, language!("en"));
+/// assert_eq!(script, Some(script!("Latn")));
+/// assert_eq!(region, Some(region!("US")));
+/// ```
+impl From<&LanguageIdentifier>
+ for (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ )
+{
+ fn from(langid: &LanguageIdentifier) -> Self {
+ (langid.language, langid.script, langid.region)
+ }
+}
diff --git a/vendor/icu_locid/src/lib.rs b/vendor/icu_locid/src/lib.rs
new file mode 100644
index 000000000..885c4b743
--- /dev/null
+++ b/vendor/icu_locid/src/lib.rs
@@ -0,0 +1,101 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Parsing, manipulating, and serializing Unicode Language and Locale Identifiers.
+//!
+//! This module is published as its own crate ([`icu_locid`](https://docs.rs/icu_locid/latest/icu_locid/))
+//! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project.
+//!
+//! The module provides algorithms for parsing a string into a well-formed language or locale identifier
+//! as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`].
+//!
+//! [`Locale`] is the most common structure to use for storing information about a language,
+//! script, region, variants and extensions. In almost all cases, this struct should be used as the
+//! base unit for all locale management operations.
+//!
+//! [`LanguageIdentifier`] is a strict subset of [`Locale`] which can be useful in a narrow range of
+//! cases where [`Unicode Extensions`] are not relevant.
+//!
+//! If in doubt, use [`Locale`].
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::subtags::{Language, Region};
+//! use icu::locid::Locale;
+//!
+//! let mut loc: Locale = "en-US".parse().expect("Parsing failed.");
+//!
+//! let lang: Language = "en".parse().expect("Parsing failed.");
+//! let region: Region = "US".parse().expect("Parsing failed.");
+//!
+//! assert_eq!(loc.id.language, lang);
+//! assert_eq!(loc.id.script, None);
+//! assert_eq!(loc.id.region, Some(region));
+//! assert_eq!(loc.id.variants.len(), 0);
+//!
+//! let region: Region = "GB".parse().expect("Parsing failed.");
+//! loc.id.region = Some(region);
+//!
+//! assert_eq!(loc.to_string(), "en-GB");
+//! ```
+//!
+//! ## Macros
+//!
+//! ```rust
+//! use icu::locid::{
+//! langid, subtags_language as language, subtags_region as region,
+//! };
+//!
+//! let lid = langid!("EN_US");
+//!
+//! assert_eq!(lid.language, language!("en"));
+//! assert_eq!(lid.region, Some(region!("US")));
+//! ```
+
+//!
+//! For more details, see [`Locale`] and [`LanguageIdentifier`].
+//!
+//! [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]: https://unicode.org/reports/tr35/tr35.html#Unicode_Language_and_Locale_Identifiers
+//! [`ICU4X`]: ../icu/index.html
+//! [`Unicode Extensions`]: extensions
+
+// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, feature = "std")), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::indexing_slicing,
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums,
+ // TODO(#2266): enable missing_debug_implementations,
+ )
+)]
+#![warn(missing_docs)]
+
+extern crate alloc;
+
+#[macro_use]
+mod helpers;
+
+mod langid;
+mod locale;
+mod macros;
+mod ordering;
+mod parser;
+
+pub use langid::LanguageIdentifier;
+pub use locale::Locale;
+pub use ordering::SubtagOrderingResult;
+pub use parser::errors::ParserError;
+
+pub mod extensions;
+pub mod subtags;
+pub mod zerovec;
+
+#[cfg(feature = "serde")]
+mod serde;
diff --git a/vendor/icu_locid/src/locale.rs b/vendor/icu_locid/src/locale.rs
new file mode 100644
index 000000000..d7040d31a
--- /dev/null
+++ b/vendor/icu_locid/src/locale.rs
@@ -0,0 +1,528 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ordering::SubtagOrderingResult;
+use crate::parser::{
+ get_subtag_iterator, parse_locale,
+ parse_locale_with_single_variant_single_keyword_unicode_keyword_extension, ParserError,
+ ParserMode,
+};
+use crate::{extensions, subtags, LanguageIdentifier};
+use alloc::string::String;
+use alloc::string::ToString;
+use core::cmp::Ordering;
+use core::str::FromStr;
+use tinystr::TinyAsciiStr;
+
+/// A core struct representing a [`Unicode Locale Identifier`].
+///
+/// A locale is made of two parts:
+/// * Unicode Language Identifier
+/// * A set of Unicode Extensions
+///
+/// [`Locale`] exposes all of the same fields and methods as [`LanguageIdentifier`], and
+/// on top of that is able to parse, manipulate and serialize unicode extension fields.
+///
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::extensions::unicode::{Key, Value};
+/// use icu::locid::{subtags::*, Locale};
+///
+/// let loc: Locale = "en-US-u-ca-buddhist".parse().expect("Failed to parse.");
+///
+/// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(loc.id.script, None);
+/// assert_eq!(loc.id.region, "US".parse::<Region>().ok());
+/// assert_eq!(loc.id.variants.len(), 0);
+/// assert_eq!(loc.to_string(), "en-US-u-ca-buddhist");
+///
+/// let key: Key = "ca".parse().expect("Parsing key failed.");
+/// let value: Value = "buddhist".parse().expect("Parsing value failed.");
+/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
+/// ```
+///
+/// # Parsing
+///
+/// Unicode recognizes three levels of standard conformance for a locale:
+///
+/// * *well-formed* - syntactically correct
+/// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
+/// * *canonical* - valid and no deprecated codes or structure.
+///
+/// At the moment parsing normalizes a well-formed locale identifier converting
+/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
+///
+/// Any bogus subtags will cause the parsing to fail with an error.
+/// No subtag validation or canonicalization is performed.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{subtags::*, Locale};
+///
+/// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12"
+/// .parse()
+/// .expect("Failed to parse.");
+///
+/// assert_eq!(loc.id.language, "en".parse::<Language>().unwrap());
+/// assert_eq!(loc.id.script, "Latn".parse::<Script>().ok());
+/// assert_eq!(loc.id.region, "US".parse::<Region>().ok());
+/// assert_eq!(
+/// loc.id.variants.get(0),
+/// "valencia".parse::<Variant>().ok().as_ref()
+/// );
+/// ```
+/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier
+#[derive(Default, PartialEq, Eq, Clone, Hash)]
+#[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro)
+pub struct Locale {
+ /// The basic language/script/region components in the locale identifier along with any variants.
+ pub id: LanguageIdentifier,
+ /// Any extensions present in the locale identifier.
+ pub extensions: extensions::Extensions,
+}
+
+#[test]
+fn test_sizes() {
+ assert_eq!(core::mem::size_of::<subtags::Language>(), 3);
+ assert_eq!(core::mem::size_of::<subtags::Script>(), 4);
+ assert_eq!(core::mem::size_of::<subtags::Region>(), 3);
+ assert_eq!(core::mem::size_of::<subtags::Variant>(), 8);
+ assert_eq!(core::mem::size_of::<subtags::Variants>(), 32);
+ assert_eq!(core::mem::size_of::<LanguageIdentifier>(), 48);
+
+ assert_eq!(core::mem::size_of::<extensions::transform::Transform>(), 72);
+ assert_eq!(core::mem::size_of::<Option<LanguageIdentifier>>(), 48);
+ assert_eq!(core::mem::size_of::<extensions::transform::Fields>(), 24);
+
+ assert_eq!(core::mem::size_of::<extensions::unicode::Attributes>(), 24);
+ assert_eq!(core::mem::size_of::<extensions::unicode::Keywords>(), 48);
+ assert_eq!(core::mem::size_of::<Vec<extensions::other::Other>>(), 24);
+ assert_eq!(core::mem::size_of::<extensions::private::Private>(), 24);
+ assert_eq!(core::mem::size_of::<extensions::Extensions>(), 192);
+
+ assert_eq!(core::mem::size_of::<Locale>(), 240);
+}
+
+impl Locale {
+ /// A constructor which takes a utf8 slice, parses it and
+ /// produces a well-formed [`Locale`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// let loc = Locale::try_from_bytes("en-US-u-hc-h12".as_bytes())
+ /// .expect("Parsing failed.");
+ ///
+ /// assert_eq!(loc.to_string(), "en-US-u-hc-h12");
+ /// ```
+ pub fn try_from_bytes(v: &[u8]) -> Result<Self, ParserError> {
+ parse_locale(v)
+ }
+
+ /// The default undefined locale "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// assert_eq!(Locale::default(), Locale::UND);
+ /// assert_eq!("und", Locale::UND.to_string());
+ /// ```
+ pub const UND: Self = Self {
+ id: LanguageIdentifier::UND,
+ extensions: extensions::Extensions::new(),
+ };
+
+ /// This is a best-effort operation that performs all available levels of canonicalization.
+ ///
+ /// At the moment the operation will normalize casing and the separator, but in the future
+ /// it may also validate and update from deprecated subtags to canonical ones.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ ///
+ /// assert_eq!(
+ /// Locale::canonicalize("pL_latn_pl-U-HC-H12"),
+ /// Ok("pl-Latn-PL-u-hc-h12".to_string())
+ /// );
+ /// ```
+ pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, ParserError> {
+ let locale = Self::try_from_bytes(input.as_ref())?;
+ Ok(locale.to_string())
+ }
+
+ /// Compare this [`Locale`] with BCP-47 bytes.
+ ///
+ /// The return value is equivalent to what would happen if you first converted this
+ /// [`Locale`] to a BCP-47 string and then performed a byte comparison.
+ ///
+ /// This function is case-sensitive and results in a *total order*, so it is appropriate for
+ /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-Latn-PL",
+ /// "und",
+ /// "und-fonipa",
+ /// "und-t-m0-true",
+ /// "und-u-ca-hebrew",
+ /// "und-u-ca-japanese",
+ /// "zh",
+ /// ];
+ ///
+ /// for ab in bcp47_strings.windows(2) {
+ /// let a = ab[0];
+ /// let b = ab[1];
+ /// assert!(a.cmp(b) == Ordering::Less);
+ /// let a_loc = a.parse::<Locale>().unwrap();
+ /// assert_eq!(a, a_loc.to_string());
+ /// assert!(a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal);
+ /// assert!(a_loc.strict_cmp(b.as_bytes()) == Ordering::Less);
+ /// }
+ /// ```
+ pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
+ self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+ }
+
+ /// Compare this [`Locale`] with an iterator of BCP-47 subtags.
+ ///
+ /// This function has the same equality semantics as [`Locale::strict_cmp`]. It is intended as
+ /// a more modular version that allows multiple subtag iterators to be chained together.
+ ///
+ /// For an additional example, see [`SubtagOrderingResult`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let subtags: &[&[u8]] =
+ /// &[b"ca", b"ES", b"valencia", b"u", b"ca", b"hebrew"];
+ ///
+ /// let loc = locale!("ca-ES-valencia-u-ca-hebrew");
+ /// assert_eq!(
+ /// Ordering::Equal,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = locale!("ca-ES-valencia");
+ /// assert_eq!(
+ /// Ordering::Less,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ ///
+ /// let loc = locale!("ca-ES-valencia-u-nu-arab");
+ /// assert_eq!(
+ /// Ordering::Greater,
+ /// loc.strict_cmp_iter(subtags.iter().copied()).end()
+ /// );
+ /// ```
+ pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
+ where
+ I: Iterator<Item = &'l [u8]>,
+ {
+ let r = self.for_each_subtag_str(&mut |subtag| {
+ if let Some(other) = subtags.next() {
+ match subtag.as_bytes().cmp(other) {
+ Ordering::Equal => Ok(()),
+ not_equal => Err(not_equal),
+ }
+ } else {
+ Err(Ordering::Greater)
+ }
+ });
+ match r {
+ Ok(_) => SubtagOrderingResult::Subtags(subtags),
+ Err(o) => SubtagOrderingResult::Ordering(o),
+ }
+ }
+
+ /// Compare this `Locale` with a potentially unnormalized BCP-47 string.
+ ///
+ /// The return value is equivalent to what would happen if you first parsed the
+ /// BCP-47 string to a `Locale` and then performed a structucal comparison.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::Locale;
+ /// use std::cmp::Ordering;
+ ///
+ /// let bcp47_strings: &[&str] = &[
+ /// "pl-LaTn-pL",
+ /// "uNd",
+ /// "UND-FONIPA",
+ /// "UnD-t-m0-TrUe",
+ /// "uNd-u-CA-Japanese",
+ /// "ZH",
+ /// ];
+ ///
+ /// for a in bcp47_strings {
+ /// assert!(a.parse::<Locale>().unwrap().normalizing_eq(a));
+ /// }
+ /// ```
+ pub fn normalizing_eq(&self, other: &str) -> bool {
+ macro_rules! subtag_matches {
+ ($T:ty, $iter:ident, $expected:expr) => {
+ $iter
+ .next()
+ .map(|b| <$T>::try_from_bytes(b) == Ok($expected))
+ .unwrap_or(false)
+ };
+ }
+
+ let mut iter = get_subtag_iterator(other.as_bytes());
+ if !subtag_matches!(subtags::Language, iter, self.id.language) {
+ return false;
+ }
+ if let Some(ref script) = self.id.script {
+ if !subtag_matches!(subtags::Script, iter, *script) {
+ return false;
+ }
+ }
+ if let Some(ref region) = self.id.region {
+ if !subtag_matches!(subtags::Region, iter, *region) {
+ return false;
+ }
+ }
+ for variant in self.id.variants.iter() {
+ if !subtag_matches!(subtags::Variant, iter, *variant) {
+ return false;
+ }
+ }
+ if !self.extensions.is_empty() {
+ match extensions::Extensions::try_from_iter(&mut iter) {
+ Ok(exts) => {
+ if self.extensions != exts {
+ return false;
+ }
+ }
+ Err(_) => {
+ return false;
+ }
+ }
+ }
+ iter.next() == None
+ }
+
+ #[doc(hidden)]
+ #[allow(clippy::type_complexity)]
+ pub const fn try_from_bytes_with_single_variant_single_keyword_unicode_extension(
+ v: &[u8],
+ ) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+ ),
+ ParserError,
+ > {
+ parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(
+ v,
+ ParserMode::Locale,
+ )
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.id.for_each_subtag_str(f)?;
+ self.extensions.for_each_subtag_str(f)?;
+ Ok(())
+ }
+}
+
+impl FromStr for Locale {
+ type Err = ParserError;
+
+ fn from_str(source: &str) -> Result<Self, Self::Err> {
+ Self::try_from_bytes(source.as_bytes())
+ }
+}
+
+impl From<LanguageIdentifier> for Locale {
+ fn from(id: LanguageIdentifier) -> Self {
+ Self {
+ id,
+ extensions: extensions::Extensions::default(),
+ }
+ }
+}
+
+impl From<Locale> for LanguageIdentifier {
+ fn from(loc: Locale) -> Self {
+ loc.id
+ }
+}
+
+impl AsRef<LanguageIdentifier> for Locale {
+ fn as_ref(&self) -> &LanguageIdentifier {
+ &self.id
+ }
+}
+
+impl AsMut<LanguageIdentifier> for Locale {
+ fn as_mut(&mut self) -> &mut LanguageIdentifier {
+ &mut self.id
+ }
+}
+
+impl core::fmt::Debug for Locale {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ writeable::Writeable::write_to(self, f)
+ }
+}
+
+impl_writeable_for_each_subtag_str_no_test!(Locale);
+
+#[test]
+fn test_writeable() {
+ use writeable::assert_writeable_eq;
+ assert_writeable_eq!(Locale::UND, "und");
+ assert_writeable_eq!("und-001".parse::<Locale>().unwrap(), "und-001");
+ assert_writeable_eq!("und-Mymr".parse::<Locale>().unwrap(), "und-Mymr");
+ assert_writeable_eq!("my-Mymr-MM".parse::<Locale>().unwrap(), "my-Mymr-MM");
+ assert_writeable_eq!(
+ "my-Mymr-MM-posix".parse::<Locale>().unwrap(),
+ "my-Mymr-MM-posix",
+ );
+ assert_writeable_eq!(
+ "zh-macos-posix".parse::<Locale>().unwrap(),
+ "zh-macos-posix",
+ );
+ assert_writeable_eq!(
+ "my-t-my-d0-zawgyi".parse::<Locale>().unwrap(),
+ "my-t-my-d0-zawgyi",
+ );
+ assert_writeable_eq!(
+ "ar-SA-u-ca-islamic-civil".parse::<Locale>().unwrap(),
+ "ar-SA-u-ca-islamic-civil",
+ );
+ assert_writeable_eq!(
+ "en-001-x-foo-bar".parse::<Locale>().unwrap(),
+ "en-001-x-foo-bar",
+ );
+ assert_writeable_eq!("und-t-m0-true".parse::<Locale>().unwrap(), "und-t-m0-true",);
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_language as language;
+/// use icu::locid::Locale;
+///
+/// let language = language!("en");
+/// let loc = Locale::from(language);
+///
+/// assert_eq!(loc.id.language, language);
+/// assert_eq!(loc.to_string(), "en");
+/// ```
+impl From<subtags::Language> for Locale {
+ fn from(language: subtags::Language) -> Self {
+ Self {
+ id: language.into(),
+ ..Default::default()
+ }
+ }
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_script as script;
+/// use icu::locid::Locale;
+///
+/// let script = script!("latn");
+/// let loc = Locale::from(Some(script));
+///
+/// assert_eq!(loc.id.script.unwrap(), script);
+/// assert_eq!(loc.to_string(), "und-Latn");
+/// ```
+impl From<Option<subtags::Script>> for Locale {
+ fn from(script: Option<subtags::Script>) -> Self {
+ Self {
+ id: script.into(),
+ ..Default::default()
+ }
+ }
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags_region as region;
+/// use icu::locid::Locale;
+///
+/// let region = region!("US");
+/// let loc = Locale::from(Some(region));
+///
+/// assert_eq!(loc.id.region.unwrap(), region);
+/// assert_eq!(loc.to_string(), "und-US");
+/// ```
+impl From<Option<subtags::Region>> for Locale {
+ fn from(region: Option<subtags::Region>) -> Self {
+ Self {
+ id: region.into(),
+ ..Default::default()
+ }
+ }
+}
+
+/// # Examples
+///
+/// ```
+/// use icu::locid::Locale;
+/// use icu::locid::{
+/// subtags_language as language, subtags_region as region,
+/// subtags_script as script,
+/// };
+///
+/// let lang = language!("en");
+/// let script = script!("Latn");
+/// let region = region!("US");
+/// let loc = Locale::from((lang, Some(script), Some(region)));
+///
+/// assert_eq!(loc.id.language, lang);
+/// assert_eq!(loc.id.script.unwrap(), script);
+/// assert_eq!(loc.id.region.unwrap(), region);
+/// assert_eq!(loc.id.variants.len(), 0);
+/// assert_eq!(loc.to_string(), "en-Latn-US");
+/// ```
+impl
+ From<(
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ )> for Locale
+{
+ fn from(
+ lsr: (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ ),
+ ) -> Self {
+ Self {
+ id: lsr.into(),
+ ..Default::default()
+ }
+ }
+}
diff --git a/vendor/icu_locid/src/macros.rs b/vendor/icu_locid/src/macros.rs
new file mode 100644
index 000000000..4537cd403
--- /dev/null
+++ b/vendor/icu_locid/src/macros.rs
@@ -0,0 +1,191 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// A macro allowing for compile-time construction of valid [`LanguageIdentifier`]s.
+///
+/// The macro will perform syntax canonicalization of the tag.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{langid, LanguageIdentifier};
+///
+/// const DE_AT: LanguageIdentifier = langid!("de_at");
+///
+/// let de_at: LanguageIdentifier = "de_at".parse().unwrap();
+///
+/// assert_eq!(DE_AT, de_at);
+/// ```
+///
+/// *Note*: The macro cannot produce language identifiers with more than one variants due to const
+/// limitations (see [`Heap Allocations in Constants`]):
+///
+/// ```compile_fail,E0080
+/// icu::locid::langid!("und-variant1-variant2");
+/// ```
+///
+/// Use runtime parsing instead:
+/// ```
+/// "und-variant1-variant2"
+/// .parse::<icu::locid::LanguageIdentifier>()
+/// .unwrap();
+/// ```
+///
+/// [`LanguageIdentifier`]: crate::LanguageIdentifier
+/// [`Heap Allocations in Constants`]: https://github.com/rust-lang/const-eval/issues/20
+#[macro_export]
+macro_rules! langid {
+ ($langid:literal) => {{
+ const R: $crate::LanguageIdentifier =
+ match $crate::LanguageIdentifier::try_from_bytes_with_single_variant($langid.as_bytes()) {
+ Ok((language, script, region, variant)) => $crate::LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: match variant {
+ Some(v) => $crate::subtags::Variants::from_variant(v),
+ None => $crate::subtags::Variants::new(),
+ }
+ },
+ #[allow(clippy::panic)] // const context
+ _ => panic!(concat!("Invalid language code: ", $langid, " . Note langid! macro can only support up to a single variant tag. Use runtime parsing instead.")),
+ };
+ R
+ }};
+}
+
+/// A macro allowing for compile-time construction of valid [`Locale`]s.
+///
+/// The macro will perform syntax canonicalization of the tag.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::{locale, Locale};
+///
+/// const DE_AT: Locale = locale!("de_at");
+///
+/// let de_at: Locale = "de_at".parse().unwrap();
+///
+/// assert_eq!(DE_AT, de_at);
+/// ```
+///
+/// *Note*: The macro cannot produce locales with more than one variant or multiple extensions
+/// (only single keyword unicode extension is supported) due to const
+/// limitations (see [`Heap Allocations in Constants`]):
+///
+/// ```compile_fail,E0080
+/// icu::locid::locale!("sl-IT-rozaj-biske-1994");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "sl-IT-rozaj-biske-1994"
+/// .parse::<icu::locid::Locale>()
+/// .unwrap();
+/// ```
+///
+/// Locales with multiple keys are not supported
+/// ```compile_fail,E0080
+/// icu::locid::locale!("th-TH-u-ca-buddhist-nu-thai");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "th-TH-u-ca-buddhist-nu-thai"
+/// .parse::<icu::locid::Locale>()
+/// .unwrap();
+/// ```
+///
+/// Locales with attributes are not supported
+/// ```compile_fail,E0080
+/// icu::locid::locale!("en-US-u-foobar-ca-buddhist");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "en-US-u-foobar-ca-buddhist"
+/// .parse::<icu::locid::Locale>()
+/// .unwrap();
+/// ```
+///
+/// Locales with single key but multiple types are not supported
+/// ```compile_fail,E0080
+/// icu::locid::locale!("en-US-u-ca-islamic-umalqura");
+/// ```
+/// Use runtime parsing instead:
+/// ```
+/// "en-US-u-ca-islamic-umalqura"
+/// .parse::<icu::locid::Locale>()
+/// .unwrap();
+/// ```
+/// [`Locale`]: crate::Locale
+/// [`Heap Allocations in Constants`]: https://github.com/rust-lang/const-eval/issues/20
+#[macro_export]
+macro_rules! locale {
+ ($locale:literal) => {{
+ const R: $crate::Locale =
+ match $crate::Locale::try_from_bytes_with_single_variant_single_keyword_unicode_extension(
+ $locale.as_bytes(),
+ ) {
+ Ok((language, script, region, variant, keyword)) => $crate::Locale {
+ id: $crate::LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: match variant {
+ Some(v) => $crate::subtags::Variants::from_variant(v),
+ None => $crate::subtags::Variants::new(),
+ },
+ },
+ extensions: match keyword {
+ Some(k) => $crate::extensions::Extensions::from_unicode(
+ $crate::extensions::unicode::Unicode {
+ keywords: $crate::extensions::unicode::Keywords::new_single(
+ k.0,
+ $crate::extensions::unicode::Value::from_tinystr(k.1),
+ ),
+
+ attributes: $crate::extensions::unicode::Attributes::new(),
+ },
+ ),
+ None => $crate::extensions::Extensions::new(),
+ },
+ },
+ #[allow(clippy::panic)] // const context
+ _ => panic!(concat!(
+ "Invalid language code: ",
+ $locale,
+ " . Note the locale! macro only supports up to one variant tag; \
+ unicode extensions are not supported. Use \
+ runtime parsing instead."
+ )),
+ };
+ R
+ }};
+}
+
+#[cfg(test)]
+mod test {
+ use crate::LanguageIdentifier;
+ use crate::Locale;
+
+ #[test]
+ fn test_langid_macro_can_parse_langid_with_single_variant() {
+ const DE_AT_FOOBAR: LanguageIdentifier = langid!("de_at-foobar");
+ let de_at_foobar: LanguageIdentifier = "de_at-foobar".parse().unwrap();
+ assert_eq!(DE_AT_FOOBAR, de_at_foobar);
+ }
+
+ #[test]
+ fn test_locale_macro_can_parse_locale_with_single_variant() {
+ const DE_AT_FOOBAR: Locale = locale!("de_at-foobar");
+ let de_at_foobar: Locale = "de_at-foobar".parse().unwrap();
+ assert_eq!(DE_AT_FOOBAR, de_at_foobar);
+ }
+
+ #[test]
+ fn test_locale_macro_can_parse_locale_with_single_keyword_unicode_extension() {
+ const DE_AT_U_CA_FOOBAR: Locale = locale!("de_at-u-ca-foobar");
+ let de_at_u_ca_foobar: Locale = "de_at-u-ca-foobar".parse().unwrap();
+ assert_eq!(DE_AT_U_CA_FOOBAR, de_at_u_ca_foobar);
+ }
+}
diff --git a/vendor/icu_locid/src/ordering.rs b/vendor/icu_locid/src/ordering.rs
new file mode 100644
index 000000000..b4e5c2760
--- /dev/null
+++ b/vendor/icu_locid/src/ordering.rs
@@ -0,0 +1,61 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Utilities for performing ordering operations on locales.
+
+use core::cmp::Ordering;
+
+/// The result of a subtag iterator comparison operation.
+///
+/// See [`Locale::strict_cmp_iter`].
+///
+/// # Examples
+///
+/// Check whether a stream of subtags contains two expected locales back-to-back:
+///
+/// ```
+/// use icu::locid::{locale, Locale, SubtagOrderingResult};
+/// use std::cmp::Ordering;
+///
+/// let subtags = b"en-US-it-IT".split(|b| *b == b'-');
+/// let locales = [locale!("en-US"), locale!("it-IT")];
+/// let mut result = SubtagOrderingResult::Subtags(subtags);
+/// for loc in locales.iter() {
+/// match result {
+/// SubtagOrderingResult::Subtags(it) => {
+/// result = loc.strict_cmp_iter(it);
+/// }
+/// SubtagOrderingResult::Ordering(ord) => break,
+/// }
+/// }
+///
+/// assert_eq!(Ordering::Equal, result.end());
+/// ```
+///
+/// [`Locale::strict_cmp_iter`]: crate::Locale::strict_cmp_iter
+#[allow(clippy::exhaustive_enums)] // well-defined exhaustive enum semantics
+pub enum SubtagOrderingResult<I> {
+ /// Potentially remaining subtags after the comparison operation.
+ Subtags(I),
+ /// Resolved ordering between the locale object and the subtags.
+ Ordering(Ordering),
+}
+
+impl<I> SubtagOrderingResult<I>
+where
+ I: Iterator,
+{
+ /// Invoke this function if there are no remaining locale objects to chain in order to get
+ /// a fully resolved [`Ordering`].
+ #[inline]
+ pub fn end(self) -> Ordering {
+ match self {
+ Self::Subtags(mut it) => match it.next() {
+ Some(_) => Ordering::Less,
+ None => Ordering::Equal,
+ },
+ Self::Ordering(o) => o,
+ }
+ }
+}
diff --git a/vendor/icu_locid/src/parser/errors.rs b/vendor/icu_locid/src/parser/errors.rs
new file mode 100644
index 000000000..a989bcc60
--- /dev/null
+++ b/vendor/icu_locid/src/parser/errors.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use displaydoc::Display;
+
+/// List of parser errors that can be generated
+/// while parsing [`LanguageIdentifier`](crate::LanguageIdentifier), [`Locale`](crate::Locale),
+/// [`subtags`](crate::subtags) or [`extensions`](crate::extensions).
+#[derive(Display, Debug, PartialEq, Copy, Clone)]
+#[non_exhaustive]
+pub enum ParserError {
+ /// Invalid language subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("x2".parse::<Language>(), Err(ParserError::InvalidLanguage));
+ /// ```
+ #[displaydoc("The given language subtag is invalid")]
+ InvalidLanguage,
+
+ /// Invalid script, region or variant subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("#@2X".parse::<Region>(), Err(ParserError::InvalidSubtag));
+ /// ```
+ #[displaydoc("Invalid subtag")]
+ InvalidSubtag,
+
+ /// Invalid extension subtag.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::extensions::unicode::Key;
+ /// use icu::locid::ParserError;
+ ///
+ /// assert_eq!("#@2X".parse::<Key>(), Err(ParserError::InvalidExtension));
+ /// ```
+ #[displaydoc("Invalid extension")]
+ InvalidExtension,
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for ParserError {}
diff --git a/vendor/icu_locid/src/parser/langid.rs b/vendor/icu_locid/src/parser/langid.rs
new file mode 100644
index 000000000..9efa078ac
--- /dev/null
+++ b/vendor/icu_locid/src/parser/langid.rs
@@ -0,0 +1,269 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub use super::errors::ParserError;
+use crate::extensions::unicode::{Attribute, Key, Value};
+use crate::extensions::ExtensionType;
+use crate::parser::{get_subtag_iterator, SubtagIterator};
+use crate::LanguageIdentifier;
+use crate::{extensions, subtags};
+use alloc::vec::Vec;
+use tinystr::TinyAsciiStr;
+
+#[derive(PartialEq, Clone, Copy)]
+pub enum ParserMode {
+ LanguageIdentifier,
+ Locale,
+ Partial,
+}
+
+#[derive(PartialEq, Clone, Copy)]
+enum ParserPosition {
+ Script,
+ Region,
+ Variant,
+}
+
+pub fn parse_language_identifier_from_iter(
+ iter: &mut SubtagIterator,
+ mode: ParserMode,
+) -> Result<LanguageIdentifier, ParserError> {
+ let mut script = None;
+ let mut region = None;
+ let mut variants = Vec::new();
+
+ let language = if let Some(subtag) = iter.next() {
+ subtags::Language::try_from_bytes(subtag)?
+ } else {
+ return Err(ParserError::InvalidLanguage);
+ };
+
+ let mut position = ParserPosition::Script;
+
+ while let Some(subtag) = iter.peek() {
+ if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 {
+ break;
+ }
+
+ if position == ParserPosition::Script {
+ if let Ok(s) = subtags::Script::try_from_bytes(subtag) {
+ script = Some(s);
+ position = ParserPosition::Region;
+ } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v);
+ }
+ position = ParserPosition::Variant;
+ } else if mode == ParserMode::Partial {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if position == ParserPosition::Region {
+ if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v);
+ }
+ position = ParserPosition::Variant;
+ } else if mode == ParserMode::Partial {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
+ if let Err(idx) = variants.binary_search(&v) {
+ variants.insert(idx, v);
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if mode == ParserMode::Partial {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ iter.next();
+ }
+
+ Ok(LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_vec_unchecked(variants),
+ })
+}
+
+pub fn parse_language_identifier(
+ t: &[u8],
+ mode: ParserMode,
+) -> Result<LanguageIdentifier, ParserError> {
+ let mut iter = get_subtag_iterator(t);
+ parse_language_identifier_from_iter(&mut iter, mode)
+}
+
+#[allow(clippy::type_complexity)]
+pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
+ mut iter: SubtagIterator,
+ mode: ParserMode,
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+ ),
+ ParserError,
+> {
+ let language;
+ let mut script = None;
+ let mut region = None;
+ let mut variant = None;
+ let mut keyword = None;
+
+ if let (i, Some((t, start, end))) = iter.next_manual() {
+ iter = i;
+ match subtags::Language::try_from_bytes_manual_slice(t, start, end) {
+ Ok(l) => language = l,
+ Err(e) => return Err(e),
+ }
+ } else {
+ return Err(ParserError::InvalidLanguage);
+ }
+
+ let mut position = ParserPosition::Script;
+
+ while let Some((t, start, end)) = iter.peek_manual() {
+ if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 {
+ break;
+ }
+
+ if matches!(position, ParserPosition::Script) {
+ if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(t, start, end) {
+ script = Some(s);
+ position = ParserPosition::Region;
+ } else if let Ok(r) = subtags::Region::try_from_bytes_manual_slice(t, start, end) {
+ region = Some(r);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) {
+ // We cannot handle multiple variants in a const context
+ debug_assert!(variant.is_none());
+ variant = Some(v);
+ position = ParserPosition::Variant;
+ } else if matches!(mode, ParserMode::Partial) {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if matches!(position, ParserPosition::Region) {
+ if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(t, start, end) {
+ region = Some(s);
+ position = ParserPosition::Variant;
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) {
+ // We cannot handle multiple variants in a const context
+ debug_assert!(variant.is_none());
+ variant = Some(v);
+ position = ParserPosition::Variant;
+ } else if matches!(mode, ParserMode::Partial) {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+ } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(t, start, end) {
+ debug_assert!(matches!(position, ParserPosition::Variant));
+ if variant.is_some() {
+ // We cannot handle multiple variants in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ variant = Some(v);
+ } else if matches!(mode, ParserMode::Partial) {
+ break;
+ } else {
+ return Err(ParserError::InvalidSubtag);
+ }
+
+ iter = iter.next_manual().0;
+ }
+
+ if matches!(mode, ParserMode::Locale) {
+ if let Some((bytes, start, end)) = iter.peek_manual() {
+ match ExtensionType::try_from_bytes_manual_slice(bytes, start, end) {
+ Ok(ExtensionType::Unicode) => {
+ iter = iter.next_manual().0;
+ if let Some((bytes, start, end)) = iter.peek_manual() {
+ if Attribute::try_from_bytes_manual_slice(bytes, start, end).is_ok() {
+ // We cannot handle Attributes in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ }
+
+ let mut key = None;
+ let mut current_type = None;
+
+ while let Some((bytes, start, end)) = iter.peek_manual() {
+ let slen = end - start;
+ if slen == 2 {
+ if key.is_some() {
+ // We cannot handle more than one Key in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ match Key::try_from_bytes_manual_slice(bytes, start, end) {
+ Ok(k) => key = Some(k),
+ Err(e) => return Err(e),
+ };
+ } else if key.is_some() {
+ match Value::parse_subtag_from_bytes_manual_slice(bytes, start, end) {
+ Ok(Some(t)) => {
+ if current_type.is_some() {
+ // We cannot handle more than one type in a const context
+ return Err(ParserError::InvalidSubtag);
+ }
+ current_type = Some(t);
+ }
+ Ok(None) => {}
+ Err(e) => return Err(e),
+ }
+ } else {
+ break;
+ }
+ iter = iter.next_manual().0
+ }
+ if let Some(k) = key {
+ keyword = Some((k, current_type));
+ }
+ }
+ // We cannot handle Transform, Private, Other extensions in a const context
+ Ok(_) => return Err(ParserError::InvalidSubtag),
+ Err(e) => return Err(e),
+ }
+ }
+ }
+
+ Ok((language, script, region, variant, keyword))
+}
+
+#[allow(clippy::type_complexity)]
+pub const fn parse_language_identifier_with_single_variant(
+ t: &[u8],
+ mode: ParserMode,
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ ),
+ ParserError,
+> {
+ let iter = get_subtag_iterator(t);
+ match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
+ Ok((l, s, r, v, _)) => Ok((l, s, r, v)),
+ Err(e) => Err(e),
+ }
+}
diff --git a/vendor/icu_locid/src/parser/locale.rs b/vendor/icu_locid/src/parser/locale.rs
new file mode 100644
index 000000000..805b6c290
--- /dev/null
+++ b/vendor/icu_locid/src/parser/locale.rs
@@ -0,0 +1,42 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use tinystr::TinyAsciiStr;
+
+use crate::extensions::{self, Extensions};
+use crate::parser::errors::ParserError;
+use crate::parser::{get_subtag_iterator, parse_language_identifier_from_iter, ParserMode};
+use crate::{subtags, Locale};
+
+use super::parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter;
+
+pub fn parse_locale(t: &[u8]) -> Result<Locale, ParserError> {
+ let mut iter = get_subtag_iterator(t);
+
+ let id = parse_language_identifier_from_iter(&mut iter, ParserMode::Locale)?;
+ let extensions = if iter.peek().is_some() {
+ Extensions::try_from_iter(&mut iter)?
+ } else {
+ Extensions::default()
+ };
+ Ok(Locale { id, extensions })
+}
+
+#[allow(clippy::type_complexity)]
+pub const fn parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(
+ t: &[u8],
+ mode: ParserMode,
+) -> Result<
+ (
+ subtags::Language,
+ Option<subtags::Script>,
+ Option<subtags::Region>,
+ Option<subtags::Variant>,
+ Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
+ ),
+ ParserError,
+> {
+ let iter = get_subtag_iterator(t);
+ parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode)
+}
diff --git a/vendor/icu_locid/src/parser/mod.rs b/vendor/icu_locid/src/parser/mod.rs
new file mode 100644
index 000000000..fef10b0ab
--- /dev/null
+++ b/vendor/icu_locid/src/parser/mod.rs
@@ -0,0 +1,98 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub mod errors;
+mod langid;
+mod locale;
+
+pub use errors::ParserError;
+pub use langid::{
+ parse_language_identifier, parse_language_identifier_from_iter,
+ parse_language_identifier_with_single_variant,
+ parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter, ParserMode,
+};
+
+pub use locale::{
+ parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
+};
+
+pub const fn get_subtag_iterator(slice: &[u8]) -> SubtagIterator {
+ let mut current_start = 0;
+ #[allow(clippy::indexing_slicing)]
+ while current_start < slice.len()
+ && (slice[current_start] == b'-' || slice[current_start] == b'_')
+ {
+ current_start += 1;
+ }
+ let mut current_end = current_start;
+ #[allow(clippy::indexing_slicing)]
+ while current_end < slice.len() && slice[current_end] != b'-' && slice[current_end] != b'_' {
+ current_end += 1;
+ }
+ SubtagIterator {
+ slice,
+ current_start,
+ current_end,
+ }
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct SubtagIterator<'a> {
+ slice: &'a [u8],
+ current_start: usize,
+ current_end: usize,
+}
+
+pub type ManualSlice<'a> = (&'a [u8], usize, usize);
+
+impl<'a> SubtagIterator<'a> {
+ pub const fn next_manual(mut self) -> (Self, Option<ManualSlice<'a>>) {
+ if self.current_start == self.current_end {
+ (self, None)
+ } else {
+ let r = (self.slice, self.current_start, self.current_end);
+ self.current_start = self.current_end;
+ #[allow(clippy::indexing_slicing)]
+ while self.current_start < self.slice.len()
+ && (self.slice[self.current_start] == b'-'
+ || self.slice[self.current_start] == b'_')
+ {
+ self.current_start += 1;
+ }
+ self.current_end = self.current_start;
+ #[allow(clippy::indexing_slicing)]
+ while self.current_end < self.slice.len()
+ && self.slice[self.current_end] != b'-'
+ && self.slice[self.current_end] != b'_'
+ {
+ self.current_end += 1;
+ }
+ (self, Some(r))
+ }
+ }
+
+ pub const fn peek_manual(&self) -> Option<ManualSlice<'a>> {
+ if self.current_start == self.current_end {
+ None
+ } else {
+ Some((self.slice, self.current_start, self.current_end))
+ }
+ }
+
+ pub fn peek(&self) -> Option<&'a [u8]> {
+ #[allow(clippy::indexing_slicing)] // peek_manual returns valid indices
+ self.peek_manual().map(|(t, s, e)| &t[s..e])
+ }
+}
+
+impl<'a> Iterator for SubtagIterator<'a> {
+ type Item = &'a [u8];
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let (s, res) = self.next_manual();
+ self.clone_from(&s);
+ #[allow(clippy::indexing_slicing)] // next_manual returns valid indices
+ res.map(|(t, s, e)| &t[s..e])
+ }
+}
diff --git a/vendor/icu_locid/src/serde.rs b/vendor/icu_locid/src/serde.rs
new file mode 100644
index 000000000..3bfe30388
--- /dev/null
+++ b/vendor/icu_locid/src/serde.rs
@@ -0,0 +1,135 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::LanguageIdentifier;
+use alloc::string::ToString;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+impl Serialize for LanguageIdentifier {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ serializer.serialize_str(&self.to_string())
+ }
+}
+
+impl<'de> Deserialize<'de> for LanguageIdentifier {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ struct LanguageIdentifierVisitor;
+
+ impl<'de> serde::de::Visitor<'de> for LanguageIdentifierVisitor {
+ type Value = LanguageIdentifier;
+
+ fn expecting(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(formatter, "a valid Unicode Language Identifier")
+ }
+
+ fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
+ where
+ E: serde::de::Error,
+ {
+ s.parse::<LanguageIdentifier>()
+ .map_err(serde::de::Error::custom)
+ }
+ }
+
+ deserializer.deserialize_string(LanguageIdentifierVisitor)
+ }
+}
+
+#[test]
+fn json() {
+ use crate::langid;
+ use crate::subtags::{Language, Region, Script};
+
+ assert_eq!(
+ serde_json::to_string(&langid!("en-US")).unwrap(),
+ r#""en-US""#
+ );
+ assert_eq!(
+ serde_json::from_str::<LanguageIdentifier>(r#""en-US""#).unwrap(),
+ langid!("en-US")
+ );
+ assert!(serde_json::from_str::<LanguageIdentifier>(r#""2Xs""#).is_err());
+
+ assert_eq!(
+ serde_json::to_string(&"fr".parse::<Language>().unwrap()).unwrap(),
+ r#""fr""#
+ );
+ assert_eq!(
+ serde_json::from_str::<Language>(r#""fr""#).unwrap(),
+ "fr".parse::<Language>().unwrap()
+ );
+ assert!(serde_json::from_str::<Language>(r#""2Xs""#).is_err());
+
+ assert_eq!(
+ serde_json::to_string(&"Latn".parse::<Script>().unwrap()).unwrap(),
+ r#""Latn""#
+ );
+ assert_eq!(
+ serde_json::from_str::<Script>(r#""Latn""#).unwrap(),
+ "Latn".parse::<Script>().unwrap()
+ );
+ assert!(serde_json::from_str::<Script>(r#""2Xs""#).is_err());
+
+ assert_eq!(
+ serde_json::to_string(&"US".parse::<Region>().unwrap()).unwrap(),
+ r#""US""#
+ );
+ assert_eq!(
+ serde_json::from_str::<Region>(r#""US""#).unwrap(),
+ "US".parse::<Region>().unwrap()
+ );
+ assert!(serde_json::from_str::<Region>(r#""2Xs""#).is_err());
+}
+
+#[test]
+fn postcard() {
+ use crate::langid;
+ use crate::subtags::{Language, Region, Script};
+
+ assert_eq!(
+ postcard::to_stdvec(&langid!("en-US")).unwrap(),
+ &[5, b'e', b'n', b'-', b'U', b'S']
+ );
+ assert_eq!(
+ postcard::from_bytes::<LanguageIdentifier>(&[5, b'e', b'n', b'-', b'U', b'S']).unwrap(),
+ langid!("en-US")
+ );
+ assert!(postcard::from_bytes::<LanguageIdentifier>(&[3, b'2', b'X', b's']).is_err());
+
+ assert_eq!(
+ postcard::to_stdvec(&"fr".parse::<Language>().unwrap()).unwrap(),
+ b"fr\0"
+ );
+ assert_eq!(
+ postcard::from_bytes::<Language>(b"fr\0").unwrap(),
+ "fr".parse::<Language>().unwrap()
+ );
+ assert!(postcard::from_bytes::<Language>(b"2Xs").is_err());
+
+ assert_eq!(
+ postcard::to_stdvec(&"Latn".parse::<Script>().unwrap()).unwrap(),
+ b"Latn"
+ );
+ assert_eq!(
+ postcard::from_bytes::<Script>(b"Latn").unwrap(),
+ "Latn".parse::<Script>().unwrap()
+ );
+ assert!(postcard::from_bytes::<Script>(b"2Xss").is_err());
+
+ assert_eq!(
+ postcard::to_stdvec(&"US".parse::<Region>().unwrap()).unwrap(),
+ b"US\0"
+ );
+ assert_eq!(
+ postcard::from_bytes::<Region>(b"US\0").unwrap(),
+ "US".parse::<Region>().unwrap()
+ );
+ assert!(postcard::from_bytes::<Region>(b"2Xs").is_err());
+}
diff --git a/vendor/icu_locid/src/subtags/language.rs b/vendor/icu_locid/src/subtags/language.rs
new file mode 100644
index 000000000..a5ec8d76e
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/language.rs
@@ -0,0 +1,107 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A language subtag (examples: `"en"`, `"csb"`, `"zh"`, `"und"`, etc.)
+ ///
+ /// [`Language`] represents a Unicode base language code conformat to the
+ /// [`unicode_language_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let language: Language =
+ /// "en".parse().expect("Failed to parse a language subtag.");
+ /// ```
+ ///
+ /// If the [`Language`] has no value assigned, it serializes to a string `"und"`, which
+ /// can be then parsed back to an empty [`Language`] field.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// assert_eq!(Language::default().as_str(), "und");
+ /// ```
+ ///
+ /// `Notice`: ICU4X uses a narrow form of language subtag of 2-3 characters.
+ /// The specification allows language subtag to optionally also be 5-8 characters
+ /// but that form has not been used and ICU4X does not support it right now.
+ ///
+ /// [`unicode_language_id`]: https://unicode.org/reports/tr35/#unicode_language_id
+ Language,
+ subtags::Language,
+ subtags_language,
+ 2..=3,
+ s,
+ s.is_ascii_alphabetic(),
+ s.to_ascii_lowercase(),
+ s.is_ascii_alphabetic_lowercase(),
+ InvalidLanguage,
+ ["en", "foo"],
+ ["419", "german", "en1"],
+);
+
+impl Language {
+ /// The default undefined language "und". Same as [`default()`](Default::default()).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// assert_eq!(Language::default(), Language::UND);
+ /// assert_eq!("und", Language::UND.to_string());
+ /// ```
+ pub const UND: Self = unsafe { Self::from_raw_unchecked(*b"und") };
+
+ /// Resets the [`Language`] subtag to an empty one (equal to `"und"`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let mut lang: Language = "csb".parse().expect("Parsing failed.");
+ ///
+ /// assert_eq!(lang.as_str(), "csb");
+ ///
+ /// lang.clear();
+ ///
+ /// assert_eq!(lang.as_str(), "und");
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ *self = Self::UND
+ }
+
+ /// Tests if the [`Language`] subtag is empty (equal to `"und"`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Language;
+ ///
+ /// let mut lang: Language = "und".parse().expect("Parsing failed.");
+ ///
+ /// assert!(lang.is_empty());
+ ///
+ /// lang.clear();
+ ///
+ /// assert!(lang.is_empty());
+ /// ```
+ #[inline]
+ pub fn is_empty(self) -> bool {
+ self == Self::UND
+ }
+}
+
+impl Default for Language {
+ fn default() -> Language {
+ Language::UND
+ }
+}
diff --git a/vendor/icu_locid/src/subtags/mod.rs b/vendor/icu_locid/src/subtags/mod.rs
new file mode 100644
index 000000000..bd243a321
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/mod.rs
@@ -0,0 +1,58 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Language Identifier and Locale contains a set of subtags
+//! which represent different fields of the structure.
+//!
+//! * [`Language`] is the only mandatory field, which when empty,
+//! takes the value `und`.
+//! * [`Script`] is an optional field representing the written script used by the locale.
+//! * [`Region`] is the region used by the locale.
+//! * [`Variants`] is a list of optional [`Variant`] subtags containing information about the
+//! variant adjustments used by the locale.
+//!
+//! Subtags can be used in isolation, and all basic operations such as parsing, syntax canonicalization
+//! and serialization are supported on each individual subtag, but most commonly
+//! they are used to construct a [`LanguageIdentifier`] instance.
+//!
+//! [`Variants`] is a special structure which contains a list of [`Variant`] subtags.
+//! It is wrapped around to allow for sorting and deduplication of variants, which
+//! is one of the required steps of language identifier and locale syntax canonicalization.
+//!
+//! # Examples
+//!
+//! ```
+//! use icu::locid::subtags::{Language, Region, Script, Variant};
+//!
+//! let language: Language =
+//! "en".parse().expect("Failed to parse a language subtag.");
+//! let script: Script =
+//! "arab".parse().expect("Failed to parse a script subtag.");
+//! let region: Region =
+//! "cn".parse().expect("Failed to parse a region subtag.");
+//! let variant: Variant =
+//! "MacOS".parse().expect("Failed to parse a variant subtag.");
+//!
+//! assert_eq!(language.as_str(), "en");
+//! assert_eq!(script.as_str(), "Arab");
+//! assert_eq!(region.as_str(), "CN");
+//! assert_eq!(variant.as_str(), "macos");
+//! ```
+//!
+//! `Notice`: The subtags are canonicalized on parsing. That means
+//! that all operations work on a canonicalized version of the subtag
+//! and serialization is very cheap.
+//!
+//! [`LanguageIdentifier`]: super::LanguageIdentifier
+mod language;
+mod region;
+mod script;
+mod variant;
+mod variants;
+
+pub use language::Language;
+pub use region::Region;
+pub use script::Script;
+pub use variant::Variant;
+pub use variants::Variants;
diff --git a/vendor/icu_locid/src/subtags/region.rs b/vendor/icu_locid/src/subtags/region.rs
new file mode 100644
index 000000000..f605937ce
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/region.rs
@@ -0,0 +1,61 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A region subtag (examples: `"US"`, `"CN"`, `"AR"` etc.)
+ ///
+ /// [`Region`] represents a Unicode base language code conformat to the
+ /// [`unicode_region_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ ///
+ /// let region: Region =
+ /// "DE".parse().expect("Failed to parse a region subtag.");
+ /// ```
+ ///
+ /// [`unicode_region_id`]: https://unicode.org/reports/tr35/#unicode_region_id
+ Region,
+ subtags::Region,
+ subtags_region,
+ 2..=3,
+ s,
+ if s.len() == 2 {
+ s.is_ascii_alphabetic()
+ } else {
+ s.is_ascii_numeric()
+ },
+ if s.len() == 2 {
+ s.to_ascii_uppercase()
+ } else {
+ s
+ },
+ if s.len() == 2 {
+ s.is_ascii_alphabetic_uppercase()
+ } else {
+ s.is_ascii_numeric()
+ },
+ InvalidSubtag,
+ ["FR", "123"],
+ ["12", "FRA", "b2"],
+);
+
+impl Region {
+ /// Returns true if the Region has an alphabetic code.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Region;
+ ///
+ /// let region = Region::try_from_bytes(b"us").expect("Parsing failed.");
+ ///
+ /// assert!(region.is_alphabetic());
+ /// ```
+ pub fn is_alphabetic(&self) -> bool {
+ self.0.len() == 2
+ }
+}
diff --git a/vendor/icu_locid/src/subtags/script.rs b/vendor/icu_locid/src/subtags/script.rs
new file mode 100644
index 000000000..05eb63d1c
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/script.rs
@@ -0,0 +1,32 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A script subtag (examples: `"Latn"`, `"Arab"`, etc.)
+ ///
+ /// [`Script`] represents a Unicode base language code conformat to the
+ /// [`unicode_script_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Script;
+ ///
+ /// let script: Script =
+ /// "Latn".parse().expect("Failed to parse a script subtag.");
+ /// ```
+ ///
+ /// [`unicode_script_id`]: https://unicode.org/reports/tr35/#unicode_script_id
+ Script,
+ subtags::Script,
+ subtags_script,
+ 4..=4,
+ s,
+ s.is_ascii_alphabetic(),
+ s.to_ascii_titlecase(),
+ s.is_ascii_alphabetic_titlecase(),
+ InvalidSubtag,
+ ["Latn"],
+ ["Latin"],
+);
diff --git a/vendor/icu_locid/src/subtags/variant.rs b/vendor/icu_locid/src/subtags/variant.rs
new file mode 100644
index 000000000..96fd7500e
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/variant.rs
@@ -0,0 +1,34 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+impl_tinystr_subtag!(
+ /// A variant subtag (examples: `"macos"`, `"posix"`, `"1996"` etc.)
+ ///
+ /// [`Variant`] represents a Unicode base language code conformat to the
+ /// [`unicode_variant_id`] field of the Language and Locale Identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Variant;
+ ///
+ /// let variant: Variant =
+ /// "macos".parse().expect("Failed to parse a variant subtag.");
+ /// ```
+ ///
+ /// [`unicode_variant_id`]: https://unicode.org/reports/tr35/#unicode_variant_id
+ Variant,
+ subtags::Variant,
+ subtags_variant,
+ 4..=8,
+ s,
+ s.is_ascii_alphanumeric() && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()),
+ s.to_ascii_lowercase(),
+ s.is_ascii_lowercase()
+ && s.is_ascii_alphanumeric()
+ && (s.len() != 4 || s.all_bytes()[0].is_ascii_digit()),
+ InvalidSubtag,
+ ["posix", "1996"],
+ ["yes"],
+);
diff --git a/vendor/icu_locid/src/subtags/variants.rs b/vendor/icu_locid/src/subtags/variants.rs
new file mode 100644
index 000000000..bbff9ebac
--- /dev/null
+++ b/vendor/icu_locid/src/subtags/variants.rs
@@ -0,0 +1,134 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::Variant;
+use crate::helpers::ShortVec;
+
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// A list of variants (examples: `["macos", "posix"]`, etc.)
+///
+/// [`Variants`] stores a list of [`Variant`] subtags in a canonical form
+/// by sorting and deduplicating them.
+///
+/// # Examples
+///
+/// ```
+/// use icu::locid::subtags::{Variant, Variants};
+///
+/// let variant1: Variant =
+/// "posix".parse().expect("Failed to parse a variant subtag.");
+///
+/// let variant2: Variant =
+/// "macos".parse().expect("Failed to parse a variant subtag.");
+/// let mut v = vec![variant1, variant2];
+/// v.sort();
+/// v.dedup();
+///
+/// let variants: Variants = Variants::from_vec_unchecked(v);
+/// assert_eq!(variants.to_string(), "macos-posix");
+/// ```
+#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
+pub struct Variants(ShortVec<Variant>);
+
+impl Variants {
+ /// Returns a new empty list of variants. Same as [`default()`](Default::default()), but is `const`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::Variants;
+ ///
+ /// assert_eq!(Variants::new(), Variants::default());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+ Self(ShortVec::new())
+ }
+
+ /// Creates a new [`Variants`] set from a single [`Variant`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{Variant, Variants};
+ ///
+ /// let variant: Variant = "posix".parse().expect("Parsing failed.");
+ /// let variants = Variants::from_variant(variant);
+ /// ```
+ #[inline]
+ pub const fn from_variant(variant: Variant) -> Self {
+ Self(ShortVec::new_single(variant))
+ }
+
+ /// Creates a new [`Variants`] set from a [`Vec`].
+ /// The caller is expected to provide sorted and deduplicated vector as
+ /// an input.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{Variant, Variants};
+ ///
+ /// let variant1: Variant = "posix".parse().expect("Parsing failed.");
+ /// let variant2: Variant = "macos".parse().expect("Parsing failed.");
+ /// let mut v = vec![variant1, variant2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let variants = Variants::from_vec_unchecked(v);
+ /// ```
+ ///
+ /// Notice: For performance- and memory-constrained environments, it is recommended
+ /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
+ /// and [`dedup`](Vec::dedup()).
+ pub fn from_vec_unchecked(input: Vec<Variant>) -> Self {
+ Self(ShortVec::from(input))
+ }
+
+ /// Empties the [`Variants`] list.
+ ///
+ /// Returns the old list.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use icu::locid::subtags::{Variant, Variants};
+ ///
+ /// let variant1: Variant = "posix".parse().expect("Parsing failed.");
+ /// let variant2: Variant = "macos".parse().expect("Parsing failed.");
+ /// let mut v = vec![variant1, variant2];
+ /// v.sort();
+ /// v.dedup();
+ ///
+ /// let mut variants: Variants = Variants::from_vec_unchecked(v);
+ ///
+ /// assert_eq!(variants.to_string(), "macos-posix");
+ ///
+ /// variants.clear();
+ ///
+ /// assert_eq!(variants.to_string(), "");
+ /// ```
+ pub fn clear(&mut self) -> Self {
+ core::mem::take(self)
+ }
+
+ pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
+ where
+ F: FnMut(&str) -> Result<(), E>,
+ {
+ self.deref().iter().map(|t| t.as_str()).try_for_each(f)
+ }
+}
+
+impl_writeable_for_subtag_list!(Variants, "macos", "posix");
+
+impl Deref for Variants {
+ type Target = [Variant];
+
+ fn deref(&self) -> &[Variant] {
+ self.0.as_slice()
+ }
+}
diff --git a/vendor/icu_locid/src/zerovec.rs b/vendor/icu_locid/src/zerovec.rs
new file mode 100644
index 000000000..530d21499
--- /dev/null
+++ b/vendor/icu_locid/src/zerovec.rs
@@ -0,0 +1,132 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Documentation on zero-copy deserialization of locale types.
+//!
+//! [`Locale`] and [`LanguageIdentifier`] are highly structured types that cannot be directly
+//! stored in a zero-copy data structure, such as those provided by the [`zerovec`] crate.
+//! This page explains how to indirectly store these types in a [`zerovec`].
+//!
+//! There are two main use cases, which have different solutions:
+//!
+//! 1. **Lookup:** You need to locate a locale in a zero-copy vector, such as when querying a map.
+//! 2. **Obtain:** You have a locale stored in a zero-copy vector, and you need to obtain a proper
+//! [`Locale`] or [`LanguageIdentifier`] for use elsewhere in your program.
+//!
+//! # Lookup
+//!
+//! To perform lookup, store the stringified locale in a canonical BCP-47 form as a byte array,
+//! and then use [`Locale::strict_cmp()`] to perform an efficient, zero-allocation lookup.
+//!
+//! To produce more human-readable serialized output, you can use [`UnvalidatedStr`].
+//!
+//! ```
+//! use icu_locid::Locale;
+//! use zerovec::ule::UnvalidatedStr;
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from locales to integers
+//! let data: &[(&UnvalidatedStr, u32)] = &[
+//! ("de-DE-u-hc-h12".into(), 5),
+//! ("en-US-u-ca-buddhist".into(), 10),
+//! ("my-MM".into(), 15),
+//! ("sr-Cyrl-ME".into(), 20),
+//! ("zh-TW".into(), 25),
+//! ];
+//! let zm: ZeroMap<UnvalidatedStr, u32> = data.iter().copied().collect();
+//!
+//! // Get the value associated with a locale
+//! let loc: Locale = "en-US-u-ca-buddhist".parse().unwrap();
+//! let value = zm.get_copied_by(|uvstr| loc.strict_cmp(uvstr).reverse());
+//! assert_eq!(value, Some(10));
+//! ```
+//!
+//! # Obtain
+//!
+//! Obtaining a [`Locale`] or [`LanguageIdentifier`] is not generally a zero-copy operation, since
+//! both of these types may require memory allocation. If possible, architect your code such that
+//! you do not need to obtain a structured type.
+//!
+//! If you need the structured type, such as if you need to manipulate it in some way, there are two
+//! options: storing subtags, and storing a string for parsing.
+//!
+//! ## Storing Subtags
+//!
+//! If the data being stored only contains a limited number of subtags, you can store them as a
+//! tuple, and then construct the [`LanguageIdentifier`] externally.
+//!
+//! ```
+//! use icu_locid::subtags::{Language, Region, Script};
+//! use icu_locid::LanguageIdentifier;
+//! use icu_locid::{
+//! langid, subtags_language as language, subtags_region as region,
+//! subtags_script as script,
+//! };
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from integer to LSR (language-script-region)
+//! let zm: ZeroMap<u32, (Language, Option<Script>, Option<Region>)> = [
+//! (5, (language!("de"), None, Some(region!("DE")))),
+//! (10, (language!("en"), None, Some(region!("US")))),
+//! (15, (language!("my"), None, Some(region!("MM")))),
+//! (
+//! 20,
+//! (language!("sr"), Some(script!("Cyrl")), Some(region!("ME"))),
+//! ),
+//! (25, (language!("zh"), None, Some(region!("TW")))),
+//! ]
+//! .into_iter()
+//! .collect();
+//!
+//! // Construct a LanguageIdentifier from a tuple entry
+//! let lid: LanguageIdentifier =
+//! zm.get_copied(&25).expect("element is present").into();
+//!
+//! assert_eq!(lid, langid!("zh-TW"));
+//! ```
+//!
+//! ## Storing Strings
+//!
+//! If it is necessary to store and obtain an arbitrary locale, it is currently recommended to
+//! store a BCP-47 string and parse it when needed.
+//!
+//! Since the string is stored in an unparsed state, it is not safe to `unwrap` the result from
+//! `Locale::try_from_bytes()`. See [icu4x#831](https://github.com/unicode-org/icu4x/issues/831)
+//! for a discussion on potential data models that could ensure that the locale is valid during
+//! deserialization.
+//!
+//! As above, to produce more human-readable serialized output, you can use [`UnvalidatedStr`].
+//!
+//! ```
+//! use icu_locid::langid;
+//! use icu_locid::Locale;
+//! use zerovec::ule::UnvalidatedStr;
+//! use zerovec::ZeroMap;
+//!
+//! // ZeroMap from integer to locale string
+//! let data: &[(u32, &UnvalidatedStr)] = &[
+//! (5, "de-DE-u-hc-h12".into()),
+//! (10, "en-US-u-ca-buddhist".into()),
+//! (15, "my-MM".into()),
+//! (20, "sr-Cyrl-ME".into()),
+//! (25, "zh-TW".into()),
+//! (30, "INVALID".into()),
+//! ];
+//! let zm: ZeroMap<u32, UnvalidatedStr> = data.iter().copied().collect();
+//!
+//! // Construct a Locale by parsing the string.
+//! let value = zm.get(&25).expect("element is present");
+//! let loc = Locale::try_from_bytes(value);
+//! assert_eq!(loc, Ok(langid!("zh-TW").into()));
+//!
+//! // Invalid entries are fallible
+//! let err_value = zm.get(&30).expect("element is present");
+//! let err_loc = Locale::try_from_bytes(err_value);
+//! assert!(matches!(err_loc, Err(_)));
+//! ```
+//!
+//! [`Locale`]: crate::Locale
+//! [`Locale::strict_cmp()`]: crate::Locale::strict_cmp()
+//! [`LanguageIdentifier`]: crate::LanguageIdentifier
+//! [`UnvalidatedStr`]: zerovec::ule::UnvalidatedStr
diff --git a/vendor/icu_locid/tests/fixtures/canonicalize.json b/vendor/icu_locid/tests/fixtures/canonicalize.json
new file mode 100644
index 000000000..b20e64c4b
--- /dev/null
+++ b/vendor/icu_locid/tests/fixtures/canonicalize.json
@@ -0,0 +1,18 @@
+[
+ {
+ "input": "Pl",
+ "output": "pl"
+ },
+ {
+ "input": "eN-uS",
+ "output": "en-US"
+ },
+ {
+ "input": "ZH_hans_hK",
+ "output": "zh-Hans-HK"
+ },
+ {
+ "input": "en-scouse-fonipa",
+ "output": "en-fonipa-scouse"
+ }
+]
diff --git a/vendor/icu_locid/tests/fixtures/invalid-extensions.json b/vendor/icu_locid/tests/fixtures/invalid-extensions.json
new file mode 100644
index 000000000..a5f3a923d
--- /dev/null
+++ b/vendor/icu_locid/tests/fixtures/invalid-extensions.json
@@ -0,0 +1,112 @@
+[
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-x-waytoolongkey"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-x-@A_3"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-t-h0"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-t-h0-x-foo"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-US-t-h0"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-n0-mixed"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u--"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u-t-latn"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "cmn-hans-cn-u-u"
+ },
+ "output": {
+ "error": "InvalidExtension",
+ "text": "Invalid subtag"
+ }
+ }
+]
diff --git a/vendor/icu_locid/tests/fixtures/invalid.json b/vendor/icu_locid/tests/fixtures/invalid.json
new file mode 100644
index 000000000..d44007596
--- /dev/null
+++ b/vendor/icu_locid/tests/fixtures/invalid.json
@@ -0,0 +1,85 @@
+[
+ {
+ "input": "a1a",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "Arab-US",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "",
+ "output": {
+ "error": "InvalidLanguage",
+ "text": "The given language subtag is invalid"
+ }
+ },
+ {
+ "input": "pl-DSDAFAFDF",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-$1231",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-US-$1231",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-12",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-a12",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": "pl-Latn-US-3_dd",
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pl-Latn-US-variant-h0-hybrid"
+ },
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-variant-emodeng-emodeng"
+ },
+ "output": {
+ "error": "InvalidSubtag",
+ "text": "Invalid subtag"
+ }
+ }
+]
diff --git a/vendor/icu_locid/tests/fixtures/langid.json b/vendor/icu_locid/tests/fixtures/langid.json
new file mode 100644
index 000000000..31740d99a
--- /dev/null
+++ b/vendor/icu_locid/tests/fixtures/langid.json
@@ -0,0 +1,167 @@
+[
+ {
+ "input": "en",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en"
+ }
+ },
+ {
+ "input": "lij",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij"
+ }
+ },
+ {
+ "input": "en-Latn",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn"
+ }
+ },
+ {
+ "input": "lij-Arab",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab"
+ }
+ },
+ {
+ "input": "en-Latn-US",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn",
+ "region": "US"
+ }
+ },
+ {
+ "input": "lij-Arab-FA",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab",
+ "region": "FA"
+ }
+ },
+ {
+ "input": "en-Latn-US-windows",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn",
+ "region": "US",
+ "variants": ["windows"]
+ }
+ },
+ {
+ "input": "lij-Arab-FA-linux",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab",
+ "region": "FA",
+ "variants": ["linux"]
+ }
+ },
+ {
+ "input": "lij-Arab-FA-linux-nedis",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "lij",
+ "script": "Arab",
+ "region": "FA",
+ "variants": ["linux", "nedis"]
+ }
+ },
+ {
+ "input": "EN-latn-us",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "en",
+ "script": "Latn",
+ "region": "US"
+ }
+ },
+ {
+ "input": "sl-nedis",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "sl",
+ "variants": ["nedis"]
+ }
+ },
+ {
+ "input": "de-CH-1996",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "de",
+ "region": "CH",
+ "variants": ["1996"]
+ }
+ },
+ {
+ "input": "sr-Latn",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "sr",
+ "script": "Latn"
+ }
+ },
+ {
+ "input": "es-419",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "es",
+ "region": "419"
+ }
+ },
+ {
+ "input": "und-Latn-US",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn",
+ "region": "US"
+ }
+ },
+ {
+ "input": "und",
+ "output": {
+ "type": "LanguageIdentifier"
+ }
+ },
+ {
+ "input": "und-Latn",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn"
+ }
+ },
+ {
+ "input": "pl-macos-Windows-nedis-aRabic",
+ "output": {
+ "type": "LanguageIdentifier",
+ "language": "pl",
+ "variants": ["arabic", "macos", "nedis", "windows"]
+ }
+ },
+ {
+ "input": "und-Latn-macos",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn",
+ "variants": ["macos"]
+ }
+ },
+ {
+ "input": "und-Latn-312",
+ "output": {
+ "type": "LanguageIdentifier",
+ "script": "Latn",
+ "region": "312"
+ }
+ }
+]
diff --git a/vendor/icu_locid/tests/fixtures/locale.json b/vendor/icu_locid/tests/fixtures/locale.json
new file mode 100644
index 000000000..e87699351
--- /dev/null
+++ b/vendor/icu_locid/tests/fixtures/locale.json
@@ -0,0 +1,298 @@
+[
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-u-hc-h12"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "hc": "h12"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-hc-h23"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "hc": "h23"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "attributes": [
+ "foo"
+ ]
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-hc-h23-ca-islamic-civil-ss-true"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "hc": "h23",
+ "ca": "islamic-civil",
+ "ss": "true"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-pl-Latn-DE"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "transform": {
+ "tlang": "pl-Latn-DE"
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-x-private-foobar"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "private": ["private", "foobar"]
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-h0-hybrid-k0-platform-s0-true"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "transform": {
+ "tfields": {
+ "h0": "hybrid",
+ "k0": "platform",
+ "s0": "true"
+ }
+ }
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-t-es-AR-x-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "transform": {
+ "tlang": "es-AR"
+ },
+ "private": ["foo"]
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-US-u-ca-buddhist-hc-h12-t-es-AR-h0-hybrid-x-private-foobar"
+ },
+ "output": {
+ "type": "Locale",
+ "language": "en",
+ "region": "US",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "ca": "buddhist",
+ "hc": "h12"
+ }
+ },
+ "transform": {
+ "tlang": "es-AR",
+ "tfields": {
+ "h0": "hybrid"
+ }
+ },
+ "private": ["private", "foobar"]
+ }
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "language": "es",
+ "region": "MX",
+ "extensions": {
+ "unicode": {
+ "keywords": {
+ "ca": "islamic",
+ "co": "search",
+ "nu": "roman"
+ }
+ }
+ }
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "es-MX-u-ca-islamic-co-search-nu-roman"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-u-kn"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-u-kn"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-u-kn-ca-calendar"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-u-ca-calendar-kn"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-u-kn-nu-arab"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-u-kn-nu-arab"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true-n0-mixed"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true-n0-mixed"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "und-t-m0-true-c0-mixed"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "und-t-c0-mixed-m0-true"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "da-u-ca-gregory-ca-buddhist"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "da-u-ca-gregory"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pt-u-attr2-attr1-ca-gregory"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "pt-u-attr1-attr2-ca-gregory"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "pt-u-attr1-attr2-attr1-ca-gregory"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "pt-u-attr1-attr2-ca-gregory"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-a-not-assigned"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-a-not-assigned"
+ }
+ },
+ {
+ "input": {
+ "type": "Locale",
+ "identifier": "en-w-bar-u-foo-a-bar-x-u-foo"
+ },
+ "output": {
+ "type": "Locale",
+ "identifier": "en-a-bar-u-foo-w-bar-x-u-foo"
+ }
+ }
+]
diff --git a/vendor/icu_locid/tests/fixtures/mod.rs b/vendor/icu_locid/tests/fixtures/mod.rs
new file mode 100644
index 000000000..b688632ba
--- /dev/null
+++ b/vendor/icu_locid/tests/fixtures/mod.rs
@@ -0,0 +1,260 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::collections::HashMap;
+use std::convert::{TryFrom, TryInto};
+
+use icu_locid::extensions::private;
+use icu_locid::extensions::transform;
+use icu_locid::extensions::unicode;
+use icu_locid::extensions::Extensions;
+use icu_locid::{subtags, LanguageIdentifier, Locale, ParserError};
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleIdentifier {
+ #[serde(rename = "type")]
+ pub field_type: String,
+ pub identifier: String,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleExtensionUnicode {
+ #[serde(default)]
+ keywords: HashMap<String, Option<String>>,
+ #[serde(default)]
+ attributes: Vec<String>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleExtensionTransform {
+ tlang: Option<String>,
+ #[serde(default)]
+ tfields: HashMap<String, Option<String>>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleExtensions {
+ unicode: Option<LocaleExtensionUnicode>,
+ transform: Option<LocaleExtensionTransform>,
+ #[serde(default)]
+ private: Vec<String>,
+ _other: Option<String>,
+}
+
+impl TryFrom<LocaleExtensions> for Extensions {
+ type Error = ParserError;
+
+ fn try_from(input: LocaleExtensions) -> Result<Self, Self::Error> {
+ let mut ext = Extensions::default();
+ if let Some(unicode) = input.unicode {
+ ext.unicode.keywords = unicode
+ .keywords
+ .iter()
+ .map(|(k, v)| {
+ (
+ unicode::Key::try_from_bytes(k.as_bytes()).expect("Parsing key failed."),
+ v.as_ref().map_or(
+ unicode::Value::try_from_bytes(b"").expect("Failed to parse Value"),
+ |v| {
+ unicode::Value::try_from_bytes(v.as_bytes())
+ .expect("Parsing type failed.")
+ },
+ ),
+ )
+ })
+ .collect();
+ let v: Vec<unicode::Attribute> = unicode
+ .attributes
+ .iter()
+ .map(|v| {
+ unicode::Attribute::try_from_bytes(v.as_bytes())
+ .expect("Parsing attribute failed.")
+ })
+ .collect();
+ ext.unicode.attributes = unicode::Attributes::from_vec_unchecked(v);
+ }
+ if let Some(transform) = input.transform {
+ ext.transform.fields = transform
+ .tfields
+ .iter()
+ .map(|(k, v)| {
+ (
+ transform::Key::try_from_bytes(k.as_bytes()).expect("Parsing key failed."),
+ v.as_ref()
+ .map(|v| {
+ transform::Value::try_from_bytes(v.as_bytes())
+ .expect("Parsing value failed.")
+ })
+ .expect("Value cannot be empty."),
+ )
+ })
+ .collect();
+
+ if let Some(tlang) = transform.tlang {
+ ext.transform.lang = Some(tlang.parse().expect("Failed to parse tlang."));
+ }
+ }
+ let v: Vec<private::Subtag> = input
+ .private
+ .iter()
+ .map(|v| private::Subtag::try_from_bytes(v.as_bytes()).expect("Failed to add field."))
+ .collect();
+ ext.private = private::Private::from_vec_unchecked(v);
+ Ok(ext)
+ }
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleSubtags {
+ #[serde(rename = "type")]
+ pub field_type: String,
+ pub language: Option<String>,
+ pub script: Option<String>,
+ pub region: Option<String>,
+ #[serde(default)]
+ pub variants: Vec<String>,
+ pub extensions: Option<LocaleExtensions>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct LocaleError {
+ pub error: String,
+ pub text: String,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+#[serde(untagged)]
+#[allow(clippy::large_enum_variant)] // test code
+pub enum LocaleInfo {
+ String(String),
+ Error(LocaleError),
+ Identifier(LocaleIdentifier),
+ Object(LocaleSubtags),
+}
+
+impl TryFrom<LocaleInfo> for LanguageIdentifier {
+ type Error = ParserError;
+
+ fn try_from(input: LocaleInfo) -> Result<Self, Self::Error> {
+ match input {
+ LocaleInfo::String(s) => s.parse(),
+ LocaleInfo::Error(e) => Err(e.into()),
+ LocaleInfo::Identifier(ident) => ident.try_into(),
+ LocaleInfo::Object(o) => o.try_into(),
+ }
+ }
+}
+
+impl TryFrom<LocaleInfo> for Locale {
+ type Error = ParserError;
+
+ fn try_from(input: LocaleInfo) -> Result<Self, Self::Error> {
+ match input {
+ LocaleInfo::String(s) => s.parse(),
+ LocaleInfo::Error(e) => Err(e.into()),
+ LocaleInfo::Identifier(ident) => ident.try_into(),
+ LocaleInfo::Object(o) => o.try_into(),
+ }
+ }
+}
+
+impl TryFrom<LocaleIdentifier> for LanguageIdentifier {
+ type Error = ParserError;
+
+ fn try_from(input: LocaleIdentifier) -> Result<Self, Self::Error> {
+ LanguageIdentifier::try_from_locale_bytes(input.identifier.as_bytes())
+ }
+}
+
+impl TryFrom<LocaleIdentifier> for Locale {
+ type Error = ParserError;
+
+ fn try_from(input: LocaleIdentifier) -> Result<Self, Self::Error> {
+ Locale::try_from_bytes(input.identifier.as_bytes())
+ }
+}
+
+impl TryFrom<LocaleSubtags> for LanguageIdentifier {
+ type Error = ParserError;
+
+ fn try_from(subtags: LocaleSubtags) -> Result<Self, Self::Error> {
+ let language = if let Some(lang) = subtags.language {
+ lang.parse().expect("Failed to parse language subtag")
+ } else {
+ subtags::Language::default()
+ };
+ let script = subtags
+ .script
+ .map(|s| s.parse().expect("Failed to parse script subtag."));
+ let region = subtags
+ .region
+ .map(|s| s.parse().expect("Failed to parse region subtag."));
+ let variants = subtags
+ .variants
+ .iter()
+ .map(|v| v.parse().expect("Failed to parse variant subtag."))
+ .collect::<Vec<_>>();
+ Ok(LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_vec_unchecked(variants),
+ })
+ }
+}
+
+impl TryFrom<LocaleSubtags> for Locale {
+ type Error = ParserError;
+
+ fn try_from(subtags: LocaleSubtags) -> Result<Self, Self::Error> {
+ let language = if let Some(lang) = subtags.language {
+ lang.parse().expect("Failed to parse language subtag")
+ } else {
+ subtags::Language::default()
+ };
+ let script = subtags
+ .script
+ .map(|s| s.parse().expect("Failed to parse script subtag."));
+ let region = subtags
+ .region
+ .map(|s| s.parse().expect("Failed to parse region subtag."));
+ let variants = subtags
+ .variants
+ .iter()
+ .map(|v| v.parse().expect("Failed to parse variant subtag."))
+ .collect::<Vec<_>>();
+ let extensions = if let Some(e) = subtags.extensions {
+ e.try_into().expect("Failed to parse extensions.")
+ } else {
+ Extensions::default()
+ };
+ Ok(Locale {
+ id: LanguageIdentifier {
+ language,
+ script,
+ region,
+ variants: subtags::Variants::from_vec_unchecked(variants),
+ },
+ extensions,
+ })
+ }
+}
+
+impl From<LocaleError> for ParserError {
+ fn from(e: LocaleError) -> Self {
+ match e.error.as_str() {
+ "InvalidLanguage" => ParserError::InvalidLanguage,
+ "InvalidSubtag" => ParserError::InvalidSubtag,
+ "InvalidExtension" => ParserError::InvalidExtension,
+ _ => unreachable!("Unknown error name"),
+ }
+ }
+}
+
+#[derive(Debug, Deserialize)]
+pub struct LocaleTest {
+ pub input: LocaleInfo,
+ pub output: LocaleInfo,
+}
diff --git a/vendor/icu_locid/tests/helpers/mod.rs b/vendor/icu_locid/tests/helpers/mod.rs
new file mode 100644
index 000000000..d250c510c
--- /dev/null
+++ b/vendor/icu_locid/tests/helpers/mod.rs
@@ -0,0 +1,15 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::fs::File;
+use std::io::{BufReader, Error};
+
+pub fn read_fixture<T>(path: &str) -> Result<T, Error>
+where
+ T: serde::de::DeserializeOwned,
+{
+ let file = File::open(path)?;
+ let reader = BufReader::new(file);
+ Ok(serde_json::from_reader(reader)?)
+}
diff --git a/vendor/icu_locid/tests/langid.rs b/vendor/icu_locid/tests/langid.rs
new file mode 100644
index 000000000..96d022a9b
--- /dev/null
+++ b/vendor/icu_locid/tests/langid.rs
@@ -0,0 +1,157 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use std::convert::TryInto;
+
+use icu_locid::{subtags, LanguageIdentifier, ParserError};
+
+type Result = std::result::Result<LanguageIdentifier, ParserError>;
+
+fn test_langid_fixtures(tests: Vec<fixtures::LocaleTest>) {
+ for test in tests {
+ match test.output {
+ fixtures::LocaleInfo::String(s) => {
+ if let fixtures::LocaleInfo::Object(ref o) = &test.input {
+ if o.field_type == "Locale" {
+ continue;
+ }
+ }
+ let input: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ assert_eq!(input.to_string(), s);
+ }
+ fixtures::LocaleInfo::Error(err) => {
+ let err: ParserError = err.into();
+ let input: Result = test.input.try_into();
+ assert_eq!(input, Err(err));
+ }
+ fixtures::LocaleInfo::Identifier(ident) => {
+ let input: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ let output: LanguageIdentifier = ident.try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ }
+ fixtures::LocaleInfo::Object(o) => {
+ let input: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ let output: LanguageIdentifier = o.try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ }
+ }
+ }
+}
+
+#[test]
+fn test_langid_parsing() {
+ let path = "./tests/fixtures/langid.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_invalid() {
+ let path = "./tests/fixtures/invalid.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_canonicalize() {
+ let path = "./tests/fixtures/canonicalize.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_from_locale() {
+ let path = "./tests/fixtures/locale.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_subtag_language() {
+ let mut lang: subtags::Language = "en".parse().expect("Failed to parse a language.");
+ assert_eq!(lang.as_str(), "en");
+
+ lang.clear();
+ assert_eq!(lang, subtags::Language::UND);
+ assert!(lang.is_empty());
+
+ assert_eq!(lang.to_string(), "und");
+}
+
+#[test]
+fn test_langid_subtag_region() {
+ let region: subtags::Region = "en".parse().expect("Failed to parse a region.");
+ assert_eq!(region.as_str(), "EN");
+ assert_eq!(region.to_string(), "EN");
+}
+
+#[test]
+fn test_langid_subtag_script() {
+ let script: subtags::Script = "Latn".parse().expect("Failed to parse a script.");
+ assert_eq!(script.as_str(), "Latn");
+ assert_eq!(script.to_string(), "Latn");
+}
+
+#[test]
+fn test_langid_subtag_variant() {
+ let variant: subtags::Variant = "macos".parse().expect("Failed to parse a variant.");
+ assert_eq!(variant.as_str(), "macos");
+ assert_eq!(variant.to_string(), "macos");
+}
+
+#[test]
+fn test_langid_subtag_variants() {
+ let variant: subtags::Variant = "macos".parse().expect("Failed to parse a variant.");
+ let mut variants = subtags::Variants::from_vec_unchecked(vec![variant]);
+ assert_eq!(variants.get(0), Some(&variant));
+ variants.clear();
+ assert_eq!(variants.len(), 0);
+}
+
+#[test]
+fn test_langid_normalizing_eq_str() {
+ let path = "./tests/fixtures/langid.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ for test in tests {
+ let parsed: LanguageIdentifier = test.input.try_into().expect("Parsing failed.");
+ assert!(parsed.normalizing_eq(parsed.to_string().as_str()));
+ }
+
+ // Check that trailing characters are not ignored
+ let lang: LanguageIdentifier = "en".parse().expect("Parsing failed.");
+ assert!(!lang.normalizing_eq("en-US"));
+}
+
+#[test]
+fn test_langid_strict_cmp() {
+ let path = "./tests/fixtures/langid.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ let bcp47_strings = tests
+ .iter()
+ .map(|t| match t.input {
+ fixtures::LocaleInfo::String(ref s) => s.as_str(),
+ _ => panic!("Invalid fixture"),
+ })
+ .collect::<Vec<&str>>();
+ for a in bcp47_strings.iter() {
+ for b in bcp47_strings.iter() {
+ let a_langid = a
+ .parse::<LanguageIdentifier>()
+ .expect("Invalid BCP-47 in fixture");
+ let a_normalized = a_langid.to_string();
+ let string_cmp = a_normalized.as_bytes().cmp(b.as_bytes());
+ let test_cmp = a_langid.strict_cmp(b.as_bytes());
+ assert_eq!(string_cmp, test_cmp, "{:?}/{:?}", a, b);
+ }
+ }
+}
diff --git a/vendor/icu_locid/tests/locale.rs b/vendor/icu_locid/tests/locale.rs
new file mode 100644
index 000000000..37c43181e
--- /dev/null
+++ b/vendor/icu_locid/tests/locale.rs
@@ -0,0 +1,122 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod fixtures;
+mod helpers;
+
+use std::convert::TryInto;
+
+use icu_locid::{LanguageIdentifier, Locale, ParserError};
+
+type Result = std::result::Result<Locale, ParserError>;
+
+fn test_langid_fixtures(tests: Vec<fixtures::LocaleTest>) {
+ for test in tests {
+ match test.output {
+ fixtures::LocaleInfo::String(s) => {
+ let input: Locale = test.input.try_into().expect("Parsing failed.");
+ assert_eq!(input.to_string(), s);
+ }
+ fixtures::LocaleInfo::Error(err) => {
+ let err: ParserError = err.into();
+ let input: Result = test.input.try_into();
+ assert_eq!(input, Err(err));
+ }
+ fixtures::LocaleInfo::Identifier(ident) => {
+ let input: Locale = test.input.try_into().expect("Parsing failed.");
+ let output: Locale = ident.clone().try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ assert_eq!(input.to_string(), ident.identifier);
+ }
+ fixtures::LocaleInfo::Object(o) => {
+ let input: Locale = test.input.try_into().expect("Parsing failed.");
+ let output: Locale = o.try_into().expect("Parsing failed.");
+ assert_eq!(input, output);
+ }
+ }
+ }
+}
+
+#[test]
+fn test_locale_parsing() {
+ let path = "./tests/fixtures/locale.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_langid_invalid() {
+ let path = "./tests/fixtures/invalid-extensions.json";
+ let data = helpers::read_fixture(path).expect("Failed to read a fixture");
+
+ test_langid_fixtures(data);
+}
+
+#[test]
+fn test_locale_is_empty() {
+ let locale: Locale = Locale::default();
+ assert!(locale.extensions.is_empty());
+ assert_eq!(locale.to_string(), "und".to_string());
+}
+
+#[test]
+fn test_locale_conversions() {
+ let locale: Locale = Locale::default();
+ let langid: LanguageIdentifier = locale.clone().into();
+ let locale2: Locale = langid.into();
+ assert_eq!(locale, locale2);
+}
+
+#[test]
+fn test_locale_canonicalize() {
+ let locale: Locale = "En-latn-US-MacOS"
+ .parse()
+ .expect("Failed to parse a locale.");
+ assert_eq!(
+ locale.to_string(),
+ Locale::canonicalize("eN-latN-uS-macOS").unwrap()
+ );
+}
+
+#[test]
+fn test_locale_normalizing_eq_str() {
+ let path = "./tests/fixtures/locale.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ for test in tests {
+ let parsed: Locale = test.input.try_into().expect("Parsing failed.");
+ assert!(parsed.normalizing_eq(parsed.to_string().as_str()));
+ }
+
+ // Check that trailing characters are not ignored
+ let locale: Locale = "en".parse().expect("Parsing failed.");
+ assert!(!locale.normalizing_eq("en-US"));
+}
+
+#[test]
+fn test_locale_strict_cmp() {
+ let path = "./tests/fixtures/locale.json";
+ let tests: Vec<fixtures::LocaleTest> =
+ helpers::read_fixture(path).expect("Failed to read a fixture");
+ let bcp47_strings = tests
+ .iter()
+ .map(|t| match t.input {
+ fixtures::LocaleInfo::Identifier(ref s) => s.identifier.as_str(),
+ _ => match t.output {
+ fixtures::LocaleInfo::Identifier(ref s) => s.identifier.as_str(),
+ _ => panic!("No string in fixture input or output: {:?}", t),
+ },
+ })
+ .collect::<Vec<&str>>();
+ for a in bcp47_strings.iter() {
+ for b in bcp47_strings.iter() {
+ let a_langid = a.parse::<Locale>().expect("Invalid BCP-47 in fixture");
+ let a_normalized = a_langid.to_string();
+ let string_cmp = a_normalized.as_bytes().cmp(b.as_bytes());
+ let test_cmp = a_langid.strict_cmp(b.as_bytes());
+ assert_eq!(string_cmp, test_cmp, "{:?}/{:?}", a, b);
+ }
+ }
+}