From 4e8199b572f2035b7749cba276ece3a26630d23e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:18:21 +0200 Subject: Adding upstream version 1.67.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/icu_locid/.cargo-checksum.json | 1 + vendor/icu_locid/Cargo.lock | 725 +++++++++++++++++++++ vendor/icu_locid/Cargo.toml | 136 ++++ vendor/icu_locid/LICENSE | 51 ++ vendor/icu_locid/README.md | 63 ++ vendor/icu_locid/benches/fixtures/langid.json | 48 ++ vendor/icu_locid/benches/fixtures/locale.json | 26 + vendor/icu_locid/benches/fixtures/mod.rs | 25 + vendor/icu_locid/benches/fixtures/subtags.json | 18 + vendor/icu_locid/benches/helpers/macros.rs | 110 ++++ vendor/icu_locid/benches/helpers/mod.rs | 17 + vendor/icu_locid/benches/iai_langid.rs | 118 ++++ vendor/icu_locid/benches/langid.rs | 93 +++ vendor/icu_locid/benches/locale.rs | 87 +++ vendor/icu_locid/benches/subtags.rs | 39 ++ vendor/icu_locid/examples/filter_langids.rs | 66 ++ .../examples/syntatically_canonicalize_locales.rs | 54 ++ vendor/icu_locid/src/extensions/mod.rs | 313 +++++++++ vendor/icu_locid/src/extensions/other/mod.rs | 157 +++++ vendor/icu_locid/src/extensions/other/subtag.rs | 37 ++ vendor/icu_locid/src/extensions/private/mod.rs | 167 +++++ vendor/icu_locid/src/extensions/private/other.rs | 31 + .../icu_locid/src/extensions/transform/fields.rs | 228 +++++++ vendor/icu_locid/src/extensions/transform/key.rs | 31 + vendor/icu_locid/src/extensions/transform/mod.rs | 236 +++++++ vendor/icu_locid/src/extensions/transform/value.rs | 119 ++++ .../icu_locid/src/extensions/unicode/attribute.rs | 34 + .../icu_locid/src/extensions/unicode/attributes.rs | 115 ++++ vendor/icu_locid/src/extensions/unicode/key.rs | 31 + .../icu_locid/src/extensions/unicode/keywords.rs | 404 ++++++++++++ vendor/icu_locid/src/extensions/unicode/mod.rs | 233 +++++++ vendor/icu_locid/src/extensions/unicode/value.rs | 199 ++++++ vendor/icu_locid/src/helpers.rs | 648 ++++++++++++++++++ vendor/icu_locid/src/langid.rs 
| 523 +++++++++++++++ vendor/icu_locid/src/lib.rs | 101 +++ vendor/icu_locid/src/locale.rs | 528 +++++++++++++++ vendor/icu_locid/src/macros.rs | 191 ++++++ vendor/icu_locid/src/ordering.rs | 61 ++ vendor/icu_locid/src/parser/errors.rs | 54 ++ vendor/icu_locid/src/parser/langid.rs | 269 ++++++++ vendor/icu_locid/src/parser/locale.rs | 42 ++ vendor/icu_locid/src/parser/mod.rs | 98 +++ vendor/icu_locid/src/serde.rs | 135 ++++ vendor/icu_locid/src/subtags/language.rs | 107 +++ vendor/icu_locid/src/subtags/mod.rs | 58 ++ vendor/icu_locid/src/subtags/region.rs | 61 ++ vendor/icu_locid/src/subtags/script.rs | 32 + vendor/icu_locid/src/subtags/variant.rs | 34 + vendor/icu_locid/src/subtags/variants.rs | 134 ++++ vendor/icu_locid/src/zerovec.rs | 132 ++++ vendor/icu_locid/tests/fixtures/canonicalize.json | 18 + .../tests/fixtures/invalid-extensions.json | 112 ++++ vendor/icu_locid/tests/fixtures/invalid.json | 85 +++ vendor/icu_locid/tests/fixtures/langid.json | 167 +++++ vendor/icu_locid/tests/fixtures/locale.json | 298 +++++++++ vendor/icu_locid/tests/fixtures/mod.rs | 260 ++++++++ vendor/icu_locid/tests/helpers/mod.rs | 15 + vendor/icu_locid/tests/langid.rs | 157 +++++ vendor/icu_locid/tests/locale.rs | 122 ++++ 59 files changed, 8454 insertions(+) create mode 100644 vendor/icu_locid/.cargo-checksum.json create mode 100644 vendor/icu_locid/Cargo.lock create mode 100644 vendor/icu_locid/Cargo.toml create mode 100644 vendor/icu_locid/LICENSE create mode 100644 vendor/icu_locid/README.md create mode 100644 vendor/icu_locid/benches/fixtures/langid.json create mode 100644 vendor/icu_locid/benches/fixtures/locale.json create mode 100644 vendor/icu_locid/benches/fixtures/mod.rs create mode 100644 vendor/icu_locid/benches/fixtures/subtags.json create mode 100644 vendor/icu_locid/benches/helpers/macros.rs create mode 100644 vendor/icu_locid/benches/helpers/mod.rs create mode 100644 vendor/icu_locid/benches/iai_langid.rs create mode 100644 vendor/icu_locid/benches/langid.rs 
create mode 100644 vendor/icu_locid/benches/locale.rs create mode 100644 vendor/icu_locid/benches/subtags.rs create mode 100644 vendor/icu_locid/examples/filter_langids.rs create mode 100644 vendor/icu_locid/examples/syntatically_canonicalize_locales.rs create mode 100644 vendor/icu_locid/src/extensions/mod.rs create mode 100644 vendor/icu_locid/src/extensions/other/mod.rs create mode 100644 vendor/icu_locid/src/extensions/other/subtag.rs create mode 100644 vendor/icu_locid/src/extensions/private/mod.rs create mode 100644 vendor/icu_locid/src/extensions/private/other.rs create mode 100644 vendor/icu_locid/src/extensions/transform/fields.rs create mode 100644 vendor/icu_locid/src/extensions/transform/key.rs create mode 100644 vendor/icu_locid/src/extensions/transform/mod.rs create mode 100644 vendor/icu_locid/src/extensions/transform/value.rs create mode 100644 vendor/icu_locid/src/extensions/unicode/attribute.rs create mode 100644 vendor/icu_locid/src/extensions/unicode/attributes.rs create mode 100644 vendor/icu_locid/src/extensions/unicode/key.rs create mode 100644 vendor/icu_locid/src/extensions/unicode/keywords.rs create mode 100644 vendor/icu_locid/src/extensions/unicode/mod.rs create mode 100644 vendor/icu_locid/src/extensions/unicode/value.rs create mode 100644 vendor/icu_locid/src/helpers.rs create mode 100644 vendor/icu_locid/src/langid.rs create mode 100644 vendor/icu_locid/src/lib.rs create mode 100644 vendor/icu_locid/src/locale.rs create mode 100644 vendor/icu_locid/src/macros.rs create mode 100644 vendor/icu_locid/src/ordering.rs create mode 100644 vendor/icu_locid/src/parser/errors.rs create mode 100644 vendor/icu_locid/src/parser/langid.rs create mode 100644 vendor/icu_locid/src/parser/locale.rs create mode 100644 vendor/icu_locid/src/parser/mod.rs create mode 100644 vendor/icu_locid/src/serde.rs create mode 100644 vendor/icu_locid/src/subtags/language.rs create mode 100644 vendor/icu_locid/src/subtags/mod.rs create mode 100644 
vendor/icu_locid/src/subtags/region.rs create mode 100644 vendor/icu_locid/src/subtags/script.rs create mode 100644 vendor/icu_locid/src/subtags/variant.rs create mode 100644 vendor/icu_locid/src/subtags/variants.rs create mode 100644 vendor/icu_locid/src/zerovec.rs create mode 100644 vendor/icu_locid/tests/fixtures/canonicalize.json create mode 100644 vendor/icu_locid/tests/fixtures/invalid-extensions.json create mode 100644 vendor/icu_locid/tests/fixtures/invalid.json create mode 100644 vendor/icu_locid/tests/fixtures/langid.json create mode 100644 vendor/icu_locid/tests/fixtures/locale.json create mode 100644 vendor/icu_locid/tests/fixtures/mod.rs create mode 100644 vendor/icu_locid/tests/helpers/mod.rs create mode 100644 vendor/icu_locid/tests/langid.rs create mode 100644 vendor/icu_locid/tests/locale.rs (limited to 'vendor/icu_locid') diff --git a/vendor/icu_locid/.cargo-checksum.json b/vendor/icu_locid/.cargo-checksum.json new file mode 100644 index 000000000..1f1097bd7 --- /dev/null +++ b/vendor/icu_locid/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{"Cargo.lock":"6bf9c8304a3fe9f99d7189f9a082be2c7859ea164976975069f8fd2f7f80bbbd","Cargo.toml":"44c6bcdc448226df67e425cb00bf02596c96d0a0bfcb3951d3a5d0998afaa60d","LICENSE":"4ad7541d66a407234e2c84902124cef325c29f3e966353efdb800bedb8b8da21","README.md":"d0e5ced27519cf715a66dc4fece18c8cacece8dbb81eb1e03ab82dd57f0bc7f5","benches/fixtures/langid.json":"373c11527653c63c685c9e229a8de5ae2b557c25b686a9d891c59e1f603232d8","benches/fixtures/locale.json":"669b19db933094290a45bf856559920f4e92401072e364ac82c482119dc9233a","benches/fixtures/mod.rs":"9a9671eddcf38a6faa10cb814949f8abc15d89f5e70f3ad6f684f1bc3ffe72ea","benches/fixtures/subtags.json":"28be3a639e452d713e807d5779b6819e06277e2dbbf67801ef34964fb9b074b6","benches/helpers/macros.rs":"bba0945a826bc083156bc302507c48c0c99c4d965e2a84352644d768591b0339","benches/helpers/mod.rs":"c98167d866fdb7f66c8cab41e8d57b5aab9e9707dfc66c37ef136e088dac6fef","benches/iai_langid.rs":"675ab67edc2820894e1179e97e3aad6037957084efa07e494c17c40f3c0bbe35","benches/langid.rs":"4e3d307d48fd9071308a567a0ef927b229814978abd2ba29f57c65edd51f38e4","benches/locale.rs":"b8d5b1e3f8b5578c549a5149229656fb60de26b76a1bf66b6c1abce75042d674","benches/subtags.rs":"e7e80dabaf31bf031779456614f139cafcdadb805986e71b49133ac964928432","examples/filter_langids.rs":"28bea5b7dc715d6c00694437c3f12a72cf68dc984bb13acbb7b1ce5f97c5726a","examples/syntatically_canonicalize_locales.rs":"de97579c82f1670629d077a6216ecce06761da28715387f46250f81b8172ae6b","src/extensions/mod.rs":"76efffe1c99da3ef61a93f8174267e4b0b63abc3283ec8e0c5170ebc582263fe","src/extensions/other/mod.rs":"4216cd8a4dcef13105b48e507659920feaaa3fa3aebc2ba8d7702b40bbec2881","src/extensions/other/subtag.rs":"cb52ec1acec55e4c0e1d37cc5a552d11010051d827786202702257c8fcd96c49","src/extensions/private/mod.rs":"961bfb455114ad7166beb5acb36a1b182d2e81d99cccbfd3b3bf68853cae490d","src/extensions/private/other.rs":"586fd24398e78c5fda0afdb98de28a6467afd2d702683daf5dfab2a6c45af1e9","src/extensions/transform/fields.rs":"376ae586
2329709d54b262a6d91be97bb02fc5e0198f30be8a2f4b0adc420c8b","src/extensions/transform/key.rs":"53e8c9ce13f00f678c2322855cc1d90afd91cd33a2af3758d098b7bbcc7090e5","src/extensions/transform/mod.rs":"c932d7e4484ac3bf3c9fe0c63b17847d8cb29f8874d71cd17070e63b8bca5998","src/extensions/transform/value.rs":"153c4edeb987e052dafe0790bcda560da4dcfa6897e5aaf3f62ae772b0605009","src/extensions/unicode/attribute.rs":"d558a193b72f54cdb03afe8e023a145ac74832c8416ca55401cd417ebba2431c","src/extensions/unicode/attributes.rs":"f2f13714750035ff805455b43ba665710978d13b90a53358314e798662c436b6","src/extensions/unicode/key.rs":"6c8694527079c5dd5f03f8e85f23ae6b5aa4b47899d1047036960e8400dca7de","src/extensions/unicode/keywords.rs":"58a2eca7c5e6ac6ad6812538a5b8118e35274c6b5de8029d55cbe1b4cd0a4abb","src/extensions/unicode/mod.rs":"e81db13fdb2db8d6cf7cfcd7c0d926b929fceca500894e688768b3494d02d0c3","src/extensions/unicode/value.rs":"02876ed95059d21d09ff2b986776d6bf0cb14c698237a86a9be24886ffd7a1cd","src/helpers.rs":"a6b8c22ef40a57339e4051fad54e724320851f827bc6f888187f30371024d04a","src/langid.rs":"b3258b1be6566dc117295a525dcb04237f0049c59dc91f460d939cd162ef8b39","src/lib.rs":"6f6248e20709be74b9e186b45810a4963ffa91c680be4ad78be9a6af5a10da5c","src/locale.rs":"a1ff7500d17581fe06524f6d70d59f0765c5d5ca89cb64b42953b286b20727b4","src/macros.rs":"f7154fc103ea1120a55bb5898540b20df80de6eec42e70ce15f339d997f2bf52","src/ordering.rs":"c70aa4e33d5cbab8d75d2833641141b71984a93648634cfc57fc25c3f79a5f58","src/parser/errors.rs":"ccea5e49c109db3766a71ac4aab1d759e2351c4cd31816b6abdca166699c7f3e","src/parser/langid.rs":"ef5c3dc233a5cea1953688e69152c601a3260196caa9327dd07edc7b6be7b0b8","src/parser/locale.rs":"b7d4cd4ed80b0acae9e77046a3b4943ee19e4aec395e36951750da32366b9a8e","src/parser/mod.rs":"c65268221fc67a692a2a647b08dd81b244a9186c04f5ab0837383dcaa983b740","src/serde.rs":"06e940e4f2d15f02d313b4e2b233aea3e74c93c6c43076f5ffe52d49c133608f","src/subtags/language.rs":"e9dc6de6c6aebb6d8bf6e55f1ae9fab41844a52e681b4309e625a5076c0
2f9f3","src/subtags/mod.rs":"0257f746ed368ea3fa675054c9e7e40d972ec31cd7cc525be655a16a83c9d17b","src/subtags/region.rs":"4f4120f4910d0a4496f29c193d00313e71be4c646867d97ebd0e9a7438693847","src/subtags/script.rs":"6b1a68783cb90409bdd39b0184dfb2cb1c302fdee7202e3b6f7c7c8941bc7dfe","src/subtags/variant.rs":"956f1ea3d98172b6ead333411f010cf4e84404584a3051cb775d148d79beb4f8","src/subtags/variants.rs":"7740d1b20f596b04f509db917e9c2fffba80a013ffc42f0046cdc2d32b088aeb","src/zerovec.rs":"9d01a235d18296fbf0c2e89d188459e9446df0e63aaedc7e150165604af885b9","tests/fixtures/canonicalize.json":"9f2b7cbef72c24944cd4dc50de368c6e3ef69949f29c9ce1aa8807de767a4d0a","tests/fixtures/invalid-extensions.json":"0af95f38e458c8f52760f76c6540993beb9ba9421a3967df0cd6abb9fe2ce21a","tests/fixtures/invalid.json":"1f1ae207f1ce886b3f57cfcdfb2525aa3e58d538f997b2bda4088062de7aa68d","tests/fixtures/langid.json":"960fd01722217ef1ea9077e2e0821d7089fe318a241bd7fb7918f50bf8f3f5c3","tests/fixtures/locale.json":"8606e0569fc6ea0e50a1fecb9295b911fbef7d8dbfde3c585476284a751baccf","tests/fixtures/mod.rs":"28dec3e5c9d766e148adbff6857dce884d9ff94f7ef8aee17fde0084cc78a7ee","tests/helpers/mod.rs":"d3bf59e7eed6230f340bef6c87a7b8de3a387ec391f60afc1b15a0d001cbfb67","tests/langid.rs":"2e21d576a6eaba000fbe88d52362384f460ba350cac1e7034a1661302000ac58","tests/locale.rs":"91af0a738ca5def89fdb4d7f8d3504ad7b757e1d7c8e4d24dc246de610b46a04"},"package":"34b3de5d99a0e275fe6193b9586dbf37364daebc0d39c89b5cf8376a53b789e8"} \ No newline at end of file diff --git a/vendor/icu_locid/Cargo.lock b/vendor/icu_locid/Cargo.lock new file mode 100644 index 000000000..9940858d2 --- /dev/null +++ b/vendor/icu_locid/Cargo.lock @@ -0,0 +1,725 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "cobs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" + +[[package]] 
+name = "criterion" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "once_cell", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "databake" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c87777d6d7bde863ba217aa87521dc857239de1f36d66aac46fd173fb0495858" +dependencies = [ + "databake-derive", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "databake-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "905c7a060fc0c84c0452d97473b1177dd7a5cbc7670cfbae4a7fe22e42f6432e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "displaydoc" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "iai" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" + +[[package]] +name = "icu_benchmark_macros" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c867656f2d9c90b13709ac88e710a9d6afe33998c1dfa22384bab8804e8b3d4" + +[[package]] +name = "icu_locid" +version = "1.0.0" +dependencies = [ + "criterion", + "databake", + "displaydoc", + "iai", + "icu_benchmark_macros", + "litemap", + "postcard", + "serde", + "serde_json", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" + +[[package]] +name = "js-sys" +version = "0.3.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.133" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" + +[[package]] +name = "litemap" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f34a3f4798fac63fb48cf277eefa38f94d3443baff555bb98e4f56bc9092368e" + +[[package]] +name 
= "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" + +[[package]] +name = "plotters-svg" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "postcard" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c2b180dc0bade59f03fd005cb967d3f1e5f69b13922dad0cd6e047cb8af2363" +dependencies = [ + "cobs", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +dependencies = [ + "itoa 1.0.3", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.101" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "unicode-xid", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "tinystr" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8aeafdfd935e4a7fe16a91ab711fa52d54df84f9c8f7ca5837a9d1d902ef4c2" +dependencies = [ + "displaydoc", + "serde", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" + +[[package]] +name = "web-sys" +version = "0.3.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + 
"winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "writeable" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8e6ab4f5da1b24daf2c590cfac801bacb27b15b4f050e84eb60149ea726f06b" + +[[package]] +name = "zerofrom" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79e9355fccf72b04b7deaa99ce7a0f6630530acf34045391b74460fcd714de54" + +[[package]] +name = "zerovec" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d919a74c17749ccb17beaf6405562e413cd94e98ba52ca1e64bbe7eefbd8b8" +dependencies = [ + "zerofrom", +] diff --git a/vendor/icu_locid/Cargo.toml b/vendor/icu_locid/Cargo.toml new file mode 100644 index 000000000..3ce7066e7 --- /dev/null +++ b/vendor/icu_locid/Cargo.toml @@ -0,0 +1,136 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). 
+# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "icu_locid" +version = "1.0.0" +authors = ["The ICU4X Project Developers"] +include = [ + "src/**/*", + "examples/**/*", + "benches/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md", +] +description = "API for managing Unicode Language and Locale Identifiers" +readme = "README.md" +categories = ["internationalization"] +license = "Unicode-DFS-2016" +repository = "https://github.com/unicode-org/icu4x" +resolver = "2" + +[package.metadata.cargo-all-features] +skip_optional_dependencies = true +denylist = ["bench"] +extra_features = ["serde"] + +[package.metadata.docs.rs] +all-features = true + +[lib] +path = "src/lib.rs" +bench = false + +[[example]] +name = "filter_langids" +test = true + +[[example]] +name = "syntatically_canonicalize_locales" +test = true + +[[bench]] +name = "subtags" +harness = false +required-features = ["bench"] + +[[bench]] +name = "langid" +harness = false + +[[bench]] +name = "locale" +harness = false + +[[bench]] +name = "iai_langid" +harness = false +required-features = ["bench"] + +[dependencies.databake] +version = "0.1.0" +features = ["derive"] +optional = true + +[dependencies.displaydoc] +version = "0.2.3" +default-features = false + +[dependencies.litemap] +version = "0.6" + +[dependencies.serde] +version = "1.0" +features = [ + "alloc", + "derive", +] +optional = true +default-features = false + +[dependencies.tinystr] +version = "0.7" +features = ["alloc"] +default-features = false + +[dependencies.writeable] +version = "0.5" + +[dependencies.zerovec] +version = "0.9" +optional = true + +[dev-dependencies.criterion] +version = "0.3.3" + +[dev-dependencies.iai] +version = "0.1.1" + +[dev-dependencies.icu_benchmark_macros] +version = "0.7" + +[dev-dependencies.litemap] +version = "0.6" +features = ["testing"] + +[dev-dependencies.postcard] +version = "1.0.0" +features = ["use-std"] +default-features = false + 
+[dev-dependencies.serde] +version = "1.0" +features = ["derive"] + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +bench = ["serde"] +default = [] +serde = [ + "dep:serde", + "tinystr/serde", +] +std = [] diff --git a/vendor/icu_locid/LICENSE b/vendor/icu_locid/LICENSE new file mode 100644 index 000000000..9858d01ab --- /dev/null +++ b/vendor/icu_locid/LICENSE @@ -0,0 +1,51 @@ +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. 
+ +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. diff --git a/vendor/icu_locid/README.md b/vendor/icu_locid/README.md new file mode 100644 index 000000000..cc2a0b023 --- /dev/null +++ b/vendor/icu_locid/README.md @@ -0,0 +1,63 @@ +# icu_locid [![crates.io](https://img.shields.io/crates/v/icu_locid)](https://crates.io/crates/icu_locid) + +Parsing, manipulating, and serializing Unicode Language and Locale Identifiers. + +This module is published as its own crate ([`icu_locid`](https://docs.rs/icu_locid/latest/icu_locid/)) +and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. + +The module provides algorithms for parsing a string into a well-formed language or locale identifier +as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]. + +[`Locale`] is the most common structure to use for storing information about a language, +script, region, variants and extensions. 
In almost all cases, this struct should be used as the +base unit for all locale management operations. + +[`LanguageIdentifier`] is a strict subset of [`Locale`] which can be useful in a narrow range of +cases where [`Unicode Extensions`] are not relevant. + +If in doubt, use [`Locale`]. + +## Examples + +```rust +use icu::locid::subtags::{Language, Region}; +use icu::locid::Locale; + +let mut loc: Locale = "en-US".parse().expect("Parsing failed."); + +let lang: Language = "en".parse().expect("Parsing failed."); +let region: Region = "US".parse().expect("Parsing failed."); + +assert_eq!(loc.id.language, lang); +assert_eq!(loc.id.script, None); +assert_eq!(loc.id.region, Some(region)); +assert_eq!(loc.id.variants.len(), 0); + +let region: Region = "GB".parse().expect("Parsing failed."); +loc.id.region = Some(region); + +assert_eq!(loc.to_string(), "en-GB"); +``` + +### Macros + +```rust +use icu::locid::{ + langid, subtags_language as language, subtags_region as region, +}; + +let lid = langid!("EN_US"); + +assert_eq!(lid.language, language!("en")); +assert_eq!(lid.region, Some(region!("US"))); +``` + +For more details, see [`Locale`] and [`LanguageIdentifier`]. + +[`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]: https://unicode.org/reports/tr35/tr35.html#Unicode_Language_and_Locale_Identifiers +[`ICU4X`]: ../icu/index.html +[`Unicode Extensions`]: extensions + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). 
diff --git a/vendor/icu_locid/benches/fixtures/langid.json b/vendor/icu_locid/benches/fixtures/langid.json new file mode 100644 index 000000000..43c56d5a2 --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/langid.json @@ -0,0 +1,48 @@ +{ + "canonicalized": [ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US" + ], + "casing": [ + "En_uS", + "EN-GB", + "ES-aR", + "iT", + "zH_HaNs_cN", + "dE-aT", + "Pl", + "FR-FR", + "de_AT", + "sR-CyrL_sr", + "NB-NO", + "fr_fr", + "Mk", + "uK", + "en-us", + "en_gb", + "ES-AR", + "tH", + "DE", + "ZH_cyrl_hN", + "eN-lAtN-uS" + ] +} diff --git a/vendor/icu_locid/benches/fixtures/locale.json b/vendor/icu_locid/benches/fixtures/locale.json new file mode 100644 index 000000000..f974a166f --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/locale.json @@ -0,0 +1,26 @@ +{ + "canonicalized": [ + "en-US-u-hc-h12", + "en-GB-u-ca-gregory-hc-h12", + "es-AR-x-private", + "th-u-ca-buddhist", + "de-u-co-phonebk-ka-shifted", + "ar-u-nu-native", + "ar-u-nu-latn", + "ja-t-it", + "ja-Kana-t-it", + "und-Latn-t-und-cyrl" + ], + "casing": [ + "en-US-U-hc-h12", + "en-GB-u-CA-gregory-hc-h12", + "es-AR-x-Private", + "th-u-ca-buDDhist", + "de-u-co-phonebk-KA-shifted", + "AR_U-NU-native", + "ar-u-nu-LaTN", + "jA-T-it", + "ja-kanA-T-IT", + "unD-Latn-T-und-cyrl" + ] +} diff --git a/vendor/icu_locid/benches/fixtures/mod.rs b/vendor/icu_locid/benches/fixtures/mod.rs new file mode 100644 index 000000000..006b22312 --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/mod.rs @@ -0,0 +1,25 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use serde::Deserialize; + +#[derive(Deserialize)] +pub struct SubtagData { + pub valid: Vec, + pub invalid: Vec, +} + +#[derive(Deserialize)] +pub struct Subtags { + pub language: SubtagData, + pub script: SubtagData, + pub region: SubtagData, + pub variant: SubtagData, +} + +#[derive(Deserialize)] +pub struct LocaleList { + pub canonicalized: Vec, + pub casing: Vec, +} diff --git a/vendor/icu_locid/benches/fixtures/subtags.json b/vendor/icu_locid/benches/fixtures/subtags.json new file mode 100644 index 000000000..cf8419cc9 --- /dev/null +++ b/vendor/icu_locid/benches/fixtures/subtags.json @@ -0,0 +1,18 @@ +{ + "language": { + "valid": ["en", "it", "pl", "de", "fr", "cs", "csb", "und", "ru", "nb", "NB", "UK", "pL", "Zh", "ES"], + "invalid": ["", "1", "$", "a1", "1211", "as_sa^a", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + }, + "script": { + "valid": ["Latn", "latn", "Arab", "xxxx", "Flan", "fAlA", "oOoO", "pPlQ", "esta", "RUSS"], + "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + }, + "region": { + "valid": ["DE", "321", "zh", "IA", "fN", "rU", "ru", "RU", "Ru", "CN", "AR"], + "invalid": ["", "1", "$", "a1", "1211", "assaa", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + }, + "variant": { + "valid": ["macos", "MaCoS", "windows", "posix", "POSIX", "Posix", "linux", "lINUX", "mAcOs", "testing", "WWWWWW"], + "invalid": ["", "1", "$", "a1", "a211", "ass__aa", "-0we", "3e3", "kk$$22", "testingaverylongstring"] + } +} diff --git a/vendor/icu_locid/benches/helpers/macros.rs b/vendor/icu_locid/benches/helpers/macros.rs new file mode 100644 index 000000000..848a360c4 --- /dev/null +++ b/vendor/icu_locid/benches/helpers/macros.rs @@ -0,0 +1,110 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#[macro_export] +macro_rules! 
overview { + ($c:expr, $struct:ident, $data_str:expr, $compare:expr) => { + $c.bench_function("overview", |b| { + b.iter(|| { + let mut values = vec![]; + for s in $data_str { + let value: Result<$struct, _> = black_box(s).parse(); + values.push(value.expect("Parsing failed")); + } + let _ = values + .iter() + .filter(|&v| v.normalizing_eq($compare)) + .count(); + + values + .iter() + .map(|v| v.to_string()) + .collect::>() + }) + }); + }; +} + +#[macro_export] +macro_rules! construct { + ($c:expr, $struct:ident, $struct_name:expr, $data_str:expr) => { + $c.bench_function($struct_name, |b| { + b.iter(|| { + for s in $data_str { + let _: Result<$struct, _> = black_box(s).parse(); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! to_string { + ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => { + $c.bench_function($struct_name, |b| { + b.iter(|| { + for s in $data { + let _ = black_box(s).to_string(); + } + }) + }); + $c.bench_function(std::concat!($struct_name, "/writeable"), |b| { + use writeable::Writeable; + b.iter(|| { + for s in $data { + let _ = black_box(s).write_to_string(); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! compare_struct { + ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => { + $c.bench_function(BenchmarkId::new("struct", $struct_name), |b| { + b.iter(|| { + for (lid1, lid2) in $data1.iter().zip($data2.iter()) { + let _ = black_box(lid1) == black_box(lid2); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! 
compare_str { + ($c:expr, $struct:ident, $struct_name:expr, $data1:expr, $data2:expr) => { + $c.bench_function(BenchmarkId::new("str", $struct_name), |b| { + b.iter(|| { + for (lid, s) in $data1.iter().zip($data2.iter()) { + let _ = black_box(lid).normalizing_eq(&black_box(s)); + } + }) + }); + $c.bench_function(BenchmarkId::new("strict_cmp", $struct_name), |b| { + b.iter(|| { + for (lid, s) in $data1.iter().zip($data2.iter()) { + let _ = black_box(lid).strict_cmp(&black_box(s).as_str().as_bytes()); + } + }) + }); + }; +} + +#[macro_export] +macro_rules! canonicalize { + ($c:expr, $struct:ident, $struct_name:expr, $data:expr) => { + $c.bench_function($struct_name, |b| { + b.iter(|| { + for s in $data { + let _ = black_box(s).to_string(); + } + for s in $data { + let _ = $struct::canonicalize(black_box(s)); + } + }) + }); + }; +} diff --git a/vendor/icu_locid/benches/helpers/mod.rs b/vendor/icu_locid/benches/helpers/mod.rs new file mode 100644 index 000000000..27e455f7b --- /dev/null +++ b/vendor/icu_locid/benches/helpers/mod.rs @@ -0,0 +1,17 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod macros; + +use std::fs::File; +use std::io::{BufReader, Error}; + +pub fn read_fixture(path: &str) -> Result +where + T: serde::de::DeserializeOwned, +{ + let file = File::open(path)?; + let reader = BufReader::new(file); + Ok(serde_json::from_reader(reader)?) +} diff --git a/vendor/icu_locid/benches/iai_langid.rs b/vendor/icu_locid/benches/iai_langid.rs new file mode 100644 index 000000000..f964d1462 --- /dev/null +++ b/vendor/icu_locid/benches/iai_langid.rs @@ -0,0 +1,118 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use icu_locid::{ + langid, subtags_language as language, subtags_region as region, LanguageIdentifier, +}; + +const LIDS: &[LanguageIdentifier] = &[ + langid!("en"), + langid!("pl"), + langid!("fr-CA"), + langid!("zh-Hans"), + langid!("en-US"), + langid!("en-Latn-US"), + langid!("sr-Cyrl-BA"), +]; + +const LIDS_STR: &[&str] = &[ + "en", + "pl", + "fr-CA", + "zh-Hans", + "en-US", + "en-Latn-US", + "sr-Cyrl-BA", +]; + +fn bench_langid_constr() { + // Tests the instructions required to construct a LID from an str. + + let _: Vec = LIDS_STR + .iter() + .map(|l| l.parse().expect("Failed to parse")) + .collect(); +} + +fn bench_langid_compare_components() { + // Tests the cost of comparing LID components. + + let result = LIDS + .iter() + .filter(|l| l.language == language!("en") && l.region == Some(region!("US"))) + .count(); + + assert_eq!(result, 2); +} + +fn bench_langid_compare_components_str() { + // Tests the cost of comparing LID components to str. + + let result = LIDS + .iter() + .filter(|l| { + l.language == language!("en") && l.region.map(|r| r == region!("US")).unwrap_or(false) + }) + .count(); + + assert_eq!(result, 2); +} + +fn bench_langid_strict_cmp() { + // Tests the cost of comparing a langid against byte strings. + use core::cmp::Ordering; + + let lid = langid!("en_us"); + + let result = LIDS_STR + .iter() + .filter(|s| lid.strict_cmp(s.as_bytes()) == Ordering::Equal) + .count(); + + assert_eq!(result, 1); +} + +fn bench_langid_matching() { + // Tests matching a LID against other LIDs. + + let lid = langid!("en_us"); + + let count = LIDS.iter().filter(|l| lid == **l).count(); + assert_eq!(count, 1); +} + +fn bench_langid_matching_str() { + // Tests matching a LID against list of str. + + let lid = langid!("en_us"); + + let count = LIDS_STR.iter().filter(|&l| lid.normalizing_eq(l)).count(); + assert_eq!(count, 1); +} + +fn bench_langid_serialize() { + // Tests serialization of LIDs. 
+ + let _: Vec = LIDS.iter().map(|l| l.to_string()).collect(); +} + +fn bench_langid_canonicalize() { + // Tests canonicalization of strings. + + let _: Vec = LIDS_STR + .iter() + .map(|l| LanguageIdentifier::canonicalize(l).expect("Canonicalization failed")) + .collect(); +} + +iai::main!( + bench_langid_constr, + bench_langid_compare_components, + bench_langid_compare_components_str, + bench_langid_strict_cmp, + bench_langid_matching, + bench_langid_matching_str, + bench_langid_serialize, + bench_langid_canonicalize, +); diff --git a/vendor/icu_locid/benches/langid.rs b/vendor/icu_locid/benches/langid.rs new file mode 100644 index 000000000..e5c9b6734 --- /dev/null +++ b/vendor/icu_locid/benches/langid.rs @@ -0,0 +1,93 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod fixtures; +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use icu_locid::LanguageIdentifier; + +fn langid_benches(c: &mut Criterion) { + let path = "./benches/fixtures/langid.json"; + let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture"); + + // Overview + { + let mut group = c.benchmark_group("langid"); + + overview!(group, LanguageIdentifier, &data.canonicalized, "en-US"); + + group.finish(); + } + + #[cfg(feature = "bench")] + { + use criterion::BenchmarkId; + + // Construct + { + let mut group = c.benchmark_group("langid/construct"); + + construct!(group, LanguageIdentifier, "langid", &data.canonicalized); + + group.finish(); + } + + // Stringify + { + let mut group = c.benchmark_group("langid/to_string"); + + let langids: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + to_string!(group, LanguageIdentifier, "langid", &langids); + + group.finish(); + } + + // Compare + { + let mut group = 
c.benchmark_group("langid/compare"); + + let langids: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + let langids2: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + compare_struct!(group, LanguageIdentifier, "langid", &langids, &langids2); + + compare_str!( + group, + LanguageIdentifier, + "langid", + &langids, + &data.canonicalized + ); + + group.finish(); + } + + // Canonicalize + { + let mut group = c.benchmark_group("langid/canonicalize"); + + canonicalize!(group, LanguageIdentifier, "langid", &data.casing); + + group.finish(); + } + } +} + +criterion_group!(benches, langid_benches,); +criterion_main!(benches); diff --git a/vendor/icu_locid/benches/locale.rs b/vendor/icu_locid/benches/locale.rs new file mode 100644 index 000000000..948fbb5e8 --- /dev/null +++ b/vendor/icu_locid/benches/locale.rs @@ -0,0 +1,87 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +mod fixtures; +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use icu_locid::Locale; + +fn locale_benches(c: &mut Criterion) { + let path = "./benches/fixtures/locale.json"; + let data: fixtures::LocaleList = helpers::read_fixture(path).expect("Failed to read a fixture"); + + // Overview + { + let mut group = c.benchmark_group("locale"); + + overview!(group, Locale, &data.canonicalized, "en-US"); + + group.finish(); + } + + #[cfg(feature = "bench")] + { + use criterion::BenchmarkId; + + // Construct + { + let mut group = c.benchmark_group("locale/construct"); + + construct!(group, Locale, "locale", &data.canonicalized); + + group.finish(); + } + + // Stringify + { + let mut group = c.benchmark_group("locale/to_string"); + + let locales: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + to_string!(group, Locale, "locale", &locales); + + group.finish(); + } + + // Compare + { + let mut group = c.benchmark_group("locale/compare"); + + let locales: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + let locales2: Vec = data + .canonicalized + .iter() + .map(|s| s.parse().unwrap()) + .collect(); + + compare_struct!(group, Locale, "locale", &locales, &locales2); + + compare_str!(group, Locale, "locale", &locales, &data.canonicalized); + + group.finish(); + } + + // Canonicalize + { + let mut group = c.benchmark_group("locale/canonicalize"); + + canonicalize!(group, Locale, "locale", &data.casing); + + group.finish(); + } + } +} + +criterion_group!(benches, locale_benches,); +criterion_main!(benches); diff --git a/vendor/icu_locid/benches/subtags.rs b/vendor/icu_locid/benches/subtags.rs new file mode 100644 index 000000000..4f81b71d2 --- /dev/null +++ b/vendor/icu_locid/benches/subtags.rs @@ -0,0 +1,39 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod fixtures; +mod helpers; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use icu_locid::subtags::{Language, Region, Script, Variant}; +use icu_locid::ParserError; + +macro_rules! subtag_bench { + ($c:expr, $name:expr, $subtag:ident, $data:expr) => { + $c.bench_function(&format!("subtags/{}/parse", $name), |b| { + b.iter(|| { + for s in &$data.valid { + let _: $subtag = black_box(s).parse().unwrap(); + } + for s in &$data.invalid { + let _: ParserError = black_box(s).parse::<$subtag>().unwrap_err(); + } + }) + }); + }; +} + +fn subtags_bench(c: &mut Criterion) { + let path = "./benches/fixtures/subtags.json"; + let data: fixtures::Subtags = helpers::read_fixture(path).expect("Failed to read a fixture"); + + subtag_bench!(c, "language", Language, data.language); + subtag_bench!(c, "script", Script, data.script); + subtag_bench!(c, "region", Region, data.region); + subtag_bench!(c, "variant", Variant, data.variant); +} + +criterion_group!(benches, subtags_bench,); +criterion_main!(benches); diff --git a/vendor/icu_locid/examples/filter_langids.rs b/vendor/icu_locid/examples/filter_langids.rs new file mode 100644 index 000000000..9e5b54e39 --- /dev/null +++ b/vendor/icu_locid/examples/filter_langids.rs @@ -0,0 +1,66 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// A sample application which takes a comma separated list of language identifiers, +// filters out identifiers with language subtags different than `en` and serializes +// the list back into a comma separated list in canonical syntax. +// +// Note: This is an example of the API use, and is not a good base for language matching. 
+// For language matching, please consider algorithms such as Locale Matcher. + +#![no_main] // https://github.com/unicode-org/icu4x/issues/395 + +icu_benchmark_macros::static_setup!(); + +use std::env; + +use icu_locid::{subtags, LanguageIdentifier}; + +const DEFAULT_INPUT: &str = + "de, en-us, zh-hant, sr-cyrl, fr-ca, es-cl, pl, en-latn-us, ca-valencia, und-arab"; + +fn filter_input(input: &str) -> String { + // 1. Parse the input string into a list of language identifiers. + let langids = input.split(',').filter_map(|s| s.trim().parse().ok()); + + // 2. Filter for LanguageIdentifiers with Language subtag `en`. + let en_lang: subtags::Language = "en".parse().expect("Failed to parse language subtag."); + + let en_langids = langids.filter(|langid: &LanguageIdentifier| langid.language == en_lang); + + // 3. Serialize the output. + let en_strs: Vec = en_langids.map(|langid| langid.to_string()).collect(); + + en_strs.join(", ") +} + +#[no_mangle] +fn main(_argc: isize, _argv: *const *const u8) -> isize { + icu_benchmark_macros::main_setup!(); + let args: Vec = env::args().collect(); + + let input = if let Some(input) = args.get(1) { + input.as_str() + } else { + DEFAULT_INPUT + }; + let _output = filter_input(input); + + #[cfg(debug_assertions)] + println!("\nInput: {}\nOutput: {}", input, _output); + + 0 +} + +#[cfg(test)] +mod tests { + use super::*; + + const DEFAULT_OUTPUT: &str = "en-US, en-Latn-US"; + + #[test] + fn ensure_default_output() { + assert_eq!(filter_input(DEFAULT_INPUT), DEFAULT_OUTPUT); + } +} diff --git a/vendor/icu_locid/examples/syntatically_canonicalize_locales.rs b/vendor/icu_locid/examples/syntatically_canonicalize_locales.rs new file mode 100644 index 000000000..659e8eff0 --- /dev/null +++ b/vendor/icu_locid/examples/syntatically_canonicalize_locales.rs @@ -0,0 +1,54 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// A sample application which takes a comma separated list of locales, +// makes them syntatically canonical and serializes the list back into a comma separated list. + +icu_benchmark_macros::static_setup!(); + +use std::env; + +use icu_locid::Locale; + +const DEFAULT_INPUT: &str = "sr-cyrL-rS, es-mx, und-arab-u-ca-Buddhist"; + +fn syntatically_canonicalize_locales(input: &str) -> String { + // Split input string and canonicalize each locale identifier. + let canonical_locales: Vec = input + .split(',') + .filter_map(|s| Locale::canonicalize(s.trim()).ok()) + .collect(); + + canonical_locales.join(", ") +} + +fn main() { + icu_benchmark_macros::main_setup!(); + let args: Vec = env::args().collect(); + + let input = if let Some(input) = args.get(1) { + input.as_str() + } else { + DEFAULT_INPUT + }; + let _output = syntatically_canonicalize_locales(input); + + #[cfg(debug_assertions)] + println!("\nInput: {}\nOutput: {}", input, _output); +} + +#[cfg(test)] +mod tests { + use super::*; + + const DEFAULT_OUTPUT: &str = "sr-Cyrl-RS, es-MX, und-Arab-u-ca-buddhist"; + + #[test] + fn ensure_default_output() { + assert_eq!( + syntatically_canonicalize_locales(DEFAULT_INPUT), + DEFAULT_OUTPUT + ); + } +} diff --git a/vendor/icu_locid/src/extensions/mod.rs b/vendor/icu_locid/src/extensions/mod.rs new file mode 100644 index 000000000..42bfcd3c9 --- /dev/null +++ b/vendor/icu_locid/src/extensions/mod.rs @@ -0,0 +1,313 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Unicode Extensions provide a mechanism to extend the [`LanguageIdentifier`] with +//! 
additional bits of information - a combination of a [`LanguageIdentifier`] and [`Extensions`] +//! is called [`Locale`]. +//! +//! There are four types of extensions: +//! +//! * [`Unicode Extensions`] - marked as `u`. +//! * [`Transform Extensions`] - marked as `t`. +//! * [`Private Use Extensions`] - marked as `x`. +//! * [`Other Extensions`] - marked as any `a-z` except of `u`, `t` and `x`. +//! +//! One can think of extensions as a bag of extra information on top of basic 4 [`subtags`]. +//! +//! Notice: `Other` extension type is currently not supported. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::unicode::{Key, Value}; +//! use icu::locid::Locale; +//! +//! let loc: Locale = "en-US-u-ca-buddhist-t-en-US-h0-hybrid-x-foo" +//! .parse() +//! .expect("Failed to parse."); +//! +//! assert_eq!(loc.id.language, "en".parse().unwrap()); +//! assert_eq!(loc.id.script, None); +//! assert_eq!(loc.id.region, Some("US".parse().unwrap())); +//! assert_eq!(loc.id.variants.len(), 0); +//! +//! let key: Key = "ca".parse().expect("Parsing key failed."); +//! let value: Value = "buddhist".parse().expect("Parsing value failed."); +//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); +//! ``` +//! +//! [`LanguageIdentifier`]: super::LanguageIdentifier +//! [`Locale`]: super::Locale +//! [`subtags`]: super::subtags +//! [`Other Extensions`]: other +//! [`Private Use Extensions`]: private +//! [`Transform Extensions`]: transform +//! [`Unicode Extensions`]: unicode +pub mod other; +pub mod private; +pub mod transform; +pub mod unicode; + +use other::Other; +use private::Private; +use transform::Transform; +use unicode::Unicode; + +use alloc::vec::Vec; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; + +/// Defines the type of extension. +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] +#[non_exhaustive] +pub enum ExtensionType { + /// Transform Extension Type marked as `t`. 
+ Transform, + /// Unicode Extension Type marked as `u`. + Unicode, + /// Private Extension Type marked as `x`. + Private, + /// All other extension types. + Other(u8), +} + +impl ExtensionType { + pub(crate) const fn try_from_byte(key: u8) -> Result { + let key = key.to_ascii_lowercase(); + match key { + b'u' => Ok(Self::Unicode), + b't' => Ok(Self::Transform), + b'x' => Ok(Self::Private), + b'a'..=b'z' => Ok(Self::Other(key)), + _ => Err(ParserError::InvalidExtension), + } + } + + pub(crate) const fn try_from_bytes_manual_slice( + bytes: &[u8], + start: usize, + end: usize, + ) -> Result { + if end - start != 1 { + return Err(ParserError::InvalidExtension); + } + #[allow(clippy::indexing_slicing)] + Self::try_from_byte(bytes[start]) + } +} + +/// A map of extensions associated with a given [`Locale`](crate::Locale). +#[derive(Debug, Default, PartialEq, Eq, Clone, Hash)] +#[non_exhaustive] +pub struct Extensions { + /// A representation of the data for a Unicode extension, when present in the locale identifer. + pub unicode: Unicode, + /// A representation of the data for a transform extension, when present in the locale identifer. + pub transform: Transform, + /// A representation of the data for a private-use extension, when present in the locale identifer. + pub private: Private, + /// A sequence of any other extensions that are present in the locale identifier but are not formally + /// [defined](https://unicode.org/reports/tr35/) and represented explicitly as [`Unicode`], [`Transform`], + /// and [`Private`] are. + pub other: Vec, +} + +impl Extensions { + /// Returns a new empty map of extensions. Same as [`default()`](Default::default()), but is `const`. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::Extensions; + /// + /// assert_eq!(Extensions::new(), Extensions::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self { + unicode: Unicode::new(), + transform: Transform::new(), + private: Private::new(), + other: Vec::new(), + } + } + + /// Function to create a new map of extensions containing exactly one unicode extension, callable in `const` + /// context. + #[inline] + pub const fn from_unicode(unicode: Unicode) -> Self { + Self { + unicode, + transform: Transform::new(), + private: Private::new(), + other: Vec::new(), + } + } + + /// Returns whether there are no extensions present. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed."); + /// + /// assert!(!loc.extensions.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.unicode.is_empty() + && self.transform.is_empty() + && self.private.is_empty() + && self.other.is_empty() + } + + /// Retains the specified extension types, clearing all others. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::ExtensionType; + /// use icu::locid::Locale; + /// + /// let loc: Locale = + /// "und-a-hello-t-mul-u-world-z-zzz-x-extra".parse().unwrap(); + /// + /// let mut only_unicode = loc.clone(); + /// only_unicode + /// .extensions + /// .retain_by_type(|t| t == ExtensionType::Unicode); + /// assert_eq!(only_unicode, "und-u-world".parse().unwrap()); + /// + /// let mut only_t_z = loc.clone(); + /// only_t_z.extensions.retain_by_type(|t| { + /// t == ExtensionType::Transform || t == ExtensionType::Other(b'z') + /// }); + /// assert_eq!(only_t_z, "und-t-mul-z-zzz".parse().unwrap()); + /// ``` + pub fn retain_by_type(&mut self, mut predicate: F) + where + F: FnMut(ExtensionType) -> bool, + { + if !predicate(ExtensionType::Unicode) { + self.unicode.clear(); + } + if !predicate(ExtensionType::Transform) { + self.transform.clear(); + } + if !predicate(ExtensionType::Private) { + self.private.clear(); + } + self.other + .retain(|o| predicate(ExtensionType::Other(o.get_ext_byte()))); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { + let mut unicode = None; + let mut transform = None; + let mut private = None; + let mut other = Vec::new(); + + let mut st = iter.next(); + while let Some(subtag) = st { + match subtag.get(0).map(|b| ExtensionType::try_from_byte(*b)) { + Some(Ok(ExtensionType::Unicode)) => { + unicode = Some(Unicode::try_from_iter(iter)?); + } + Some(Ok(ExtensionType::Transform)) => { + transform = Some(Transform::try_from_iter(iter)?); + } + Some(Ok(ExtensionType::Private)) => { + private = Some(Private::try_from_iter(iter)?); + } + Some(Ok(ExtensionType::Other(ext))) => { + let parsed = Other::try_from_iter(ext, iter)?; + if let Err(idx) = other.binary_search(&parsed) { + other.insert(idx, parsed); + } else { + return Err(ParserError::InvalidExtension); + } + } + None => {} + _ => return Err(ParserError::InvalidExtension), + } + + st = iter.next(); + } + + Ok(Self { 
+ unicode: unicode.unwrap_or_default(), + transform: transform.unwrap_or_default(), + private: private.unwrap_or_default(), + other, + }) + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + let mut wrote_tu = false; + // Alphabetic by singleton + self.other.iter().try_for_each(|other| { + if other.get_ext() > 't' && !wrote_tu { + // Since 't' and 'u' are next to each other in alphabetical + // order, write both now. + self.transform.for_each_subtag_str(f)?; + self.unicode.for_each_subtag_str(f)?; + wrote_tu = true; + } + other.for_each_subtag_str(f)?; + Ok(()) + })?; + + if !wrote_tu { + self.transform.for_each_subtag_str(f)?; + self.unicode.for_each_subtag_str(f)?; + } + + // Private must be written last, since it allows single character + // keys. Extensions must also be written in alphabetical order, + // which would seem to imply that other extensions `y` and `z` are + // invalid, but this is not specified. + self.private.for_each_subtag_str(f)?; + Ok(()) + } +} + +impl_writeable_for_each_subtag_str_no_test!(Extensions); + +#[test] +fn test_writeable() { + use crate::Locale; + use writeable::assert_writeable_eq; + assert_writeable_eq!(Extensions::new(), "",); + assert_writeable_eq!( + "my-t-my-d0-zawgyi".parse::().unwrap().extensions, + "t-my-d0-zawgyi", + ); + assert_writeable_eq!( + "ar-SA-u-ca-islamic-civil" + .parse::() + .unwrap() + .extensions, + "u-ca-islamic-civil", + ); + assert_writeable_eq!( + "en-001-x-foo-bar".parse::().unwrap().extensions, + "x-foo-bar", + ); + assert_writeable_eq!( + "und-t-m0-true".parse::().unwrap().extensions, + "t-m0-true", + ); + assert_writeable_eq!( + "und-a-foo-t-foo-u-foo-w-foo-z-foo-x-foo" + .parse::() + .unwrap() + .extensions, + "a-foo-t-foo-u-foo-w-foo-z-foo-x-foo", + ); +} diff --git a/vendor/icu_locid/src/extensions/other/mod.rs b/vendor/icu_locid/src/extensions/other/mod.rs new file mode 100644 index 000000000..36dbc49b6 --- /dev/null +++ 
b/vendor/icu_locid/src/extensions/other/mod.rs @@ -0,0 +1,157 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Other Use Extensions is a list of extensions other than unicode, +//! transform or private. +//! +//! Those extensions are treated as a pass-through, and no Unicode related +//! behavior depends on them. +//! +//! The main struct for this extension is [`Other`] which is a list of [`Subtag`]s. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::other::Other; +//! use icu::locid::Locale; +//! +//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed."); +//! ``` + +mod subtag; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; +use alloc::vec::Vec; +pub use subtag::Subtag; + +/// A list of [`Other Use Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Those extensions are treated as a pass-through, and no Unicode related +/// behavior depends on them. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::other::{Other, Subtag}; +/// +/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); +/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); +/// +/// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]); +/// assert_eq!(&other.to_string(), "-a-foo-bar"); +/// ``` +/// +/// [`Other Use Extensions`]: https://unicode.org/reports/tr35/#other_extensions +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Other((u8, Vec)); + +impl Other { + /// A constructor which takes a pre-sorted list of [`Subtag`]. + /// + /// # Panics + /// + /// Panics if `ext` is not ASCII alphabetic. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::other::{Other, Subtag}; + /// + /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); + /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); + /// + /// let other = Other::from_vec_unchecked(b'a', vec![subtag1, subtag2]); + /// assert_eq!(&other.to_string(), "-a-foo-bar"); + /// ``` + pub fn from_vec_unchecked(ext: u8, input: Vec) -> Self { + assert!(ext.is_ascii_alphabetic()); + Self((ext, input)) + } + + pub(crate) fn try_from_iter(ext: u8, iter: &mut SubtagIterator) -> Result { + debug_assert!(ext.is_ascii_alphabetic()); + + let mut keys = Vec::new(); + while let Some(subtag) = iter.peek() { + if !Subtag::valid_key(subtag) { + break; + } + if let Ok(key) = Subtag::try_from_bytes(subtag) { + keys.push(key); + } + iter.next(); + } + + Ok(Self::from_vec_unchecked(ext, keys)) + } + + /// Gets the tag character for this extension as a char. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "und-a-hello-world".parse().unwrap(); + /// let other_ext = &loc.extensions.other[0]; + /// assert_eq!(other_ext.get_ext(), 'a'); + /// ``` + pub fn get_ext(&self) -> char { + self.get_ext_byte() as char + } + + /// Gets the tag character for this extension as a byte. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "und-a-hello-world".parse().unwrap(); + /// let other_ext = &loc.extensions.other[0]; + /// assert_eq!(other_ext.get_ext_byte(), b'a'); + /// ``` + pub fn get_ext_byte(&self) -> u8 { + self.0 .0 + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + let (ext, keys) = &self.0; + debug_assert!(ext.is_ascii_alphabetic()); + // Safety: ext is ascii_alphabetic, so it is valid UTF-8 + let ext_str = unsafe { core::str::from_utf8_unchecked(core::slice::from_ref(ext)) }; + f(ext_str)?; + keys.iter().map(|t| t.as_str()).try_for_each(f) + } +} + +writeable::impl_display_with_writeable!(Other); + +impl writeable::Writeable for Other { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + let (ext, keys) = &self.0; + sink.write_char('-')?; + sink.write_char(*ext as char)?; + for key in keys.iter() { + sink.write_char('-')?; + writeable::Writeable::write_to(key, sink)?; + } + + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + let mut result = writeable::LengthHint::exact(2); + for key in self.0 .1.iter() { + result += writeable::Writeable::writeable_length_hint(key) + 1; + } + result + } +} diff --git a/vendor/icu_locid/src/extensions/other/subtag.rs b/vendor/icu_locid/src/extensions/other/subtag.rs new file mode 100644 index 000000000..60995c395 --- /dev/null +++ b/vendor/icu_locid/src/extensions/other/subtag.rs @@ -0,0 +1,37 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A single item used in a list of [`Other`](super::Other) extensions. + /// + /// The subtag has to be an ASCII alphanumerical string no shorter than + /// two characters and no longer than eight. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::other::Subtag; + /// + /// let subtag: Subtag = "Foo".parse().expect("Failed to parse a Subtag."); + /// + /// assert_eq!(subtag.as_str(), "foo"); + /// ``` + Subtag, + extensions::other::Subtag, + extensions_other_subtag, + 2..=8, + s, + s.is_ascii_alphanumeric(), + s.to_ascii_lowercase(), + s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), + InvalidExtension, + ["foo12"], + ["y", "toolooong"], +); + +impl Subtag { + pub(crate) const fn valid_key(v: &[u8]) -> bool { + 2 <= v.len() && v.len() <= 8 + } +} diff --git a/vendor/icu_locid/src/extensions/private/mod.rs b/vendor/icu_locid/src/extensions/private/mod.rs new file mode 100644 index 000000000..13090c94a --- /dev/null +++ b/vendor/icu_locid/src/extensions/private/mod.rs @@ -0,0 +1,167 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Private Use Extensions is a list of extensions intended for +//! private use. +//! +//! Those extensions are treated as a pass-through, and no Unicode related +//! behavior depends on them. +//! +//! The main struct for this extension is [`Private`] which is a list of [`Subtag`]s. +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::private::{Private, Subtag}; +//! use icu::locid::Locale; +//! +//! let mut loc: Locale = "en-US-x-foo-faa".parse().expect("Parsing failed."); +//! +//! let subtag: Subtag = "foo".parse().expect("Parsing subtag failed."); +//! assert!(loc.extensions.private.contains(&subtag)); +//! assert_eq!(loc.extensions.private.iter().next(), Some(&subtag)); +//! loc.extensions.private.clear(); +//! assert_eq!(loc.to_string(), "en-US"); +//! 
``` + +mod other; + +use alloc::vec::Vec; +use core::ops::Deref; + +pub use other::Subtag; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; + +/// A list of [`Private Use Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Those extensions are treated as a pass-through, and no Unicode related +/// behavior depends on them. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::private::{Private, Subtag}; +/// +/// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); +/// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); +/// +/// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]); +/// assert_eq!(&private.to_string(), "-x-foo-bar"); +/// ``` +/// +/// [`Private Use Extensions`]: https://unicode.org/reports/tr35/#pu_extensions +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Private(Vec); + +impl Private { + /// Returns a new empty list of private-use extensions. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::Private; + /// + /// assert_eq!(Private::new(), Private::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(Vec::new()) + } + + /// A constructor which takes a pre-sorted list of [`Subtag`]. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::{Private, Subtag}; + /// + /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); + /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); + /// + /// let private = Private::from_vec_unchecked(vec![subtag1, subtag2]); + /// assert_eq!(&private.to_string(), "-x-foo-bar"); + /// ``` + pub fn from_vec_unchecked(input: Vec) -> Self { + Self(input) + } + + /// Empties the [`Private`] list. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::{Private, Subtag}; + /// + /// let subtag1: Subtag = "foo".parse().expect("Failed to parse a Subtag."); + /// let subtag2: Subtag = "bar".parse().expect("Failed to parse a Subtag."); + /// let mut private = Private::from_vec_unchecked(vec![subtag1, subtag2]); + /// + /// assert_eq!(&private.to_string(), "-x-foo-bar"); + /// + /// private.clear(); + /// + /// assert_eq!(&private.to_string(), ""); + /// ``` + pub fn clear(&mut self) { + self.0.clear(); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { + let keys = iter + .map(Subtag::try_from_bytes) + .collect::, _>>()?; + + Ok(Self::from_vec_unchecked(keys)) + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.is_empty() { + return Ok(()); + } + f("x")?; + self.deref().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +writeable::impl_display_with_writeable!(Private); + +impl writeable::Writeable for Private { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + if self.is_empty() { + return Ok(()); + } + sink.write_str("-x")?; + for key in self.iter() { + sink.write_char('-')?; + writeable::Writeable::write_to(key, sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + if self.is_empty() { + return writeable::LengthHint::exact(0); + } + let mut result = 
writeable::LengthHint::exact(2); + for key in self.iter() { + result += writeable::Writeable::writeable_length_hint(key) + 1; + } + result + } +} + +impl Deref for Private { + type Target = [Subtag]; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} diff --git a/vendor/icu_locid/src/extensions/private/other.rs b/vendor/icu_locid/src/extensions/private/other.rs new file mode 100644 index 000000000..a91e12855 --- /dev/null +++ b/vendor/icu_locid/src/extensions/private/other.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A single item used in a list of [`Private`](super::Private) extensions. + /// + /// The subtag has to be an ASCII alphanumerical string no shorter than + /// one character and no longer than eight. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::private::Subtag; + /// + /// let subtag1: Subtag = "Foo".parse().expect("Failed to parse a Subtag."); + /// + /// assert_eq!(subtag1.as_str(), "foo"); + /// ``` + Subtag, + extensions::private::Subtag, + extensions_private_subtag, + 1..=8, + s, + s.is_ascii_alphanumeric(), + s.to_ascii_lowercase(), + s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), + InvalidExtension, + ["foo12"], + ["toolooong"], +); diff --git a/vendor/icu_locid/src/extensions/transform/fields.rs b/vendor/icu_locid/src/extensions/transform/fields.rs new file mode 100644 index 000000000..ca10000a7 --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/fields.rs @@ -0,0 +1,228 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use core::borrow::Borrow; +use core::iter::FromIterator; +use litemap::LiteMap; + +use super::Key; +use super::Value; + +/// A list of [`Key`]-[`Value`] pairs representing functional information +/// about content transformations. +/// +/// Here are examples of fields used in Unicode: +/// - `s0`, `d0` - Transform source/destination +/// - `t0` - Machine Translation +/// - `h0` - Hybrid Locale Identifiers +/// +/// You can find the full list in [`Unicode BCP 47 T Extension`] section of LDML. +/// +/// [`Unicode BCP 47 T Extension`]: https://unicode.org/reports/tr35/tr35.html#BCP47_T_Extension +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::transform::{Fields, Key, Value}; +/// +/// let key: Key = "h0".parse().expect("Failed to parse a Key."); +/// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); +/// let fields: Fields = vec![(key, value)].into_iter().collect(); +/// +/// assert_eq!(&fields.to_string(), "h0-hybrid"); +/// ``` +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Fields(LiteMap); + +impl Fields { + /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Fields; + /// + /// assert_eq!(Fields::new(), Fields::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(LiteMap::new()) + } + + /// Returns `true` if there are no fields. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Fields; + /// use icu::locid::locale; + /// use icu::locid::Locale; + /// + /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap(); + /// let loc2 = locale!("und-u-ca-buddhist"); + /// + /// assert!(!loc1.extensions.transform.fields.is_empty()); + /// assert!(loc2.extensions.transform.fields.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Empties the [`Fields`] list. 
+ /// + /// Returns the old list. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::{Fields, Key, Value}; + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); + /// let mut fields: Fields = vec![(key, value)].into_iter().collect(); + /// + /// assert_eq!(&fields.to_string(), "h0-hybrid"); + /// + /// fields.clear(); + /// + /// assert_eq!(&fields.to_string(), ""); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::{Fields, Key, Value}; + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); + /// let mut fields: Fields = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// assert!(&fields.contains_key(&key)); + /// ``` + pub fn contains_key(&self, key: &Q) -> bool + where + Key: Borrow, + Q: Ord, + { + self.0.contains_key(key) + } + + /// Returns a reference to the [`Value`] corresponding to the [`Key`]. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::{Fields, Key, Value}; + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// let value: Value = "hybrid".parse().expect("Failed to parse a Value."); + /// let mut fields: Fields = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "h0".parse().expect("Failed to parse a Key."); + /// assert_eq!( + /// fields.get(&key).map(|v| v.to_string()), + /// Some("hybrid".to_string()) + /// ); + /// ``` + pub fn get(&self, key: &Q) -> Option<&Value> + where + Key: Borrow, + Q: Ord, + { + self.0.get(key) + } + + /// Sets the specified keyword, returning the old value if it already existed. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Key; + /// use icu::locid::extensions::transform::Value; + /// use icu::locid::extensions_transform_key as key; + /// use icu::locid::Locale; + /// + /// let lower = "lower".parse::().expect("valid extension subtag"); + /// let casefold = "casefold".parse::().expect("valid extension subtag"); + /// + /// let mut loc: Locale = "en-t-hi-d0-casefold" + /// .parse() + /// .expect("valid BCP-47 identifier"); + /// let old_value = loc.extensions.transform.fields.set(key!("d0"), lower); + /// + /// assert_eq!(old_value, Some(casefold)); + /// assert_eq!(loc, "en-t-hi-d0-lower".parse().unwrap()); + /// ``` + pub fn set(&mut self, key: Key, value: Value) -> Option { + self.0.insert(key, value) + } + + /// Retains a subset of fields as specified by the predicate function. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions_transform_key as key; + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-t-h0-hybrid-d0-hex-m0-xml".parse().unwrap(); + /// + /// loc.extensions + /// .transform + /// .fields + /// .retain_by_key(|&k| k == key!("h0")); + /// assert_eq!(loc, "und-t-h0-hybrid".parse().unwrap()); + /// + /// loc.extensions + /// .transform + /// .fields + /// .retain_by_key(|&k| k == key!("d0")); + /// assert_eq!(loc, Locale::UND); + /// ``` + pub fn retain_by_key(&mut self, mut predicate: F) + where + F: FnMut(&Key) -> bool, + { + self.0.retain(|k, _| predicate(k)) + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + for (k, v) in self.0.iter() { + f(k.as_str())?; + v.for_each_subtag_str(f)?; + } + Ok(()) + } + + /// This needs to be its own method to help with type inference in helpers.rs + #[cfg(test)] + pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self { + v.into_iter().collect() + } +} + +impl From> for Fields { + fn from(map: LiteMap) -> Self { + Self(map) + } +} + +impl FromIterator<(Key, Value)> for Fields { + fn from_iter>(iter: I) -> Self { + LiteMap::from_iter(iter).into() + } +} + +impl_writeable_for_key_value!(Fields, "h0", "hybrid", "m0", "m0-true"); diff --git a/vendor/icu_locid/src/extensions/transform/key.rs b/vendor/icu_locid/src/extensions/transform/key.rs new file mode 100644 index 000000000..5400988a1 --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/key.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A key used in a list of [`Fields`](super::Fields). 
+ /// + /// The key has to be two ASCII characters long, with the first + /// character being alphabetic, and the second being a number. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Key; + /// + /// let key1: Key = "k0".parse().expect("Failed to parse a Key."); + /// + /// assert_eq!(key1.as_str(), "k0"); + /// ``` + Key, + extensions::transform::Key, + extensions_transform_key, + 2..=2, + s, + s.all_bytes()[0].is_ascii_alphabetic() && s.all_bytes()[1].is_ascii_digit(), + s.to_ascii_lowercase(), + s.all_bytes()[0].is_ascii_lowercase() && s.all_bytes()[1].is_ascii_digit(), + InvalidExtension, + ["k0"], + ["", "k", "0k", "k12"], +); diff --git a/vendor/icu_locid/src/extensions/transform/mod.rs b/vendor/icu_locid/src/extensions/transform/mod.rs new file mode 100644 index 000000000..a8c605146 --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/mod.rs @@ -0,0 +1,236 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Transform Extensions provide information on content transformations in a given locale. +//! +//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an +//! optional [`LanguageIdentifier`]. +//! +//! [`LanguageIdentifier`]: super::super::LanguageIdentifier +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::transform::{Fields, Key, Transform, Value}; +//! use icu::locid::{LanguageIdentifier, Locale}; +//! +//! let mut loc: Locale = +//! "en-US-t-es-AR-h0-hybrid".parse().expect("Parsing failed."); +//! +//! let lang: LanguageIdentifier = +//! "es-AR".parse().expect("Parsing LanguageIdentifier failed."); +//! +//! let key: Key = "h0".parse().expect("Parsing key failed."); +//! let value: Value = "hybrid".parse().expect("Parsing value failed."); +//! +//! 
assert_eq!(loc.extensions.transform.lang, Some(lang)); +//! assert!(loc.extensions.transform.fields.contains_key(&key)); +//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); +//! +//! assert_eq!(&loc.extensions.transform.to_string(), "-t-es-AR-h0-hybrid"); +//! ``` +mod fields; +mod key; +mod value; + +pub use fields::Fields; +pub use key::Key; +pub use value::Value; + +use crate::parser::SubtagIterator; +use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode}; +use crate::subtags::Language; +use crate::LanguageIdentifier; +use alloc::vec; +use litemap::LiteMap; + +/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Transform extension carries information about source language or script of +/// transformed content, including content that has been transliterated, transcribed, +/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details). +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::transform::{Key, Value}; +/// use icu::locid::{LanguageIdentifier, Locale}; +/// +/// let mut loc: Locale = +/// "de-t-en-US-h0-hybrid".parse().expect("Parsing failed."); +/// +/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed."); +/// +/// assert_eq!(loc.extensions.transform.lang, Some(en_us)); +/// let key: Key = "h0".parse().expect("Parsing key failed."); +/// let value: Value = "hybrid".parse().expect("Parsing value failed."); +/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); +/// ``` +/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension +/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)] +#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure +pub struct Transform { + 
/// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present. + pub lang: Option, + /// The key-value pairs present in this locale extension, with each extension key subtag + /// associated to its provided value subtag. + pub fields: Fields, +} + +impl Transform { + /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Transform; + /// + /// assert_eq!(Transform::new(), Transform::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self { + lang: None, + fields: Fields::new(), + } + } + + /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "en-US-t-es-AR".parse().expect("Parsing failed."); + /// + /// assert!(!loc.extensions.transform.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.lang.is_none() && self.fields.is_empty() + } + + /// Clears the transform extension, effectively removing it from the locale. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "en-US-t-es-AR".parse().unwrap(); + /// loc.extensions.transform.clear(); + /// assert_eq!(loc, "en-US".parse().unwrap()); + /// ``` + pub fn clear(&mut self) { + self.lang = None; + self.fields.clear(); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { + let mut tlang = None; + let mut tfields = LiteMap::new(); + + if let Some(subtag) = iter.peek() { + if Language::try_from_bytes(subtag).is_ok() { + tlang = Some(parse_language_identifier_from_iter( + iter, + ParserMode::Partial, + )?); + } + } + + let mut current_tkey = None; + let mut current_tvalue = vec![]; + + while let Some(subtag) = iter.peek() { + if let Some(tkey) = current_tkey { + if let Ok(val) = Value::parse_subtag(subtag) { + current_tvalue.push(val); + } else { + if current_tvalue.is_empty() { + return Err(ParserError::InvalidExtension); + } + tfields.try_insert( + tkey, + Value::from_vec_unchecked(current_tvalue.drain(..).flatten().collect()), + ); + current_tkey = None; + continue; + } + } else if let Ok(tkey) = Key::try_from_bytes(subtag) { + current_tkey = Some(tkey); + } else { + break; + } + + iter.next(); + } + + if let Some(tkey) = current_tkey { + if current_tvalue.is_empty() { + return Err(ParserError::InvalidExtension); + } + tfields.try_insert( + tkey, + Value::from_vec_unchecked(current_tvalue.into_iter().flatten().collect()), + ); + } + + Ok(Self { + lang: tlang, + fields: tfields.into(), + }) + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.is_empty() { + return Ok(()); + } + f("t")?; + if let Some(lang) = &self.lang { + lang.for_each_subtag_str(f)?; + } + self.fields.for_each_subtag_str(f) + } +} + +writeable::impl_display_with_writeable!(Transform); + +impl writeable::Writeable for Transform { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + if self.is_empty() { + 
return Ok(()); + } + sink.write_str("-t")?; + if let Some(lang) = &self.lang { + sink.write_char('-')?; + writeable::Writeable::write_to(lang, sink)?; + } + if !self.fields.is_empty() { + sink.write_char('-')?; + writeable::Writeable::write_to(&self.fields, sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + if self.is_empty() { + return writeable::LengthHint::exact(0); + } + let mut result = writeable::LengthHint::exact(2); + if let Some(lang) = &self.lang { + result += writeable::Writeable::writeable_length_hint(lang) + 1; + } + if !self.fields.is_empty() { + result += writeable::Writeable::writeable_length_hint(&self.fields) + 1; + } + result + } +} diff --git a/vendor/icu_locid/src/extensions/transform/value.rs b/vendor/icu_locid/src/extensions/transform/value.rs new file mode 100644 index 000000000..84468361a --- /dev/null +++ b/vendor/icu_locid/src/extensions/transform/value.rs @@ -0,0 +1,119 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::parser::{get_subtag_iterator, ParserError}; +use alloc::vec; +use alloc::vec::Vec; +use core::ops::RangeInclusive; +use core::str::FromStr; +use tinystr::TinyAsciiStr; + +/// A value used in a list of [`Fields`](super::Fields). +/// +/// The value has to be a sequence of one or more alphanumerical strings +/// separated by `-`. +/// Each part of the sequence has to be no shorter than three characters and no +/// longer than 8. 
+/// +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::transform::Value; +/// +/// let value1: Value = "hybrid".parse().expect("Failed to parse a Value."); +/// let value2: Value = +/// "hybrid-foobar".parse().expect("Failed to parse a Value."); +/// +/// assert_eq!(&value1.to_string(), "hybrid"); +/// assert_eq!(&value2.to_string(), "hybrid-foobar"); +/// ``` +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Value(Vec>); + +const TYPE_LENGTH: RangeInclusive = 3..=8; +const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); + +impl Value { + /// A constructor which takes a utf8 slice, parses it and + /// produces a well-formed [`Value`]. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::transform::Value; + /// + /// let value = Value::try_from_bytes(b"hybrid").expect("Parsing failed."); + /// + /// assert_eq!(&value.to_string(), "hybrid"); + /// ``` + pub fn try_from_bytes(input: &[u8]) -> Result { + let mut v = vec![]; + let mut has_value = false; + + for subtag in get_subtag_iterator(input) { + if !Self::is_type_subtag(subtag) { + return Err(ParserError::InvalidExtension); + } + has_value = true; + let val = + TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?; + if val != TRUE_TVALUE { + v.push(val); + } + } + + if !has_value { + return Err(ParserError::InvalidExtension); + } + Ok(Self(v)) + } + + pub(crate) fn from_vec_unchecked(input: Vec>) -> Self { + Self(input) + } + + pub(crate) fn is_type_subtag(t: &[u8]) -> bool { + TYPE_LENGTH.contains(&t.len()) && !t.iter().any(|c: &u8| !c.is_ascii_alphanumeric()) + } + + pub(crate) fn parse_subtag( + t: &[u8], + ) -> Result>, ParserError> { + let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?; + if !TYPE_LENGTH.contains(&t.len()) || !s.is_ascii_alphanumeric() { + return Err(ParserError::InvalidExtension); + } + + let s = s.to_ascii_lowercase(); + + if s == TRUE_TVALUE { + Ok(None) + } else { 
+ Ok(Some(s)) + } + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.0.is_empty() { + f("true")?; + } else { + self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)?; + } + Ok(()) + } +} + +impl FromStr for Value { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + +impl_writeable_for_tinystr_list!(Value, "true", "hybrid", "foobar"); diff --git a/vendor/icu_locid/src/extensions/unicode/attribute.rs b/vendor/icu_locid/src/extensions/unicode/attribute.rs new file mode 100644 index 000000000..ba4b70924 --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/attribute.rs @@ -0,0 +1,34 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// An attribute used in a set of [`Attributes`](super::Attributes). + /// + /// An attribute has to be a sequence of alphanumerical characters no + /// shorter than three and no longer than eight characters. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Attribute; + /// use icu::locid::extensions_unicode_attribute as attribute; + /// + /// let attr: Attribute = + /// "buddhist".parse().expect("Failed to parse an Attribute."); + /// + /// assert_eq!(attr, attribute!("buddhist")); + /// ``` + Attribute, + extensions::unicode::Attribute, + extensions_unicode_attribute, + 3..=8, + s, + s.is_ascii_alphanumeric(), + s.to_ascii_lowercase(), + s.is_ascii_alphanumeric() && s.is_ascii_lowercase(), + InvalidExtension, + ["foo12"], + ["no", "toolooong"], +); diff --git a/vendor/icu_locid/src/extensions/unicode/attributes.rs b/vendor/icu_locid/src/extensions/unicode/attributes.rs new file mode 100644 index 000000000..1f9536bfa --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/attributes.rs @@ -0,0 +1,115 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::Attribute; + +use alloc::vec::Vec; +use core::ops::Deref; + +/// A set of [`Attribute`] elements as defined in [`Unicode Extension Attributes`]. 
+/// +/// [`Unicode Extension Attributes`]: https://unicode.org/reports/tr35/tr35.html#u_Extension +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::unicode::{Attribute, Attributes}; +/// +/// let attribute1: Attribute = +/// "foobar".parse().expect("Failed to parse a variant subtag."); +/// +/// let attribute2: Attribute = "testing" +/// .parse() +/// .expect("Failed to parse a variant subtag."); +/// let mut v = vec![attribute1, attribute2]; +/// v.sort(); +/// v.dedup(); +/// +/// let attributes: Attributes = Attributes::from_vec_unchecked(v); +/// assert_eq!(attributes.to_string(), "foobar-testing"); +/// ``` +#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Attributes(Vec); + +impl Attributes { + /// Returns a new empty set of attributes. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Attributes; + /// + /// assert_eq!(Attributes::new(), Attributes::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(Vec::new()) + } + + /// A constructor which takes a pre-sorted list of [`Attribute`] elements. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Attribute, Attributes}; + /// + /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed."); + /// let attribute2: Attribute = "testing".parse().expect("Parsing failed."); + /// let mut v = vec![attribute1, attribute2]; + /// v.sort(); + /// v.dedup(); + /// + /// let attributes = Attributes::from_vec_unchecked(v); + /// ``` + /// + /// Notice: For performance- and memory-constrained environments, it is recommended + /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort) + /// and [`dedup`](Vec::dedup()). + pub fn from_vec_unchecked(input: Vec) -> Self { + Self(input) + } + + /// Empties the [`Attributes`] list. + /// + /// Returns the old list. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Attribute, Attributes}; + /// + /// let attribute1: Attribute = "foobar".parse().expect("Parsing failed."); + /// let attribute2: Attribute = "testing".parse().expect("Parsing failed."); + /// let mut v = vec![attribute1, attribute2]; + /// + /// let mut attributes: Attributes = Attributes::from_vec_unchecked(v); + /// + /// assert_eq!(attributes.to_string(), "foobar-testing"); + /// + /// attributes.clear(); + /// + /// assert_eq!(attributes.to_string(), ""); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + self.deref().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +impl_writeable_for_subtag_list!(Attributes, "foobar", "testing"); + +impl Deref for Attributes { + type Target = [Attribute]; + + fn deref(&self) -> &[Attribute] { + self.0.deref() + } +} diff --git a/vendor/icu_locid/src/extensions/unicode/key.rs b/vendor/icu_locid/src/extensions/unicode/key.rs new file mode 100644 index 000000000..bdfdd4e5c --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/key.rs @@ -0,0 +1,31 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +impl_tinystr_subtag!( + /// A key used in a list of [`Keywords`](super::Keywords). + /// + /// The key has to be a two ASCII alphanumerical characters long, with the first + /// character being alphanumeric, and the second being alphabetic. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// + /// assert!("ca".parse::().is_ok()); + /// ``` + Key, + extensions::unicode::Key, + extensions_unicode_key, + 2..=2, + s, + s.all_bytes()[0].is_ascii_alphanumeric() && s.all_bytes()[1].is_ascii_alphabetic(), + s.to_ascii_lowercase(), + (s.all_bytes()[0].is_ascii_lowercase() || s.all_bytes()[0].is_ascii_digit()) + && s.all_bytes()[1].is_ascii_lowercase(), + InvalidExtension, + ["ca", "8a"], + ["a", "a8", "abc"], +); diff --git a/vendor/icu_locid/src/extensions/unicode/keywords.rs b/vendor/icu_locid/src/extensions/unicode/keywords.rs new file mode 100644 index 000000000..dc9a15921 --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/keywords.rs @@ -0,0 +1,404 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::borrow::Borrow; +use core::cmp::Ordering; +use core::iter::FromIterator; +use litemap::LiteMap; + +use super::Key; +use super::Value; +use crate::helpers::ShortVec; +use crate::ordering::SubtagOrderingResult; + +/// A list of [`Key`]-[`Value`] pairs representing functional information +/// about locale's internationnalization preferences. +/// +/// Here are examples of fields used in Unicode: +/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`) +/// - `ca` - Calendar (`buddhist`, `gregory`, ...) +/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...) +/// +/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML. 
+/// +/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_ +/// +/// # Examples +/// +/// Manually build up a [`Keywords`] object: +/// +/// ``` +/// use icu::locid::extensions::unicode::{Key, Keywords, Value}; +/// +/// let key: Key = "hc".parse().expect("Failed to parse a Key."); +/// let value: Value = "h23".parse().expect("Failed to parse a Value."); +/// let keywords: Keywords = vec![(key, value)].into_iter().collect(); +/// +/// assert_eq!(&keywords.to_string(), "hc-h23"); +/// ``` +/// +/// Access a [`Keywords`] object from a [`Locale`]: +/// +/// ``` +/// use icu::locid::{ +/// extensions_unicode_key as key, extensions_unicode_value as value, +/// Locale, +/// }; +/// +/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47"); +/// +/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None); +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&key!("hc")), +/// Some(&value!("h23")) +/// ); +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&key!("kc")), +/// Some(&value!("true")) +/// ); +/// +/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc"); +/// ``` +/// +/// [`Locale`]: crate::Locale +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +pub struct Keywords(LiteMap>); + +impl Keywords { + /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// + /// assert_eq!(Keywords::new(), Keywords::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self(LiteMap::new()) + } + + /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context. 
+ #[inline] + pub const fn new_single(key: Key, value: Value) -> Self { + Self(LiteMap::from_sorted_store_unchecked(ShortVec::new_single( + (key, value), + ))) + } + + /// Returns `true` if there are no keywords. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// use icu::locid::locale; + /// use icu::locid::Locale; + /// + /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap(); + /// let loc2 = locale!("und-u-ca-buddhist"); + /// + /// assert!(loc1.extensions.unicode.keywords.is_empty()); + /// assert!(!loc2.extensions.unicode.keywords.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Key, Keywords, Value}; + /// use litemap::LiteMap; + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// let value: Value = "gregory".parse().expect("Failed to parse a Value."); + /// let keywords: Keywords = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// assert!(&keywords.contains_key(&key)); + /// ``` + pub fn contains_key(&self, key: &Q) -> bool + where + Key: Borrow, + Q: Ord, + { + self.0.contains_key(key) + } + + /// Returns a reference to the [`Value`] corresponding to the [`Key`]. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Key, Keywords, Value}; + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// let value: Value = "buddhist".parse().expect("Failed to parse a Value."); + /// let keywords: Keywords = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// assert_eq!( + /// keywords.get(&key).map(|v| v.to_string()), + /// Some("buddhist".to_string()) + /// ); + /// ``` + pub fn get(&self, key: &Q) -> Option<&Value> + where + Key: Borrow, + Q: Ord, + { + self.0.get(key) + } + + /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`]. + /// + /// Returns `None` if the key doesn't exist or if the key has no value. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::{Key, Keywords, Value}; + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// let value: Value = "buddhist".parse().expect("Failed to parse a Value."); + /// let mut keywords: Keywords = vec![(key, value)].into_iter().collect(); + /// + /// let key: Key = "ca".parse().expect("Failed to parse a Key."); + /// if let Some(value) = keywords.get_mut(&key) { + /// *value = "gregory".parse().expect("Failed to parse a Value."); + /// } + /// assert_eq!( + /// keywords.get(&key).map(|v| v.to_string()), + /// Some("gregory".to_string()) + /// ); + /// ``` + pub fn get_mut(&mut self, key: &Q) -> Option<&mut Value> + where + Key: Borrow, + Q: Ord, + { + self.0.get_mut(key) + } + + /// Sets the specified keyword, returning the old value if it already existed. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// use icu::locid::extensions::unicode::Value; + /// use icu::locid::Locale; + /// use icu::locid::{ + /// extensions_unicode_key as key, extensions_unicode_value as value, + /// }; + /// + /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" + /// .parse() + /// .expect("valid BCP-47 identifier"); + /// let old_value = loc + /// .extensions + /// .unicode + /// .keywords + /// .set(key!("ca"), value!("japanese")); + /// + /// assert_eq!(old_value, Some(value!("buddhist"))); + /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap()); + /// ``` + pub fn set(&mut self, key: Key, value: Value) -> Option { + self.0.insert(key, value) + } + + /// Removes the specified keyword, returning the old value if it existed. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Key; + /// use icu::locid::extensions_unicode_key as key; + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" + /// .parse() + /// .expect("valid BCP-47 identifier"); + /// loc.extensions.unicode.keywords.remove(key!("ca")); + /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap()); + /// ``` + pub fn remove>(&mut self, key: Q) -> Option { + self.0.remove(key.borrow()) + } + + /// Clears all Unicode extension keywords, leaving Unicode attributes. + /// + /// Returns the old Unicode extension keywords. + /// + /// # Example + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap(); + /// loc.extensions.unicode.keywords.clear(); + /// assert_eq!(loc, "und-u-hello".parse().unwrap()); + /// ``` + pub fn clear(&mut self) -> Self { + core::mem::take(self) + } + + /// Retains a subset of keywords as specified by the predicate function. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions_unicode_key as key; + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap(); + /// + /// loc.extensions + /// .unicode + /// .keywords + /// .retain_by_key(|&k| k == key!("hc")); + /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap()); + /// + /// loc.extensions + /// .unicode + /// .keywords + /// .retain_by_key(|&k| k == key!("ms")); + /// assert_eq!(loc, Locale::UND); + /// ``` + pub fn retain_by_key(&mut self, mut predicate: F) + where + F: FnMut(&Key) -> bool, + { + self.0.retain(|k, _| predicate(k)) + } + + /// Compare this [`Keywords`] with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted this + /// [`Keywords`] to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// use icu::locid::Locale; + /// use std::cmp::Ordering; + /// + /// let bcp47_strings: &[&str] = + /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"]; + /// + /// for ab in bcp47_strings.windows(2) { + /// let a = ab[0]; + /// let b = ab[1]; + /// assert!(a.cmp(b) == Ordering::Less); + /// let a_kwds = format!("und-u-{}", a) + /// .parse::() + /// .unwrap() + /// .extensions + /// .unicode + /// .keywords; + /// assert_eq!(a, a_kwds.to_string()); + /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal); + /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less); + /// } + /// ``` + pub fn strict_cmp(&self, other: &[u8]) -> Ordering { + self.strict_cmp_iter(other.split(|b| *b == b'-')).end() + } + + /// Compare this [`Keywords`] with an iterator of BCP-47 subtags. 
+ /// + /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as + /// a more modular version that allows multiple subtag iterators to be chained together. + /// + /// For an additional example, see [`SubtagOrderingResult`]. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Keywords; + /// use icu::locid::locale; + /// use std::cmp::Ordering; + /// + /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"]; + /// + /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords; + /// assert_eq!( + /// Ordering::Equal, + /// kwds.strict_cmp_iter(subtags.iter().copied()).end() + /// ); + /// + /// let kwds = locale!("und").extensions.unicode.keywords; + /// assert_eq!( + /// Ordering::Less, + /// kwds.strict_cmp_iter(subtags.iter().copied()).end() + /// ); + /// + /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords; + /// assert_eq!( + /// Ordering::Greater, + /// kwds.strict_cmp_iter(subtags.iter().copied()).end() + /// ); + /// ``` + pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult + where + I: Iterator, + { + let r = self.for_each_subtag_str(&mut |subtag| { + if let Some(other) = subtags.next() { + match subtag.as_bytes().cmp(other) { + Ordering::Equal => Ok(()), + not_equal => Err(not_equal), + } + } else { + Err(Ordering::Greater) + } + }); + match r { + Ok(_) => SubtagOrderingResult::Subtags(subtags), + Err(o) => SubtagOrderingResult::Ordering(o), + } + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + for (k, v) in self.0.iter() { + f(k.as_str())?; + v.for_each_subtag_str(f)?; + } + Ok(()) + } + + /// This needs to be its own method to help with type inference in helpers.rs + #[cfg(test)] + pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self { + v.into_iter().collect() + } +} + +impl From>> for Keywords { + fn from(map: LiteMap>) -> Self { + Self(map) + } +} + 
+impl FromIterator<(Key, Value)> for Keywords { + fn from_iter>(iter: I) -> Self { + LiteMap::from_iter(iter).into() + } +} + +impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm"); diff --git a/vendor/icu_locid/src/extensions/unicode/mod.rs b/vendor/icu_locid/src/extensions/unicode/mod.rs new file mode 100644 index 000000000..fabf1036c --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/mod.rs @@ -0,0 +1,233 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Unicode Extensions provide information about user preferences in a given locale. +//! +//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and +//! [`Attributes`]. +//! +//! +//! # Examples +//! +//! ``` +//! use icu::locid::extensions::unicode::{Attribute, Key, Unicode, Value}; +//! use icu::locid::{LanguageIdentifier, Locale}; +//! +//! let mut loc: Locale = +//! "en-US-u-foobar-hc-h12".parse().expect("Parsing failed."); +//! +//! let key: Key = "hc".parse().expect("Parsing key failed."); +//! let value: Value = "h12".parse().expect("Parsing value failed."); +//! let attribute: Attribute = +//! "foobar".parse().expect("Parsing attribute failed."); +//! +//! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); +//! assert!(loc.extensions.unicode.attributes.contains(&attribute)); +//! +//! assert_eq!(&loc.extensions.unicode.to_string(), "-u-foobar-hc-h12"); +//! ``` +mod attribute; +mod attributes; +mod key; +mod keywords; +mod value; + +use alloc::vec; +pub use attribute::Attribute; +pub use attributes::Attributes; +pub use key::Key; +pub use keywords::Keywords; +pub use value::Value; + +use crate::parser::ParserError; +use crate::parser::SubtagIterator; +use litemap::LiteMap; + +/// Unicode Extensions provide information about user preferences in a given locale. 
+/// +/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale +/// Identifier`] specification. +/// +/// Unicode extensions provide subtags that specify language and/or locale-based behavior +/// or refinements to language tags, according to work done by the Unicode Consortium. +/// (See [`RFC 6067`] for details). +/// +/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension +/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt +/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::unicode::{Key, Value}; +/// use icu::locid::Locale; +/// +/// let mut loc: Locale = +/// "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed."); +/// +/// let key: Key = "ca".parse().expect("Parsing key failed."); +/// let value: Value = "buddhist".parse().expect("Parsing value failed."); +/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); +/// ``` +#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure +pub struct Unicode { + /// The key-value pairs present in this locale extension, with each extension key subtag + /// associated to its provided value subtag. + pub keywords: Keywords, + /// A canonically ordered sequence of single standalone subtags for this locale extension. + pub attributes: Attributes, +} + +impl Unicode { + /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Unicode; + /// + /// assert_eq!(Unicode::new(), Unicode::default()); + /// ``` + #[inline] + pub const fn new() -> Self { + Self { + keywords: Keywords::new(), + attributes: Attributes::new(), + } + } + + /// Returns [`true`] if there list of keywords and attributes is empty. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed."); + /// + /// assert!(!loc.extensions.unicode.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.keywords.is_empty() && self.attributes.is_empty() + } + + /// Clears all Unicode extension keywords and attributes, effectively removing + /// the Unicode extension. + /// + /// # Example + /// + /// ``` + /// use icu::locid::Locale; + /// + /// let mut loc: Locale = + /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap(); + /// loc.extensions.unicode.clear(); + /// assert_eq!(loc, "und-t-mul".parse().unwrap()); + /// ``` + pub fn clear(&mut self) { + self.keywords.clear(); + self.attributes.clear(); + } + + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { + let mut attributes = vec![]; + let mut keywords = LiteMap::new(); + + let mut current_keyword = None; + let mut current_type = vec![]; + + while let Some(subtag) = iter.peek() { + if let Ok(attr) = Attribute::try_from_bytes(subtag) { + if let Err(idx) = attributes.binary_search(&attr) { + attributes.insert(idx, attr); + } + } else { + break; + } + iter.next(); + } + + while let Some(subtag) = iter.peek() { + let slen = subtag.len(); + if slen == 2 { + if let Some(kw) = current_keyword.take() { + keywords.try_insert(kw, Value::from_vec_unchecked(current_type)); + current_type = vec![]; + } + current_keyword = Some(Key::try_from_bytes(subtag)?); + } else if current_keyword.is_some() { + match Value::parse_subtag(subtag) { + Ok(Some(t)) => current_type.push(t), + Ok(None) => {} + Err(_) => break, + } + } else { + break; + } + iter.next(); + } + + if let Some(kw) = current_keyword.take() { + keywords.try_insert(kw, Value::from_vec_unchecked(current_type)); + } + + // Ensure we've defined at least one attribute or keyword + if attributes.is_empty() && keywords.is_empty() { + return Err(ParserError::InvalidExtension); + } + + Ok(Self { + 
keywords: keywords.into(), + attributes: Attributes::from_vec_unchecked(attributes), + }) + } + + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + if self.is_empty() { + return Ok(()); + } + f("u")?; + self.attributes.for_each_subtag_str(f)?; + self.keywords.for_each_subtag_str(f)?; + Ok(()) + } +} + +writeable::impl_display_with_writeable!(Unicode); + +impl writeable::Writeable for Unicode { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + if self.is_empty() { + return Ok(()); + } + sink.write_str("-u")?; + if !self.attributes.is_empty() { + sink.write_char('-')?; + writeable::Writeable::write_to(&self.attributes, sink)?; + } + if !self.keywords.is_empty() { + sink.write_char('-')?; + writeable::Writeable::write_to(&self.keywords, sink)?; + } + Ok(()) + } + + fn writeable_length_hint(&self) -> writeable::LengthHint { + if self.is_empty() { + return writeable::LengthHint::exact(0); + } + let mut result = writeable::LengthHint::exact(2); + if !self.attributes.is_empty() { + result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1; + } + if !self.keywords.is_empty() { + result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1; + } + result + } +} diff --git a/vendor/icu_locid/src/extensions/unicode/value.rs b/vendor/icu_locid/src/extensions/unicode/value.rs new file mode 100644 index 000000000..ce9982a4c --- /dev/null +++ b/vendor/icu_locid/src/extensions/unicode/value.rs @@ -0,0 +1,199 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::helpers::ShortVec; +use crate::parser::{get_subtag_iterator, ParserError}; +use alloc::vec::Vec; +use core::ops::RangeInclusive; +use core::str::FromStr; +use tinystr::TinyAsciiStr; + +/// A value used in a list of [`Keywords`](super::Keywords). 
+/// +/// The value has to be a sequence of one or more alphanumerical strings +/// separated by `-`. +/// Each part of the sequence has to be no shorter than three characters and no +/// longer than 8. +/// +/// +/// # Examples +/// +/// ``` +/// use icu::locid::extensions::unicode::Value; +/// +/// let value1: Value = "gregory".parse().expect("Failed to parse a Value."); +/// let value2: Value = +/// "islamic-civil".parse().expect("Failed to parse a Value."); +/// let value3: Value = "true".parse().expect("Failed to parse a Value."); +/// +/// assert_eq!(&value1.to_string(), "gregory"); +/// assert_eq!(&value2.to_string(), "islamic-civil"); +/// +/// // The value "true" is special-cased to an empty value +/// assert_eq!(&value3.to_string(), ""); +/// ``` +#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] +pub struct Value(ShortVec>); + +const VALUE_LENGTH: RangeInclusive = 3..=8; +const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); + +impl Value { + /// A constructor which takes a utf8 slice, parses it and + /// produces a well-formed [`Value`]. + /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Value; + /// + /// let value = Value::try_from_bytes(b"buddhist").expect("Parsing failed."); + /// + /// assert_eq!(&value.to_string(), "buddhist"); + /// ``` + pub fn try_from_bytes(input: &[u8]) -> Result { + let mut v = ShortVec::new(); + + if !input.is_empty() { + for subtag in get_subtag_iterator(input) { + let val = Self::subtag_from_bytes(subtag)?; + if let Some(val) = val { + v.push(val); + } + } + } + Ok(Self(v)) + } + + /// Const constructor for when the value contains only a single subtag. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Value; + /// + /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag"); + /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag"); + /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag"); + /// ``` + pub const fn try_from_single_subtag(subtag: &[u8]) -> Result { + match Self::subtag_from_bytes(subtag) { + Err(_) => Err(ParserError::InvalidExtension), + Ok(option) => Ok(Self::from_tinystr(option)), + } + } + + #[doc(hidden)] + pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] { + self.0.as_slice() + } + + #[doc(hidden)] + pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> { + self.0.single() + } + + #[doc(hidden)] + pub const fn from_tinystr(subtag: Option>) -> Self { + match subtag { + None => Self(ShortVec::new()), + Some(val) => { + debug_assert!(val.is_ascii_alphanumeric()); + debug_assert!(!matches!(val, TRUE_VALUE)); + Self(ShortVec::new_single(val)) + } + } + } + + pub(crate) fn from_vec_unchecked(input: Vec>) -> Self { + Self(input.into()) + } + + #[doc(hidden)] + pub const fn subtag_from_bytes(bytes: &[u8]) -> Result>, ParserError> { + Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len()) + } + + pub(crate) fn parse_subtag(t: &[u8]) -> Result>, ParserError> { + Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len()) + } + + pub(crate) const fn parse_subtag_from_bytes_manual_slice( + bytes: &[u8], + start: usize, + end: usize, + ) -> Result>, ParserError> { + let slice_len = end - start; + if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() { + return Err(ParserError::InvalidExtension); + } + + match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) { + Ok(TRUE_VALUE) => Ok(None), + Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())), + Ok(_) => Err(ParserError::InvalidExtension), + Err(_) => Err(ParserError::InvalidSubtag), + } + } + + 
pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + where + F: FnMut(&str) -> Result<(), E>, + { + self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f) + } +} + +impl FromStr for Value { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + +impl_writeable_for_tinystr_list!(Value, "", "islamic", "civil"); + +/// A macro allowing for compile-time construction of valid Unicode [`Value`] subtag. +/// +/// The macro only supports single-subtag values. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::Locale; +/// use icu::locid::{ +/// extensions_unicode_key as key, extensions_unicode_value as value, +/// }; +/// +/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap(); +/// +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&key!("ca")), +/// Some(&value!("buddhist")) +/// ); +/// ``` +/// +/// [`Value`]: crate::extensions::unicode::Value +#[macro_export] +macro_rules! extensions_unicode_value { + ($value:literal) => {{ + // What we want: + // const R: $crate::extensions::unicode::Value = + // match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) { + // Ok(r) => r, + // #[allow(clippy::panic)] // const context + // _ => panic!(concat!("Invalid Unicode extension value: ", $value)), + // }; + // Workaround until https://github.com/rust-lang/rust/issues/73255 lands: + const R: $crate::extensions::unicode::Value = + $crate::extensions::unicode::Value::from_tinystr( + match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) { + Ok(r) => r, + _ => panic!(concat!("Invalid Unicode extension value: ", $value)), + }, + ); + R + }}; +} diff --git a/vendor/icu_locid/src/helpers.rs b/vendor/icu_locid/src/helpers.rs new file mode 100644 index 000000000..e617ded5d --- /dev/null +++ b/vendor/icu_locid/src/helpers.rs @@ -0,0 +1,648 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::iter::FromIterator; + +use alloc::vec; +use alloc::vec::Vec; +use litemap::store::*; + +/// Internal: A vector that supports no-allocation, constant values if length 0 or 1. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) enum ShortVec { + Empty, + Single(T), + Multi(Vec), +} + +impl ShortVec { + #[inline] + pub const fn new() -> Self { + Self::Empty + } + + #[inline] + pub const fn new_single(item: T) -> Self { + Self::Single(item) + } + + pub fn push(&mut self, item: T) { + *self = match core::mem::replace(self, Self::Empty) { + ShortVec::Empty => ShortVec::Single(item), + ShortVec::Single(prev_item) => ShortVec::Multi(vec![prev_item, item]), + ShortVec::Multi(mut items) => { + items.push(item); + ShortVec::Multi(items) + } + }; + } + + #[inline] + pub fn as_slice(&self) -> &[T] { + match self { + ShortVec::Empty => &[], + ShortVec::Single(v) => core::slice::from_ref(v), + ShortVec::Multi(v) => v.as_slice(), + } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut [T] { + match self { + ShortVec::Empty => &mut [], + ShortVec::Single(v) => core::slice::from_mut(v), + ShortVec::Multi(v) => v.as_mut_slice(), + } + } + + #[inline] + pub const fn single(&self) -> Option<&T> { + match self { + ShortVec::Single(v) => Some(v), + _ => None, + } + } + + #[inline] + pub fn len(&self) -> usize { + match self { + ShortVec::Empty => 0, + ShortVec::Single(_) => 1, + ShortVec::Multi(ref v) => v.len(), + } + } + + pub fn insert(&mut self, index: usize, elt: T) { + assert!( + index <= self.len(), + "insertion index (is {}) should be <= len (is {})", + index, + self.len() + ); + + *self = match core::mem::replace(self, ShortVec::Empty) { + ShortVec::Empty => ShortVec::Single(elt), + ShortVec::Single(item) => { + let items = if index == 0 { + vec![elt, item] + } else 
{ + vec![item, elt] + }; + ShortVec::Multi(items) + } + ShortVec::Multi(mut items) => { + items.insert(index, elt); + ShortVec::Multi(items) + } + } + } + + pub fn remove(&mut self, index: usize) -> T { + assert!( + index < self.len(), + "removal index (is {}) should be < len (is {})", + index, + self.len() + ); + + let (replaced, removed_item) = match core::mem::replace(self, ShortVec::Empty) { + ShortVec::Empty => unreachable!(), + ShortVec::Single(v) => (ShortVec::Empty, v), + ShortVec::Multi(mut v) => { + let removed_item = v.remove(index); + match v.len() { + #[allow(clippy::unwrap_used)] + // we know that the vec has exactly one element left + 1 => (ShortVec::Single(v.pop().unwrap()), removed_item), + // v has atleast 2 elements, create a Multi variant + _ => (ShortVec::Multi(v), removed_item), + } + } + }; + *self = replaced; + removed_item + } + + #[inline] + pub fn clear(&mut self) { + let _ = core::mem::replace(self, ShortVec::Empty); + } +} + +impl From> for ShortVec { + fn from(v: Vec) -> Self { + match v.len() { + 0 => ShortVec::Empty, + #[allow(clippy::unwrap_used)] // we know that the vec is not empty + 1 => ShortVec::Single(v.into_iter().next().unwrap()), + _ => ShortVec::Multi(v), + } + } +} + +impl Default for ShortVec { + fn default() -> Self { + ShortVec::Empty + } +} + +impl FromIterator for ShortVec { + fn from_iter>(iter: I) -> Self { + iter.into_iter().collect::>().into() + } +} + +impl StoreConstEmpty for ShortVec<(K, V)> { + const EMPTY: ShortVec<(K, V)> = ShortVec::Empty; +} + +impl Store for ShortVec<(K, V)> { + #[inline] + fn lm_len(&self) -> usize { + self.len() + } + + #[inline] + fn lm_is_empty(&self) -> bool { + matches!(self, ShortVec::Empty) + } + + #[inline] + fn lm_get(&self, index: usize) -> Option<(&K, &V)> { + self.as_slice().get(index).map(|elt| (&elt.0, &elt.1)) + } + + #[inline] + fn lm_last(&self) -> Option<(&K, &V)> { + match self { + ShortVec::Empty => None, + ShortVec::Single(v) => Some(v), + ShortVec::Multi(v) => 
v.as_slice().last(), + } + .map(|elt| (&elt.0, &elt.1)) + } + + #[inline] + fn lm_binary_search_by(&self, mut cmp: F) -> Result + where + F: FnMut(&K) -> core::cmp::Ordering, + { + self.as_slice().binary_search_by(|(k, _)| cmp(k)) + } +} + +impl StoreMut for ShortVec<(K, V)> { + fn lm_with_capacity(_capacity: usize) -> Self { + ShortVec::Empty + } + + // ShortVec supports reserving capacity for additional elements only if we have already allocated a vector + fn lm_reserve(&mut self, additional: usize) { + if let ShortVec::Multi(ref mut v) = self { + v.reserve(additional) + } + } + + fn lm_get_mut(&mut self, index: usize) -> Option<(&K, &mut V)> { + self.as_mut_slice() + .get_mut(index) + .map(|elt| (&elt.0, &mut elt.1)) + } + + fn lm_push(&mut self, key: K, value: V) { + self.push((key, value)) + } + + fn lm_insert(&mut self, index: usize, key: K, value: V) { + self.insert(index, (key, value)) + } + + fn lm_remove(&mut self, index: usize) -> (K, V) { + self.remove(index) + } + + fn lm_clear(&mut self) { + self.clear(); + } +} + +impl<'a, K: 'a, V: 'a> StoreIterable<'a, K, V> for ShortVec<(K, V)> { + type KeyValueIter = + core::iter::Map, for<'r> fn(&'r (K, V)) -> (&'r K, &'r V)>; + + fn lm_iter(&'a self) -> Self::KeyValueIter { + self.as_slice().iter().map(|elt| (&elt.0, &elt.1)) + } +} + +impl StoreFromIterator for ShortVec<(K, V)> {} + +#[test] +fn test_shortvec_impl() { + litemap::testing::check_store::>(); +} + +macro_rules! 
impl_tinystr_subtag { + ( + $(#[$doc:meta])* + $name:ident, + $($full_name:ident)::+, + $macro_name:ident, + $len_start:literal..=$len_end:literal, + $tinystr_ident:ident, + $validate:expr, + $normalize:expr, + $is_normalized:expr, + $error:ident, + [$good_example:literal $(,$more_good_examples:literal)*], + [$bad_example:literal $(, $more_bad_examples:literal)*], + ) => { + #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)] + #[cfg_attr(feature = "serde", derive(serde::Serialize))] + #[repr(transparent)] + $(#[$doc])* + pub struct $name(tinystr::TinyAsciiStr<$len_end>); + + impl $name { + /// A constructor which takes a UTF-8 slice, parses it and + #[doc = concat!("produces a well-formed [`", stringify!($name), "`].")] + /// + /// # Examples + /// + /// ``` + #[doc = concat!("use icu_locid::", stringify!($($full_name)::+), ";")] + /// + #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($good_example), ").is_ok());")] + #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($bad_example), ").is_err());")] + /// ``` + pub const fn try_from_bytes(v: &[u8]) -> Result { + Self::try_from_bytes_manual_slice(v, 0, v.len()) + } + + /// Equivalent to [`try_from_bytes(bytes[start..end])`](Self::try_from_bytes), + /// but callable in a `const` context (which range indexing is not). + pub const fn try_from_bytes_manual_slice( + v: &[u8], + start: usize, + end: usize, + ) -> Result { + let slen = end - start; + + #[allow(clippy::double_comparisons)] // if len_start == len_end + if slen < $len_start || slen > $len_end { + return Err(crate::parser::errors::ParserError::$error); + } + + match tinystr::TinyAsciiStr::from_bytes_manual_slice(v, start, end) { + Ok($tinystr_ident) if $validate => Ok(Self($normalize)), + _ => Err(crate::parser::errors::ParserError::$error), + } + } + + #[doc = concat!("Safely creates a [`", stringify!($name), "`] from its raw format")] + /// as returned by [`Self::into_raw`]. 
Unlike [`Self::try_from_bytes`], + /// this constructor only takes normalized values. + pub const fn try_from_raw( + v: [u8; $len_end], + ) -> Result { + if let Ok($tinystr_ident) = tinystr::TinyAsciiStr::<$len_end>::try_from_raw(v) { + if $tinystr_ident.len() >= $len_start && $is_normalized { + Ok(Self($tinystr_ident)) + } else { + Err(crate::parser::errors::ParserError::$error) + } + } else { + Err(crate::parser::errors::ParserError::$error) + } + } + + #[doc = concat!("Unsafely creates a [`", stringify!($name), "`] from its raw format")] + /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`], + /// this constructor only takes normalized values. + /// + /// # Safety + /// + /// This function is safe iff [`Self::try_from_raw`] returns an `Ok`. This is the case + /// for inputs that are correctly normalized. + pub const unsafe fn from_raw_unchecked(v: [u8; $len_end]) -> Self { + Self(tinystr::TinyAsciiStr::from_bytes_unchecked(v)) + } + + /// Deconstructs into a raw format to be consumed by + /// [`from_raw_unchecked`](Self::from_raw_unchecked()) or + /// [`try_from_raw`](Self::try_from_raw()). + pub const fn into_raw(self) -> [u8; $len_end] { + *self.0.all_bytes() + } + + #[inline] + /// A helper function for displaying as a `&str`. + pub const fn as_str(&self) -> &str { + self.0.as_str() + } + + /// Compare with BCP-47 bytes. + /// + /// The return value is equivalent to what would happen if you first converted + /// `self` to a BCP-47 string and then performed a byte comparison. + /// + /// This function is case-sensitive and results in a *total order*, so it is appropriate for + /// binary search. The only argument producing [`Ordering::Equal`](core::cmp::Ordering::Equal) + /// is `self.as_str().as_bytes()`. + #[inline] + pub fn strict_cmp(self, other: &[u8]) -> core::cmp::Ordering { + self.as_str().as_bytes().cmp(other) + } + + /// Compare with a potentially unnormalized BCP-47 string. 
+ /// + /// The return value is equivalent to what would happen if you first parsed the + /// BCP-47 string and then performed a structural comparison. + /// + #[inline] + pub fn normalizing_eq(self, other: &str) -> bool { + self.as_str().eq_ignore_ascii_case(other) + } + } + + impl core::str::FromStr for $name { + type Err = crate::parser::errors::ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } + } + + impl<'l> From<&'l $name> for &'l str { + fn from(input: &'l $name) -> Self { + input.as_str() + } + } + + impl From<$name> for tinystr::TinyAsciiStr<$len_end> { + fn from(input: $name) -> Self { + input.0 + } + } + + impl writeable::Writeable for $name { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + sink.write_str(self.as_str()) + } + #[inline] + fn writeable_length_hint(&self) -> writeable::LengthHint { + writeable::LengthHint::exact(self.0.len()) + } + } + + writeable::impl_display_with_writeable!($name); + + #[doc = concat!("A macro allowing for compile-time construction of valid [`", stringify!($name), "`] subtags.")] + /// + /// # Examples + /// + /// Parsing errors don't have to be handled at runtime: + /// ``` + /// assert_eq!( + #[doc = concat!(" icu_locid::", stringify!($macro_name), "!(", stringify!($good_example) ,"),")] + #[doc = concat!(" ", stringify!($good_example), ".parse::().unwrap()")] + /// ); + /// ``` + /// + /// Invalid input is a compile failure: + /// ```compile_fail,E0080 + #[doc = concat!("icu_locid::", stringify!($macro_name), "!(", stringify!($bad_example) ,");")] + /// ``` + /// + #[doc = concat!("[`", stringify!($name), "`]: crate::", stringify!($($full_name)::+))] + #[macro_export] + macro_rules! 
$macro_name { + ($string:literal) => {{ + use $crate::$($full_name)::+; + const R: $name = + match $name::try_from_bytes($string.as_bytes()) { + Ok(r) => r, + #[allow(clippy::panic)] // const context + _ => panic!(concat!("Invalid ", stringify!($name), ": ", $string)), + }; + R + }}; + } + + #[cfg(feature = "databake")] + impl databake::Bake for $name { + fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { + env.insert("icu_locid"); + let string = self.as_str(); + databake::quote! {::icu_locid::$macro_name!(#string) } + } + } + + #[test] + fn test_construction() { + let maybe = $name::try_from_bytes($good_example.as_bytes()); + assert!(maybe.is_ok()); + assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw())); + assert_eq!(maybe.unwrap().as_str(), $good_example); + $( + let maybe = $name::try_from_bytes($more_good_examples.as_bytes()); + assert!(maybe.is_ok()); + assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw())); + assert_eq!(maybe.unwrap().as_str(), $more_good_examples); + )* + assert!($name::try_from_bytes($bad_example.as_bytes()).is_err()); + $( + assert!($name::try_from_bytes($more_bad_examples.as_bytes()).is_err()); + )* + } + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!(&$good_example.parse::<$name>().unwrap(), $good_example); + $( + writeable::assert_writeable_eq!($more_good_examples.parse::<$name>().unwrap(), $more_good_examples); + )* + } + + #[cfg(feature = "serde")] + impl<'de> serde::Deserialize<'de> for $name { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + struct Visitor; + + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = $name; + + fn expecting( + &self, + formatter: &mut core::fmt::Formatter<'_>, + ) -> core::fmt::Result { + write!(formatter, "a valid BCP-47 {}", stringify!($name)) + } + + fn visit_str(self, s: &str) -> Result { + s.parse().map_err(serde::de::Error::custom) + } + } + + if deserializer.is_human_readable() { + 
deserializer.deserialize_string(Visitor) + } else { + Self::try_from_raw(serde::de::Deserialize::deserialize(deserializer)?) + .map_err(serde::de::Error::custom) + } + } + } + + // Safety checklist for ULE: + // + // 1. Must not include any uninitialized or padding bytes (true since transparent over a ULE). + // 2. Must have an alignment of 1 byte (true since transparent over a ULE). + // 3. ULE::validate_byte_slice() checks that the given byte slice represents a valid slice. + // 4. ULE::validate_byte_slice() checks that the given byte slice has a valid length. + // 5. All other methods must be left with their default impl. + // 6. Byte equality is semantic equality. + #[cfg(feature = "zerovec")] + unsafe impl zerovec::ule::ULE for $name { + fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> { + let it = bytes.chunks_exact(core::mem::size_of::()); + if !it.remainder().is_empty() { + return Err(zerovec::ZeroVecError::length::(bytes.len())); + } + for v in it { + // The following can be removed once `array_chunks` is stabilized. + let mut a = [0; core::mem::size_of::()]; + a.copy_from_slice(v); + if Self::try_from_raw(a).is_err() { + return Err(zerovec::ZeroVecError::parse::()); + } + } + Ok(()) + } + } + + #[cfg(feature = "zerovec")] + impl zerovec::ule::AsULE for $name { + type ULE = Self; + fn to_unaligned(self) -> Self::ULE { + self + } + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } + } + + #[cfg(feature = "zerovec")] + impl<'a> zerovec::maps::ZeroMapKV<'a> for $name { + type Container = zerovec::ZeroVec<'a, $name>; + type Slice = zerovec::ZeroSlice<$name>; + type GetType = $name; + type OwnedType = $name; + } + }; +} + +macro_rules! 
impl_writeable_for_each_subtag_str_no_test { + ($type:tt) => { + impl writeable::Writeable for $type { + fn write_to(&self, sink: &mut W) -> core::fmt::Result { + let mut initial = true; + self.for_each_subtag_str(&mut |subtag| { + if initial { + initial = false; + } else { + sink.write_char('-')?; + } + sink.write_str(subtag) + }) + } + + #[inline] + fn writeable_length_hint(&self) -> writeable::LengthHint { + let mut result = writeable::LengthHint::exact(0); + let mut initial = true; + self.for_each_subtag_str::(&mut |subtag| { + if initial { + initial = false; + } else { + result += 1; + } + result += subtag.len(); + Ok(()) + }) + .expect("infallible"); + result + } + } + + writeable::impl_display_with_writeable!($type); + }; +} + +macro_rules! impl_writeable_for_subtag_list { + ($type:tt, $sample1:literal, $sample2:literal) => { + impl_writeable_for_each_subtag_str_no_test!($type); + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!(&$type::default(), ""); + writeable::assert_writeable_eq!( + &$type::from_vec_unchecked(alloc::vec![$sample1.parse().unwrap()]), + $sample1, + ); + writeable::assert_writeable_eq!( + &$type::from_vec_unchecked(alloc::vec![ + $sample1.parse().unwrap(), + $sample2.parse().unwrap() + ]), + core::concat!($sample1, "-", $sample2), + ); + } + }; +} + +macro_rules! impl_writeable_for_tinystr_list { + ($type:tt, $if_empty:literal, $sample1:literal, $sample2:literal) => { + impl_writeable_for_each_subtag_str_no_test!($type); + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!( + &$type::from_vec_unchecked(vec![$sample1.parse().unwrap()]), + $sample1, + ); + writeable::assert_writeable_eq!( + &$type::from_vec_unchecked(vec![ + $sample1.parse().unwrap(), + $sample2.parse().unwrap() + ]), + core::concat!($sample1, "-", $sample2), + ); + } + }; +} + +macro_rules! 
impl_writeable_for_key_value { + ($type:tt, $key1:literal, $value1:literal, $key2:literal, $expected2:literal) => { + impl_writeable_for_each_subtag_str_no_test!($type); + + #[test] + fn test_writeable() { + writeable::assert_writeable_eq!(&$type::default(), ""); + writeable::assert_writeable_eq!( + &$type::from_tuple_vec(vec![($key1.parse().unwrap(), $value1.parse().unwrap())]), + core::concat!($key1, "-", $value1), + ); + writeable::assert_writeable_eq!( + &$type::from_tuple_vec(vec![ + ($key1.parse().unwrap(), $value1.parse().unwrap()), + ($key2.parse().unwrap(), "true".parse().unwrap()) + ]), + core::concat!($key1, "-", $value1, "-", $expected2), + ); + } + }; +} diff --git a/vendor/icu_locid/src/langid.rs b/vendor/icu_locid/src/langid.rs new file mode 100644 index 000000000..fc5435766 --- /dev/null +++ b/vendor/icu_locid/src/langid.rs @@ -0,0 +1,523 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use core::cmp::Ordering; +use core::str::FromStr; + +use crate::ordering::SubtagOrderingResult; +use crate::parser::{ + get_subtag_iterator, parse_language_identifier, parse_language_identifier_with_single_variant, + ParserError, ParserMode, +}; +use crate::subtags; +use alloc::string::String; +use alloc::string::ToString; + +/// A core struct representing a [`Unicode BCP47 Language Identifier`]. 
+/// +/// # Examples +/// +/// ``` +/// use icu::locid::{subtags::*, LanguageIdentifier}; +/// +/// let li: LanguageIdentifier = "en-US".parse().expect("Failed to parse."); +/// +/// assert_eq!(li.language, "en".parse::().unwrap()); +/// assert_eq!(li.script, None); +/// assert_eq!(li.region.unwrap(), "US".parse::().unwrap()); +/// assert_eq!(li.variants.len(), 0); +/// assert_eq!(li.to_string(), "en-US"); +/// ``` +/// +/// # Parsing +/// +/// Unicode recognizes three levels of standard conformance for any language identifier: +/// +/// * *well-formed* - syntactically correct +/// * *valid* - well-formed and only uses registered language, region, script and variant subtags... +/// * *canonical* - valid and no deprecated codes or structure. +/// +/// At the moment parsing normalizes a well-formed language identifier converting +/// `_` separators to `-` and adjusting casing to conform to the Unicode standard. +/// +/// Any bogus subtags will cause the parsing to fail with an error. +/// No subtag validation is performed. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::{subtags::*, LanguageIdentifier}; +/// +/// let li: LanguageIdentifier = +/// "eN_latn_Us-Valencia".parse().expect("Failed to parse."); +/// +/// assert_eq!(li.language, "en".parse::().unwrap()); +/// assert_eq!(li.script, "Latn".parse::