From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- .../rust/unic-langid-impl/.cargo-checksum.json | 1 + third_party/rust/unic-langid-impl/Cargo.lock | 617 ++++++ third_party/rust/unic-langid-impl/Cargo.toml | 86 + third_party/rust/unic-langid-impl/README.md | 1 + .../rust/unic-langid-impl/benches/canonicalize.rs | 50 + .../rust/unic-langid-impl/benches/langid.rs | 84 + .../unic-langid-impl/benches/likely_subtags.rs | 88 + .../rust/unic-langid-impl/benches/parser.rs | 82 + .../unic-langid-impl/data/cldr-misc-full/README.md | 6 + .../unic-langid-impl/src/bin/generate_layout.rs | 130 ++ .../src/bin/generate_likelysubtags.rs | 201 ++ third_party/rust/unic-langid-impl/src/errors.rs | 29 + .../rust/unic-langid-impl/src/layout_table.rs | 4 + third_party/rust/unic-langid-impl/src/lib.rs | 516 +++++ .../rust/unic-langid-impl/src/likelysubtags/mod.rs | 136 ++ .../unic-langid-impl/src/likelysubtags/tables.rs | 2226 ++++++++++++++++++++ .../rust/unic-langid-impl/src/parser/errors.rs | 20 + .../rust/unic-langid-impl/src/parser/mod.rs | 83 + third_party/rust/unic-langid-impl/src/serde.rs | 60 + .../rust/unic-langid-impl/src/subtags/language.rs | 108 + .../rust/unic-langid-impl/src/subtags/mod.rs | 9 + .../rust/unic-langid-impl/src/subtags/region.rs | 74 + .../rust/unic-langid-impl/src/subtags/script.rs | 62 + .../rust/unic-langid-impl/src/subtags/variant.rs | 78 + .../unic-langid-impl/tests/canonicalize_test.rs | 13 + .../rust/unic-langid-impl/tests/fixtures.rs | 75 + .../tests/language_identifier_test.rs | 192 ++ .../rust/unic-langid-impl/tests/likelysubtags.rs | 113 + 28 files changed, 5144 insertions(+) create mode 100644 third_party/rust/unic-langid-impl/.cargo-checksum.json create mode 100644 third_party/rust/unic-langid-impl/Cargo.lock create mode 100644 third_party/rust/unic-langid-impl/Cargo.toml create mode 100644 third_party/rust/unic-langid-impl/README.md create mode 100644 third_party/rust/unic-langid-impl/benches/canonicalize.rs create mode 100644 third_party/rust/unic-langid-impl/benches/langid.rs create mode 100644 third_party/rust/unic-langid-impl/benches/likely_subtags.rs create mode 100644 third_party/rust/unic-langid-impl/benches/parser.rs create mode 100644 third_party/rust/unic-langid-impl/data/cldr-misc-full/README.md create mode 100644 third_party/rust/unic-langid-impl/src/bin/generate_layout.rs create mode 100644 third_party/rust/unic-langid-impl/src/bin/generate_likelysubtags.rs create mode 100644 third_party/rust/unic-langid-impl/src/errors.rs create mode 100644 third_party/rust/unic-langid-impl/src/layout_table.rs create mode 100644 third_party/rust/unic-langid-impl/src/lib.rs create mode 100644 third_party/rust/unic-langid-impl/src/likelysubtags/mod.rs create mode 100644 third_party/rust/unic-langid-impl/src/likelysubtags/tables.rs create mode 100644 third_party/rust/unic-langid-impl/src/parser/errors.rs create mode 100644 third_party/rust/unic-langid-impl/src/parser/mod.rs create mode 100644 third_party/rust/unic-langid-impl/src/serde.rs create mode 100644 third_party/rust/unic-langid-impl/src/subtags/language.rs create mode 100644 third_party/rust/unic-langid-impl/src/subtags/mod.rs create mode 100644 third_party/rust/unic-langid-impl/src/subtags/region.rs create mode 100644 third_party/rust/unic-langid-impl/src/subtags/script.rs create mode 100644 third_party/rust/unic-langid-impl/src/subtags/variant.rs create mode 100644 third_party/rust/unic-langid-impl/tests/canonicalize_test.rs create mode 100644 third_party/rust/unic-langid-impl/tests/fixtures.rs create mode 100644 third_party/rust/unic-langid-impl/tests/language_identifier_test.rs create mode 100644 third_party/rust/unic-langid-impl/tests/likelysubtags.rs (limited to 'third_party/rust/unic-langid-impl') diff --git a/third_party/rust/unic-langid-impl/.cargo-checksum.json b/third_party/rust/unic-langid-impl/.cargo-checksum.json new file mode 100644 index 0000000000..b3315aaa15 --- /dev/null +++ b/third_party/rust/unic-langid-impl/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"da3d5fabc92472ac1f45fa321f1e188fd3fff22b3234d3f74e78a8f79503aac9","Cargo.toml":"67560e144c925cdb1467085067b51138da8461c4babd170777e90082a1664d61","README.md":"1ce686b1ba46e7fff771db934d1e8905e3d96c73d9e6712ca85011cf63cf4157","benches/canonicalize.rs":"1bc9db6454ed711c1c1d58a9236d1ed02c40645782338697812dc21b26c2295f","benches/langid.rs":"46dc68e1ee1c998fe957035c11c373cc5d2c2993dd3058aeb36014028618a46e","benches/likely_subtags.rs":"33debb6db4eabd0166525c7c06868f86e35958c2d0d9e5e7668f28b1aff3da23","benches/parser.rs":"bc5460589125aca2ca4c4cf475fde81e7de6aa70c7bf8443996b6246b44110a3","data/cldr-misc-full/README.md":"b4bbc8e56b8c9f5d482d2dd211e53720cdca27b0e98eec274e56577d41906b92","src/bin/generate_layout.rs":"e1df742c318ba4de09a25edad082a398da1d5591ac24074f9eadf3f40d677a9a","src/bin/generate_likelysubtags.rs":"9b0df3a008d0f6090ee441e4c649f00f9b78c7d23434f389fe3de2288951cf72","src/errors.rs":"091525fd3d704cc4698bab82af9fff1f2e6fc793910700790ec3ef943b3eddfe","src/layout_table.rs":"c311b5399a22efd69f14decb00aee7dfb2943a090832e2aef127e2853ad1ce0b","src/lib.rs":"75b5e8958ba0139ef60139e61441d29c80bde8552d1adead3905165c11ed97cd","src/likelysubtags/mod.rs":"66889568b883675465faa445e257b4b70e9dcba98c6c523eb34a08ebb50377c0","src/likelysubtags/tables.rs":"47ab40497145adc553f9f585d1ec79d01028495a57b1907392642afd26986e89","src/parser/errors.rs":"2dacf5bf388499c9fbecf64ef950038f123335e87b6691bcdb88d185678f38d5","src/parser/mod.rs":"30f9ff487ab7b023ec7e6cfbfe4f4ebc41ca524cef0eba96a6b77ad624c9f6b8","src/serde.rs":"2ce1fdea1217c4f72b2c8e00f333902ff496c54eef5b4534adb289375c50aef0","src/subtags/language.rs":"ea198788c10a9b24a88d2316d516a1b6325b20c57c1d05586edc7c50acc1e27d","src/subtags/mod.rs":"ba23712d7ce5e0fa896c97ddc91d7eacbf21a99d62c15f5cf19a6653876bf56b","src/subtags/region.rs":"f3455679067ed0da9103d5433586cb4809628847e923bbcf35cc2d2b25a0d35e","src/subtags/script.rs":"c5ca9a2ab6d8b3a1dc3f52a73665223f0dd30901b8e5d722dd93e58a69710f5d","src/subtags/variant.rs":"a0feec8b44933ff04a3d3f91e9e69f7537468e7c813c3780eb87e51512f6d1dd","tests/canonicalize_test.rs":"5ca005223ae159c15f8809c28c1cc950dc4d69063d9de70b885f18ef8e6cca28","tests/fixtures.rs":"d03a280b67080dac1bc333255e89d8903164688a6dfaed5adbea68efd65ef1bc","tests/language_identifier_test.rs":"90a4d7ac4961c1e214de166036c1b5cae21f3a99f157fe00d149d8b9106cb7b9","tests/likelysubtags.rs":"f5db0c3ec9f545eef148914486c7a74f1bbc629f4dcff3077242c174c0c1c6a3"},"package":"e35bfd2f2b8796545b55d7d3fd3e89a0613f68a0d1c8bc28cb7ff96b411a35ff"} \ No newline at end of file diff --git a/third_party/rust/unic-langid-impl/Cargo.lock b/third_party/rust/unic-langid-impl/Cargo.lock new file mode 100644 index 0000000000..bd8b6c575c --- /dev/null +++ b/third_party/rust/unic-langid-impl/Cargo.lock @@ -0,0 +1,617 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "criterion" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f916dfc5d356b0ed9dae65f1db9fc9770aa2851d2662b988ccf4fe3516e86348" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edbafec5fa1f196ca66527c1b12c2ec4745ca14b50f1ad8f9f6f720b55d11fac" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "displaydoc" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" + +[[package]] +name = "js-sys" +version = "0.3.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.135" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" + +[[package]] +name = "plotters-svg" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" +dependencies = [ + "itoa 1.0.4", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "tinystr" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8aeafdfd935e4a7fe16a91ab711fa52d54df84f9c8f7ca5837a9d1d902ef4c2" +dependencies = [ + "displaydoc", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.1" +dependencies = [ + "criterion", + "serde", + "serde_json", + "tinystr", +] + +[[package]] +name = "unicode-ident" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" + +[[package]] +name = "web-sys" +version = "0.3.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/unic-langid-impl/Cargo.toml b/third_party/rust/unic-langid-impl/Cargo.toml new file mode 100644 index 0000000000..eab3f743e9 --- /dev/null +++ b/third_party/rust/unic-langid-impl/Cargo.toml @@ -0,0 +1,86 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "unic-langid-impl" +version = "0.9.1" +authors = ["Zibi Braniecki "] +include = [ + "src/**/*", + "benches/*.rs", + "tests/*.rs", + "Cargo.toml", + "README.md", +] +description = "API for managing Unicode Language Identifiers" +readme = "README.md" +categories = ["internationalization"] +license = "MIT/Apache-2.0" +repository = "https://github.com/zbraniecki/unic-locale" + +[[bin]] +name = "generate_likelysubtags" +required-features = ["binary"] + +[[bin]] +name = "generate_layout" +required-features = ["binary"] + +[[test]] +name = "likelysubtags" +path = "tests/likelysubtags.rs" +required-features = ["likelysubtags"] + +[[bench]] +name = "parser" +harness = false + +[[bench]] +name = "langid" +harness = false + +[[bench]] +name = "canonicalize" +harness = false + +[[bench]] +name = "likely_subtags" +harness = false +required-features = ["likelysubtags"] + +[dependencies.serde] +version = "1.0" +optional = true + +[dependencies.serde_json] +version = "1.0" +optional = true + +[dependencies.tinystr] +version = "0.7.0" + +[dev-dependencies.criterion] +version = "0.3" + +[dev-dependencies.serde] +version = "1.0" +features = ["derive"] + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +binary = [ + "serde", + "serde_json", +] +likelysubtags = [] diff --git a/third_party/rust/unic-langid-impl/README.md b/third_party/rust/unic-langid-impl/README.md new file mode 100644 index 0000000000..1ad62eac92 --- /dev/null +++ b/third_party/rust/unic-langid-impl/README.md @@ -0,0 +1 @@ +This is an internal implementation crate for `unic-langid`. Please use `unic-langid`. diff --git a/third_party/rust/unic-langid-impl/benches/canonicalize.rs b/third_party/rust/unic-langid-impl/benches/canonicalize.rs new file mode 100644 index 0000000000..a9e17e49aa --- /dev/null +++ b/third_party/rust/unic-langid-impl/benches/canonicalize.rs @@ -0,0 +1,50 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use unic_langid_impl::canonicalize; + +fn langid_canonicalize_bench(c: &mut Criterion) { + let strings = &[ + "En_uS", + "EN-GB", + "ES-aR", + "iT", + "zH_HaNs_cN", + "dE-aT", + "Pl", + "FR-FR", + "de_AT", + "sR-CyrL_sr", + "NB-NO", + "fr_fr", + "Mk", + "uK", + "en-us", + "en_gb", + "ES-AR", + "tH", + "DE", + "ZH_cyrl_hN", + "eN-lAtN-uS", + ]; + c.bench_function("langid_canonicalize", |b| { + b.iter(|| { + for s in strings { + let _ = canonicalize(black_box(s)); + } + }) + }); + c.bench_function("langid_canonicalize_from_bytes", |b| { + let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = canonicalize(black_box(s)); + } + }) + }); +} + +criterion_group!(benches, langid_canonicalize_bench,); +criterion_main!(benches); diff --git a/third_party/rust/unic-langid-impl/benches/langid.rs b/third_party/rust/unic-langid-impl/benches/langid.rs new file mode 100644 index 0000000000..026c288602 --- /dev/null +++ b/third_party/rust/unic-langid-impl/benches/langid.rs @@ -0,0 +1,84 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; +use criterion::Fun; + +use unic_langid_impl::subtags; +use unic_langid_impl::LanguageIdentifier; + +static STRINGS: &[&str] = &[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US", +]; + +fn language_identifier_construct_bench(c: &mut Criterion) { + let langids: Vec = STRINGS + .iter() + .map(|s| -> LanguageIdentifier { s.parse().unwrap() }) + .collect(); + + let funcs = vec![ + Fun::new("from_str", |b, _| { + b.iter(|| { + for s in STRINGS { + let _: Result = black_box(s).parse(); + } + }) + }), + Fun::new("from_bytes", |b, _| { + let slices: Vec<&[u8]> = STRINGS.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = LanguageIdentifier::from_bytes(black_box(s)); + } + }) + }), + Fun::new("from_parts", |b, langids: &Vec| { + let entries: Vec<( + subtags::Language, + Option, + Option, + Vec, + )> = langids + .iter() + .cloned() + .map(|langid| langid.into_parts()) + .collect(); + b.iter(|| { + for (language, script, region, variants) in &entries { + let _ = LanguageIdentifier::from_parts( + language.clone(), + script.clone(), + region.clone(), + variants, + ); + } + }) + }), + ]; + + c.bench_functions("language_identifier_construct", funcs, langids); +} + +criterion_group!(benches, language_identifier_construct_bench,); +criterion_main!(benches); diff --git a/third_party/rust/unic-langid-impl/benches/likely_subtags.rs b/third_party/rust/unic-langid-impl/benches/likely_subtags.rs new file mode 100644 index 0000000000..3b7f8746ee --- /dev/null +++ b/third_party/rust/unic-langid-impl/benches/likely_subtags.rs @@ -0,0 +1,88 @@ +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use unic_langid_impl::subtags; +use unic_langid_impl::LanguageIdentifier; + +static STRINGS: &[&str] = &[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "und-PL", + "und-Latn-AM", + "ug-Cyrl", + "sr-ME", + "mn-Mong", + "lif-Limb", + "gan", + "zh-Hant", + "yue-Hans", + "unr", + "unr-Deva", + "und-Thai-CN", + "ug-Cyrl", + "en-Latn-DE", + "pl-FR", + "de-CH", + "tuq", + "sr-ME", + "ng", + "klx", + "kk-Arab", + "en-Cyrl", + "und-Cyrl-UK", + "und-Arab", + "und-Arab-FO", +]; + +fn maximize_bench(c: &mut Criterion) { + let langids: Vec = STRINGS + .iter() + .map(|s| -> LanguageIdentifier { s.parse().unwrap() }) + .collect(); + c.bench_function("maximize", move |b| { + b.iter(|| { + for mut s in langids.clone().into_iter() { + s.maximize(); + } + }) + }); +} + +fn extract_input( + s: &str, +) -> ( + subtags::Language, + Option, + Option, +) { + let langid: LanguageIdentifier = s.parse().unwrap(); + (langid.language, langid.script, langid.region) +} + +fn raw_maximize_bench(c: &mut Criterion) { + let entries: Vec<_> = STRINGS.iter().map(|s| extract_input(s)).collect(); + + c.bench_function("raw_maximize", move |b| { + b.iter(|| { + for (lang, script, region) in entries.clone().into_iter() { + let _ = unic_langid_impl::likelysubtags::maximize(lang, script, region); + } + }) + }); +} + +criterion_group!(benches, maximize_bench, raw_maximize_bench,); +criterion_main!(benches); diff --git a/third_party/rust/unic-langid-impl/benches/parser.rs b/third_party/rust/unic-langid-impl/benches/parser.rs new file mode 100644 index 0000000000..97abe833fb --- /dev/null +++ b/third_party/rust/unic-langid-impl/benches/parser.rs @@ -0,0 +1,82 @@ +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use unic_langid_impl::parser::parse_language_identifier; + +fn language_identifier_parser_bench(c: &mut Criterion) { + let strings = &[ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US", + ]; + + c.bench_function("language_identifier_parser", |b| { + let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = parse_language_identifier(black_box(s)); + } + }) + }); +} + +fn language_identifier_parser_casing_bench(c: &mut Criterion) { + let strings = &[ + "En_uS", + "EN-GB", + "ES-aR", + "iT", + "zH_HaNs_cN", + "dE-aT", + "Pl", + "FR-FR", + "de_AT", + "sR-CyrL_sr", + "NB-NO", + "fr_fr", + "Mk", + "uK", + "en-us", + "en_gb", + "ES-AR", + "tH", + "DE", + "ZH_cyrl_hN", + "eN-lAtN-uS", + ]; + c.bench_function("language_identifier_parser_casing", |b| { + let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); + b.iter(|| { + for s in &slices { + let _ = parse_language_identifier(black_box(s)); + } + }) + }); +} + +criterion_group!( + benches, + language_identifier_parser_bench, + language_identifier_parser_casing_bench, +); +criterion_main!(benches); diff --git a/third_party/rust/unic-langid-impl/data/cldr-misc-full/README.md b/third_party/rust/unic-langid-impl/data/cldr-misc-full/README.md new file mode 100644 index 0000000000..68cb052559 --- /dev/null +++ b/third_party/rust/unic-langid-impl/data/cldr-misc-full/README.md @@ -0,0 +1,6 @@ +# cldr-misc-full + +This repository provides the a portion of the JSON distribution of CLDR locale data +for internationalization. + +Refer to the README at https://github.com/unicode-cldr/cldr-json for complete details. diff --git a/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs b/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs new file mode 100644 index 0000000000..35d8e27aed --- /dev/null +++ b/third_party/rust/unic-langid-impl/src/bin/generate_layout.rs @@ -0,0 +1,130 @@ +use serde_json::Value; +use std::collections::HashMap; +use std::collections::HashSet; +use std::fs; +use unic_langid_impl::subtags::{Language, Script}; +use unic_langid_impl::CharacterDirection; +use unic_langid_impl::LanguageIdentifier; + +fn langid_to_direction_map(path: &str) -> HashMap { + let mut result = HashMap::new(); + for entry in fs::read_dir(path).unwrap() { + let entry = entry.unwrap(); + let mut path = entry.path(); + path.push("layout.json"); + let contents = fs::read_to_string(path).expect("Something went wrong reading the file"); + let v: Value = serde_json::from_str(&contents).unwrap(); + + let langid_key = v["main"].as_object().unwrap().keys().nth(0).unwrap(); + + if langid_key == "root" { + continue; + } + let langid: LanguageIdentifier = langid_key.parse().unwrap(); + + let character_order = match v["main"][langid_key]["layout"]["orientation"]["characterOrder"] + .as_str() + .unwrap() + { + "right-to-left" => CharacterDirection::RTL, + "left-to-right" => CharacterDirection::LTR, + _ => unimplemented!("Encountered unknown directionality!"), + }; + result.insert(langid, character_order); + } + result +} + +fn check_all_variants_rtl( + map: &HashMap, + lang: Option, + script: Option