diff options
Diffstat (limited to 'vendor/tinystr')
31 files changed, 2359 insertions, 2960 deletions
diff --git a/vendor/tinystr/.cargo-checksum.json b/vendor/tinystr/.cargo-checksum.json index b2cb895be..36b7cede5 100644 --- a/vendor/tinystr/.cargo-checksum.json +++ b/vendor/tinystr/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CHANGELOG.md":"cb378d2a5c7efd2259cdb7513e1a6bc8bc05b2c5f89b69b69f1f16037495760b","Cargo.lock":"c772766a0c3008c0e528d1ef08c5d8b0e8752a308cb5a782855f691c4b3c223f","Cargo.toml":"af00927b1bd3451ef45685cb34c466566df816aa41f31555c9492c74af597c2c","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"7eaffe990ac4432ab388dcda2703f5594e9b1c7a6f753882906b0b8df9876bb4","benches/construct.rs":"9c8652afb0770f60238952b06abb4b90c6cec41d50fd19b760e573eb8dff8e4f","benches/tinystr.rs":"dc61b52ca7be3312a8c504786ad60e5e2c4c729e111c2033716c9996c5e87bc8","examples/main.rs":"463f7c0db47a3843097b012315fc8b0d68baa4afca0a0fe06210ed3c8c471453","src/helpers.rs":"d7768b34dacbb586ade41249147bce1c90d1c08dbfd7c7de6792c88ea45200e1","src/lib.rs":"17740bacc4b05e433f58ddd06d86287097f83024849166e95e8ddb16e491873f","src/tinystr16.rs":"b40595634190ee298bbdcb60b3699b11a25ef2861bccdb567821815ff21fbac3","src/tinystr4.rs":"1787c52e9ec9711b0a115f4c32bee952fb410ce569db4a89fef4c5282bb4f04f","src/tinystr8.rs":"2baf0330c3a20e2e01ff791aa932450d2727d53eb8ed80d76a685bdd824700b6","src/tinystrauto.rs":"55b9333cd8a69e8ea79db72801c4f2b946c5b61108e1ddfb2bf2e06384c41f4a","tests/main.rs":"83b1892f2c5437f99be3ce6d5e98a8df42a5c293447437cc567dcc133a2768d7"},"package":"29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1"}
\ No newline at end of file +{"files":{"Cargo.toml":"279fef44ae26d0bda43cc629cbe22795d696b36635fcb7ca484e74b9d72b9eb4","LICENSE":"4ad7541d66a407234e2c84902124cef325c29f3e966353efdb800bedb8b8da21","README.md":"ce7d4b009ab4878b4b647fa5c01037b4d3e8bf72e44942dbf6cf1ef217d10b4d","benches/common/mod.rs":"7a31f89cb68cec2574287636ac22fe3fc86a66688b8b1e99700a5da692bd485e","benches/construct.rs":"0e0e7c1459dd3efea0c734a999318078b53e18c3389c74a1ff5a226cd3d05cca","benches/overview.rs":"296d19b32a2d52e449140771d89f9c099d19177eb84e1395c942469d51c4c3f8","benches/read.rs":"cbf349393a50eb90e7ba53906f98a689d585242292f867a37acf6842263af4d9","benches/serde.rs":"5c88866d08c07088b82dbd5472e6276c632d11e064417f5d8f2025a5ade867f0","src/ascii.rs":"ca84603237893d515cf4d3cc5bf61470a81c499956b8bdf51239433c0d49785e","src/asciibyte.rs":"fa29de7403c0424c52c2f30bb47002b9abf4bd08b302c411ffe679d3decfb8de","src/databake.rs":"9f29e30e6deec989822cbdf01f5165e098fa544cf7e49ccea3f5de827648fc1e","src/error.rs":"859d03faa3e98d979e0d6b5d232810d42b58f9c6ef69403d442545327053265e","src/int_ops.rs":"c2be314d19dd41cf18fb3589901d7e58ee32fe3f764fb6a66b08a1e005336406","src/lib.rs":"41db27f31650945dbf41b72a21d42fa4de0722b6f0717a45a3569c3dd4f1e148","src/macros.rs":"3fe76e258b0db2896284bcf4f50a4ac35b7efc542649b4c9f13c6e71c5957ae4","src/serde.rs":"0bd6bbe2ee8195aea68dd235d59b94faa3419aaeb8939e3220dd64bd888873f5","src/ule.rs":"139543634949a95405bc49862840b0794db089abed6efe66533858376cae180f","tests/serde.rs":"cf8cee82f731928375888d1b5e7e5e50368d3e16ce372fced230c9b1ee2a7451"},"package":"f8aeafdfd935e4a7fe16a91ab711fa52d54df84f9c8f7ca5837a9d1d902ef4c2"}
\ No newline at end of file diff --git a/vendor/tinystr/CHANGELOG.md b/vendor/tinystr/CHANGELOG.md deleted file mode 100644 index 257012a4a..000000000 --- a/vendor/tinystr/CHANGELOG.md +++ /dev/null @@ -1,35 +0,0 @@ -# Changelog - -## Unreleased - - - … - -## tinystr 0.3.4 (August 21, 2020) - - - Add `macros` feature which exposes `tinystr::macros`. - -## tinystr 0.3.3 (July 26, 2020) - - - Add `TinyStrAuto`. - - Add `no_std` feature. - -## tinystr 0.3.2 (October 28, 2019) - - - Add `from_bytes` method. - -## tinystr 0.3.1 (October 1, 2019) - - - Documentation. - -## tinystr 0.3.1 (October 1, 2019) - - - Documentation. - -## tinystr 0.3.0 (August 23, 2019) - - - Separate out `is_ascii_numeric`, `is_ascii_alphanumeric` and `is_ascii_alphabetic`. - -## tinystr 0.2.0 (August 16, 2019) - - - Add TinyStr16 - - Add to_ascii_titlecase specialization for all TinyStr* diff --git a/vendor/tinystr/Cargo.lock b/vendor/tinystr/Cargo.lock deleted file mode 100644 index ca4e5151b..000000000 --- a/vendor/tinystr/Cargo.lock +++ /dev/null @@ -1,625 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" - -[[package]] -name = "bitflags" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" - -[[package]] -name = "bstr" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "bumpalo" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" - -[[package]] -name = "byteorder" -version = "1.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" - -[[package]] -name = "cast" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" -dependencies = [ - "rustc_version", -] - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "clap" -version = "2.33.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" -dependencies = [ - "bitflags", - "textwrap", - 
"unicode-width", -] - -[[package]] -name = "criterion" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70daa7ceec6cf143990669a04c7df13391d55fb27bd4079d252fca774ba244d8" -dependencies = [ - "atty", - "cast", - "clap", - "criterion-plot", - "csv", - "itertools", - "lazy_static", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_cbor", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam-deque" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", - "maybe-uninit", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "lazy_static", - "maybe-uninit", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-queue" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "maybe-uninit", -] - -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg", - "cfg-if", - "lazy_static", -] - -[[package]] -name = "csv" -version = "1.1.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" -dependencies = [ - "bstr", - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "either" -version = "1.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" - -[[package]] -name = "half" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d36fab90f82edc3c747f9d438e06cf0a491055896f2a279638bb5beed6c40177" - -[[package]] -name = "hermit-abi" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909" -dependencies = [ - "libc", -] - -[[package]] -name = "itertools" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" - -[[package]] -name = "js-sys" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4b9172132a62451e56142bff9afc91c8e4a4500aa5b847da36815b63bfda916" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.71" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49" - -[[package]] -name = "log" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - -[[package]] -name = "memchr" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" - -[[package]] -name = "memoffset" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num-traits" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "oorandom" -version = "11.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a170cebd8021a008ea92e4db85a72f80b35df514ec664b296fdcbb654eac0b2c" - -[[package]] -name = "plotters" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d1685fbe7beba33de0330629da9d955ac75bd54f33d7b79f9a895590124f6bb" -dependencies = [ - "js-sys", - "num-traits", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "proc-macro2" -version = 
"1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rayon" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280" -dependencies = [ - "crossbeam-deque", - "crossbeam-queue", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - -[[package]] -name = "regex" -version = "1.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" -dependencies = [ - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" -dependencies = [ - "byteorder", -] - -[[package]] -name = "regex-syntax" -version = "0.6.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" - -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - -[[package]] -name = "ryu" -version = "1.0.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - -[[package]] -name = "serde" -version = "1.0.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3" - -[[package]] -name = "serde_cbor" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" -dependencies = [ - "half", - "serde", -] - -[[package]] -name = "serde_derive" -version = "1.0.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "syn" 
-version = "1.0.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d5d96e8cbb005d6959f119f773bfaebb5684296108fb32600c00cde305b2cd" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "tinystr" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707151f004e8db265b83b1c7509d6c3b4c2c2bc8696113cbe0a8e595c2fdbd3b" - -[[package]] -name = "tinystr" -version = "0.3.4" -dependencies = [ - "criterion", - "tinystr-macros", -] - -[[package]] -name = "tinystr-macros" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c97a45afd04e6bf6d5945895d6982afd9a428e0ebf66585d5e09961d8319ac30" -dependencies = [ - "tinystr 0.3.3", -] - -[[package]] -name = "tinytemplate" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d3dc76004a03cec1c5932bca4cdc2e39aaa798e3f82363dd94f9adf6098c12f" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - -[[package]] -name = "unicode-xid" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" - -[[package]] -name = "walkdir" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" -dependencies = [ - "same-file", - "winapi", - "winapi-util", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.64" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a634620115e4a229108b71bde263bb4220c483b3f07f5ba514ee8d15064c4c2" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e53963b583d18a5aa3aaae4b4c1cb535218246131ba22a71f05b518098571df" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fcfd5ef6eec85623b4c6e844293d4516470d8f19cd72d0d12246017eb9060b8" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9adff9ee0e94b926ca81b57f57f86d5545cdcb1d259e21ec9bdd95b901754c75" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7b90ea6c632dd06fd765d44542e234d5e63d9bb917ecd64d79778a13bd79ae" - -[[package]] -name = "web-sys" -version = "0.3.41" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "863539788676619aac1a23e2df3655e96b32b0e05eb72ca34ba045ad573c625d" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/vendor/tinystr/Cargo.toml b/vendor/tinystr/Cargo.toml index 53f9fc1f7..64682f74e 100644 --- a/vendor/tinystr/Cargo.toml +++ b/vendor/tinystr/Cargo.toml @@ -3,40 +3,112 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
[package] -edition = "2018" +edition = "2021" name = "tinystr" -version = "0.3.4" -authors = ["Raph Levien <raph.levien@gmail.com>", "Zibi Braniecki <zibi@braniecki.net>"] -description = "A small ASCII-only bounded length string representation.\n" -readme = "README.md" -keywords = ["string", "str", "small", "tiny", "no_std"] +version = "0.7.0" +authors = ["The ICU4X Project Developers"] +include = [ + "src/**/*", + "examples/**/*", + "benches/**/*", + "tests/**/*", + "Cargo.toml", + "LICENSE", + "README.md", +] +description = "A small ASCII-only bounded length string representation." +keywords = [ + "string", + "str", + "small", + "tiny", + "no_std", +] categories = ["data-structures"] -license = "Apache-2.0/MIT" -repository = "https://github.com/zbraniecki/tinystr" +license = "Unicode-DFS-2016" +repository = "https://github.com/unicode-org/icu4x" +resolver = "2" + +[package.metadata.docs.rs] +all-features = true + +[[test]] +name = "serde" +required-features = ["serde"] + +[[bench]] +name = "overview" +harness = false [[bench]] name = "construct" harness = false +required-features = ["bench"] [[bench]] -name = "tinystr" +name = "read" harness = false -[dependencies.tinystr-macros] +required-features = ["bench"] + +[[bench]] +name = "serde" +harness = false +required-features = [ + "bench", + "serde", +] + +[dependencies.databake] version = "0.1" optional = true + +[dependencies.displaydoc] +version = "0.2.3" +default-features = false + +[dependencies.serde] +version = "1.0.123" +features = ["alloc"] +optional = true +default-features = false + +[dependencies.zerovec] +version = "0.9" +optional = true + +[dev-dependencies.bincode] +version = "1.3" + [dev-dependencies.criterion] version = "0.3" +[dev-dependencies.postcard] +version = "1.0.0" +features = ["use-std"] + +[dev-dependencies.rand] +version = "0.8.5" +features = ["small_rng"] + +[dev-dependencies.serde_json] +version = "1.0" +features = ["alloc"] +default-features = false + +[dev-dependencies.tinystr_old] 
+version = "0.4" +features = ["serde"] +package = "tinystr" + [features] alloc = [] -default = ["std"] -macros = ["tinystr-macros"] -std = [] +bench = [] +default = ["alloc"] +zerovec = ["dep:zerovec"] diff --git a/vendor/tinystr/LICENSE b/vendor/tinystr/LICENSE new file mode 100644 index 000000000..9858d01ab --- /dev/null +++ b/vendor/tinystr/LICENSE @@ -0,0 +1,51 @@ +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use <https://www.unicode.org/copyright.html> +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. 
+ +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. diff --git a/vendor/tinystr/LICENSE-APACHE b/vendor/tinystr/LICENSE-APACHE deleted file mode 100644 index 16fe87b06..000000000 --- a/vendor/tinystr/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/vendor/tinystr/LICENSE-MIT b/vendor/tinystr/LICENSE-MIT deleted file mode 100644 index 31aa79387..000000000 --- a/vendor/tinystr/LICENSE-MIT +++ /dev/null @@ -1,23 +0,0 @@ -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/tinystr/README.md b/vendor/tinystr/README.md index 15a8338be..96b3f955f 100644 --- a/vendor/tinystr/README.md +++ b/vendor/tinystr/README.md @@ -1,95 +1,53 @@ -# tinystr [![crates.io](http://meritbadge.herokuapp.com/tinystr)](https://crates.io/crates/tinystr) [![Build Status](https://travis-ci.org/zbraniecki/tinystr.svg?branch=master)](https://travis-ci.org/zbraniecki/tinystr) [![Coverage Status](https://coveralls.io/repos/github/zbraniecki/tinystr/badge.svg?branch=master)](https://coveralls.io/github/zbraniecki/tinystr?branch=master) +# tinystr [![crates.io](https://img.shields.io/crates/v/tinystr)](https://crates.io/crates/tinystr) -`tinystr` is a small ASCII-only bounded length string representation. +`tinystr` is a utility crate of the [`ICU4X`] project. -Usage ------ +It includes [`TinyAsciiStr`], a core API for representing small ASCII-only bounded length strings. -```rust -use tinystr::{TinyStr4, TinyStr8, TinyStr16, TinyStrAuto}; - -fn main() { - let s1: TinyStr4 = "tEsT".parse() - .expect("Failed to parse."); - - assert_eq!(s1, "tEsT"); - assert_eq!(s1.to_ascii_uppercase(), "TEST"); - assert_eq!(s1.to_ascii_lowercase(), "test"); - assert_eq!(s1.to_ascii_titlecase(), "Test"); - assert_eq!(s1.is_ascii_alphanumeric(), true); +It is optimized for operations on strings of size 8 or smaller. When use cases involve comparison +and conversion of strings for lowercase/uppercase/titlecase, or checking +numeric/alphabetic/alphanumeric, `TinyAsciiStr` is the edge performance library. 
- let s2: TinyStr8 = "New York".parse() - .expect("Failed to parse."); +## Examples - assert_eq!(s2, "New York"); - assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); - assert_eq!(s2.to_ascii_lowercase(), "new york"); - assert_eq!(s2.to_ascii_titlecase(), "New york"); - assert_eq!(s2.is_ascii_alphanumeric(), false); +```rust +use tinystr::TinyAsciiStr; - let s3: TinyStr16 = "metaMoRphosis123".parse() - .expect("Failed to parse."); +let s1: TinyAsciiStr<4> = "tEsT".parse().expect("Failed to parse."); - assert_eq!(s3, "metaMoRphosis123"); - assert_eq!(s3.to_ascii_uppercase(), "METAMORPHOSIS123"); - assert_eq!(s3.to_ascii_lowercase(), "metamorphosis123"); - assert_eq!(s3.to_ascii_titlecase(), "Metamorphosis123"); - assert_eq!(s3.is_ascii_alphanumeric(), true); +assert_eq!(s1, "tEsT"); +assert_eq!(s1.to_ascii_uppercase(), "TEST"); +assert_eq!(s1.to_ascii_lowercase(), "test"); +assert_eq!(s1.to_ascii_titlecase(), "Test"); +assert_eq!(s1.is_ascii_alphanumeric(), true); +assert_eq!(s1.is_ascii_numeric(), false); - let s4: TinyStrAuto = "shortNoAlloc".parse().unwrap(); - assert!(matches!(s4, TinyStrAuto::Tiny { .. })); - assert_eq!(s4, "shortNoAlloc"); +let s2 = TinyAsciiStr::<8>::try_from_raw(*b"New York") + .expect("Failed to parse."); - let s5: TinyStrAuto = "longFallbackToHeap".parse().unwrap(); - assert!(matches!(s4, TinyStrAuto::Heap { .. })); - assert_eq!(s4, "shortNoAlloc"); -} +assert_eq!(s2, "New York"); +assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); +assert_eq!(s2.to_ascii_lowercase(), "new york"); +assert_eq!(s2.to_ascii_titlecase(), "New york"); +assert_eq!(s2.is_ascii_alphanumeric(), false); ``` -Details -------- - -The crate provides three structs and an enum: - * `TinyStr4` an ASCII-only string limited to 4 characters. - * `TinyStr8` an ASCII-only string limited to 8 characters. - * `TinyStr16` an ASCII-only string limited to 16 characters. - * `TinyStrAuto` (enum): - * `Tiny` when the string is 16 characters or less. 
- * `Heap` when the string is 17 or more characters. - -The structs stores the characters as `u32`/`u64`/`u128` and uses bitmasking to provide basic string manipulation operations: - * is_ascii_numeric - * is_ascii_alphabetic - * is_ascii_alphanumeric - * to_ascii_lowercase - * to_ascii_uppercase - * to_ascii_titlecase - * PartialEq - -`TinyStrAuto` stores the string as a TinyStr16 when it is short enough, or else falls back to a standard `String`. You should use TinyStrAuto when you expect most strings to be 16 characters or smaller, but occasionally you receive one that exceeds that length. Unlike the structs, `TinyStrAuto` does not implement `Copy`. - -This set is sufficient for certain classes of uses such as `unic-langid` libraries. - -no_std ------- - -Disable the `std` feature of this crate to make it `#[no_std]`. Doing so disables `TinyStrAuto`. You -can re-enable `TinyStrAuto` in `#[no_std]` mode by enabling the `alloc` feature. - -Performance ------------ +## Details -For those uses, TinyStr provides [performance characteristics](https://github.com/zbraniecki/tinystr/wiki/Performance) much better than the regular `String`. +When strings are of size 8 or smaller, the struct transforms the strings as `u32`/`u64` and uses +bitmasking to provide basic string manipulation operations: +* `is_ascii_numeric` +* `is_ascii_alphabetic` +* `is_ascii_alphanumeric` +* `to_ascii_lowercase` +* `to_ascii_uppercase` +* `to_ascii_titlecase` +* `PartialEq` -Status ------- +`TinyAsciiStr` will fall back to `u8` character manipulation for strings of length greater than 8. -The crate is fully functional and ready to be used in production. -The capabilities can be extended. +[`ICU4X`]: ../icu/index.html -#### License +## More Information -<sup> -Licensed under either of <a href="LICENSE-APACHE">Apache License, Version -2.0</a> or <a href="LICENSE-MIT">MIT license</a> at your option. -</sup +For more information on development, authorship, contributing etc. 
please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/vendor/tinystr/benches/common/mod.rs b/vendor/tinystr/benches/common/mod.rs new file mode 100644 index 000000000..07654e1d9 --- /dev/null +++ b/vendor/tinystr/benches/common/mod.rs @@ -0,0 +1,79 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// This file was adapted from parts of https://github.com/zbraniecki/tinystr + +pub static STRINGS_4: &[&str] = &[ + "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK", +]; + +pub static STRINGS_8: &[&str] = &[ + "Latn", "windows", "AR", "Hans", "macos", "AT", "pl", "FR", "en", "Cyrl", "SR", "NO", "419", + "und", "UK", +]; + +pub static STRINGS_16: &[&str] = &[ + "Latn", + "windows", + "AR", + "Hans", + "macos", + "AT", + "infiniband", + "FR", + "en", + "Cyrl", + "FromIntegral", + "NO", + "419", + "MacintoshOSX2019", + "UK", +]; + +#[macro_export] +macro_rules! 
bench_block { + ($c:expr, $name:expr, $action:ident) => { + let mut group4 = $c.benchmark_group(&format!("{}/4", $name)); + group4.bench_function("String", $action!(String, STRINGS_4)); + group4.bench_function("TinyAsciiStr<4>", $action!(TinyAsciiStr<4>, STRINGS_4)); + group4.bench_function( + "tinystr_old::TinyStr4", + $action!(tinystr_old::TinyStr4, STRINGS_4), + ); + group4.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_4)); + group4.bench_function( + "tinystr_old::TinyStr8", + $action!(tinystr_old::TinyStr8, STRINGS_4), + ); + group4.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_4)); + group4.bench_function( + "tinystr_old::TinyStr16", + $action!(tinystr_old::TinyStr16, STRINGS_4), + ); + group4.finish(); + + let mut group8 = $c.benchmark_group(&format!("{}/8", $name)); + group8.bench_function("String", $action!(String, STRINGS_8)); + group8.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_8)); + group8.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_8)); + group8.bench_function( + "tinystr_old::TinyStr8", + $action!(tinystr_old::TinyStr8, STRINGS_8), + ); + group8.bench_function( + "tinystr_old::TinyStr16", + $action!(tinystr_old::TinyStr16, STRINGS_8), + ); + group8.finish(); + + let mut group16 = $c.benchmark_group(&format!("{}/16", $name)); + group16.bench_function("String", $action!(String, STRINGS_16)); + group16.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_16)); + group16.bench_function( + "tinystr_old::TinyStr16", + $action!(tinystr_old::TinyStr16, STRINGS_16), + ); + group16.finish(); + }; +} diff --git a/vendor/tinystr/benches/construct.rs b/vendor/tinystr/benches/construct.rs index a93fce8f6..145e721e1 100644 --- a/vendor/tinystr/benches/construct.rs +++ b/vendor/tinystr/benches/construct.rs @@ -1,91 +1,41 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +// This file was adapted from https://github.com/zbraniecki/tinystr + +mod common; +use common::*; + use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Bencher; use criterion::Criterion; -use criterion::Fun; - -use tinystr::{TinyStr16, TinyStr4, TinyStr8, TinyStrAuto}; - -static STRINGS_4: &[&str] = &[ - "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK", -]; - -static STRINGS_8: &[&str] = &[ - "Latn", "windows", "AR", "Hans", "macos", "AT", "pl", "FR", "en", "Cyrl", "SR", "NO", "419", - "und", "UK", -]; - -static STRINGS_16: &[&str] = &[ - "Latn", - "windows", - "AR", - "Hans", - "macos", - "AT", - "infiniband", - "FR", - "en", - "Cyrl", - "FromIntegral", - "NO", - "419", - "MacintoshOSX2019", - "UK", -]; - -macro_rules! bench_block { - ($c:expr, $name:expr, $action:ident) => { - let funcs = vec![ - Fun::new("String", $action!(String)), - Fun::new("TinyStr4", $action!(TinyStr4)), - Fun::new("TinyStr8", $action!(TinyStr8)), - Fun::new("TinyStr16", $action!(TinyStr16)), - Fun::new("TinyStrAuto", $action!(TinyStrAuto)), - ]; - $c.bench_functions(&format!("{}/4", $name), funcs, STRINGS_4); - - let funcs = vec![ - Fun::new("String", $action!(String)), - Fun::new("TinyStr8", $action!(TinyStr8)), - Fun::new("TinyStr16", $action!(TinyStr16)), - Fun::new("TinyStrAuto", $action!(TinyStrAuto)), - ]; - - $c.bench_functions(&format!("{}/8", $name), funcs, STRINGS_8); - - let funcs = vec![ - Fun::new("String", $action!(String)), - Fun::new("TinyStr16", $action!(TinyStr16)), - Fun::new("TinyStrAuto", $action!(TinyStrAuto)), - ]; - - $c.bench_functions(&format!("{}/16", $name), funcs, STRINGS_16); - }; -} +use tinystr::TinyAsciiStr; fn construct_from_str(c: &mut Criterion) { macro_rules! 
cfs { - ($r:ty) => { - |b: &mut Bencher, strings: &&[&str]| { + ($r:ty, $inputs:expr) => { + |b: &mut Bencher| { b.iter(|| { - for s in *strings { + for s in $inputs { let _: $r = black_box(s.parse().unwrap()); } }) } }; - }; + } bench_block!(c, "construct_from_str", cfs); } fn construct_from_bytes(c: &mut Criterion) { macro_rules! cfu { - ($r:ty) => { - |b, inputs: &&[&str]| { - let raw: Vec<&[u8]> = inputs.iter().map(|s| s.as_bytes()).collect(); + ($r:ty, $inputs:expr) => { + |b| { + let raw: Vec<&[u8]> = $inputs.iter().map(|s| s.as_bytes()).collect(); b.iter(move || { for u in &raw { let _ = black_box(<$r>::from_bytes(*u).unwrap()); @@ -93,62 +43,47 @@ fn construct_from_bytes(c: &mut Criterion) { }) } }; - }; - - let funcs = vec![ - Fun::new("TinyStr4", cfu!(TinyStr4)), - Fun::new("TinyStr8", cfu!(TinyStr8)), - Fun::new("TinyStr16", cfu!(TinyStr16)), - ]; - - c.bench_functions("construct_from_bytes/4", funcs, STRINGS_4); - - let funcs = vec![ - Fun::new("TinyStr8", cfu!(TinyStr8)), - Fun::new("TinyStr16", cfu!(TinyStr16)), - ]; - - c.bench_functions("construct_from_bytes/8", funcs, STRINGS_8); - - let funcs = vec![Fun::new("TinyStr16", cfu!(TinyStr16))]; - - c.bench_functions("construct_from_bytes/16", funcs, STRINGS_16); -} - -fn construct_unchecked(c: &mut Criterion) { - macro_rules! 
cu { - ($tty:ty, $rty:ty) => { - |b, inputs: &&[&str]| { - let raw: Vec<$rty> = inputs - .iter() - .map(|s| s.parse::<$tty>().unwrap().into()) - .collect(); - b.iter(move || { - for num in &raw { - let _ = unsafe { <$tty>::new_unchecked(black_box(*num)) }; - } - }) - } - }; - }; - - let funcs = vec![Fun::new("TinyStr4", cu!(TinyStr4, u32))]; - - c.bench_functions("construct_unchecked/4", funcs, STRINGS_4); - - let funcs = vec![Fun::new("TinyStr8", cu!(TinyStr8, u64))]; - - c.bench_functions("construct_unchecked/8", funcs, STRINGS_8); - - let funcs = vec![Fun::new("TinyStr16", cu!(TinyStr16, u128))]; - - c.bench_functions("construct_unchecked/16", funcs, STRINGS_16); + } + + let mut group4 = c.benchmark_group("construct_from_bytes/4"); + group4.bench_function("TinyAsciiStr<4>", cfu!(TinyAsciiStr<4>, STRINGS_4)); + group4.bench_function( + "tinystr_old::TinyStr4", + cfu!(tinystr_old::TinyStr4, STRINGS_4), + ); + group4.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_4)); + group4.bench_function( + "tinystr_old::TinyStr8", + cfu!(tinystr_old::TinyStr8, STRINGS_4), + ); + group4.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_4)); + group4.bench_function( + "tinystr_old::TinyStr16", + cfu!(tinystr_old::TinyStr16, STRINGS_4), + ); + group4.finish(); + + let mut group8 = c.benchmark_group("construct_from_bytes/8"); + group8.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_8)); + group8.bench_function( + "tinystr_old::TinyStr8", + cfu!(tinystr_old::TinyStr8, STRINGS_8), + ); + group8.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_8)); + group8.bench_function( + "tinystr_old::TinyStr16", + cfu!(tinystr_old::TinyStr16, STRINGS_8), + ); + group8.finish(); + + let mut group16 = c.benchmark_group("construct_from_bytes/16"); + group16.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_16)); + group16.bench_function( + "tinystr_old::TinyStr16", + cfu!(tinystr_old::TinyStr16, STRINGS_16), + ); + 
group16.finish(); } -criterion_group!( - benches, - construct_from_str, - construct_from_bytes, - construct_unchecked, -); +criterion_group!(benches, construct_from_str, construct_from_bytes,); criterion_main!(benches); diff --git a/vendor/tinystr/benches/overview.rs b/vendor/tinystr/benches/overview.rs new file mode 100644 index 000000000..4911832ec --- /dev/null +++ b/vendor/tinystr/benches/overview.rs @@ -0,0 +1,165 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod common; +use common::*; + +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; + +use tinystr::TinyAsciiStr; +use tinystr_old::TinyStr16; +use tinystr_old::TinyStr4; +use tinystr_old::TinyStr8; + +fn overview(c: &mut Criterion) { + let mut g = c.benchmark_group("overview"); + + g.bench_function("construct/TinyAsciiStr", |b| { + b.iter(|| { + for s in STRINGS_4 { + let _: TinyAsciiStr<4> = black_box(s).parse().unwrap(); + let _: TinyAsciiStr<8> = black_box(s).parse().unwrap(); + let _: TinyAsciiStr<16> = black_box(s).parse().unwrap(); + } + for s in STRINGS_8 { + let _: TinyAsciiStr<8> = black_box(s).parse().unwrap(); + let _: TinyAsciiStr<16> = black_box(s).parse().unwrap(); + } + for s in STRINGS_16 { + let _: TinyAsciiStr<16> = black_box(s).parse().unwrap(); + } + }); + }); + + g.bench_function("construct/TinyStr", |b| { + b.iter(|| { + for s in STRINGS_4 { + let _: TinyStr4 = black_box(s).parse().unwrap(); + let _: TinyStr8 = black_box(s).parse().unwrap(); + let _: TinyStr16 = black_box(s).parse().unwrap(); + } + for s in STRINGS_8 { + let _: TinyStr8 = black_box(s).parse().unwrap(); + let _: TinyStr16 = black_box(s).parse().unwrap(); + } + for s in STRINGS_16 { + let _: TinyStr16 = black_box(s).parse().unwrap(); + } + }); + }); + + let parsed_ascii_4: 
Vec<TinyAsciiStr<4>> = STRINGS_4 + .iter() + .map(|s| s.parse::<TinyAsciiStr<4>>().unwrap()) + .collect(); + let parsed_ascii_8: Vec<TinyAsciiStr<8>> = STRINGS_4 + .iter() + .chain(STRINGS_8) + .map(|s| s.parse::<TinyAsciiStr<8>>().unwrap()) + .collect(); + let parsed_ascii_16: Vec<TinyAsciiStr<16>> = STRINGS_4 + .iter() + .chain(STRINGS_8) + .chain(STRINGS_16) + .map(|s| s.parse::<TinyAsciiStr<16>>().unwrap()) + .collect(); + + let parsed_tiny_4: Vec<TinyStr4> = STRINGS_4 + .iter() + .map(|s| s.parse::<TinyStr4>().unwrap()) + .collect(); + let parsed_tiny_8: Vec<TinyStr8> = STRINGS_4 + .iter() + .chain(STRINGS_8) + .map(|s| s.parse::<TinyStr8>().unwrap()) + .collect(); + let parsed_tiny_16: Vec<TinyStr16> = STRINGS_4 + .iter() + .chain(STRINGS_8) + .chain(STRINGS_16) + .map(|s| s.parse::<TinyStr16>().unwrap()) + .collect(); + + g.bench_function("read/TinyAsciiStr", |b| { + b.iter(|| { + let mut collector: usize = 0; + for t in black_box(&parsed_ascii_4) { + let s: &str = t; + collector += s.bytes().map(usize::from).sum::<usize>(); + } + for t in black_box(&parsed_ascii_8) { + let s: &str = t; + collector += s.bytes().map(usize::from).sum::<usize>(); + } + for t in black_box(&parsed_ascii_16) { + let s: &str = t; + collector += s.bytes().map(usize::from).sum::<usize>(); + } + collector + }); + }); + + g.bench_function("read/TinyStr", |b| { + b.iter(|| { + let mut collector: usize = 0; + for t in black_box(&parsed_tiny_4) { + let s: &str = t; + collector += s.bytes().map(usize::from).sum::<usize>(); + } + for t in black_box(&parsed_tiny_8) { + let s: &str = t; + collector += s.bytes().map(usize::from).sum::<usize>(); + } + for t in black_box(&parsed_tiny_16) { + let s: &str = t; + collector += s.bytes().map(usize::from).sum::<usize>(); + } + collector + }); + }); + + g.bench_function("compare/TinyAsciiStr", |b| { + b.iter(|| { + let mut collector: usize = 0; + for ts in black_box(&parsed_ascii_4).windows(2) { + let o = ts[0].cmp(&ts[1]); + collector ^= o as usize; + 
} + for ts in black_box(&parsed_ascii_8).windows(2) { + let o = ts[0].cmp(&ts[1]); + collector ^= o as usize; + } + for ts in black_box(&parsed_ascii_16).windows(2) { + let o = ts[0].cmp(&ts[1]); + collector ^= o as usize; + } + collector + }); + }); + + g.bench_function("compare/TinyStr", |b| { + b.iter(|| { + let mut collector: usize = 0; + for ts in black_box(&parsed_tiny_4).windows(2) { + let o = ts[0].cmp(&ts[1]); + collector ^= o as usize; + } + for ts in black_box(&parsed_tiny_8).windows(2) { + let o = ts[0].cmp(&ts[1]); + collector ^= o as usize; + } + for ts in black_box(&parsed_tiny_16).windows(2) { + let o = ts[0].cmp(&ts[1]); + collector ^= o as usize; + } + collector + }); + }); +} + +criterion_group!(benches, overview,); +criterion_main!(benches); diff --git a/vendor/tinystr/benches/read.rs b/vendor/tinystr/benches/read.rs new file mode 100644 index 000000000..793bb14f8 --- /dev/null +++ b/vendor/tinystr/benches/read.rs @@ -0,0 +1,34 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod common; +use common::*; + +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Bencher; +use criterion::Criterion; + +use tinystr::TinyAsciiStr; + +fn read(c: &mut Criterion) { + macro_rules! cfs { + ($r:ty, $inputs:expr) => { + |b: &mut Bencher| { + let parsed: Vec<$r> = $inputs.iter().map(|s| s.parse().unwrap()).collect(); + b.iter(|| { + for s in &parsed { + let _: &str = black_box(&**s); + } + }) + } + }; + } + + bench_block!(c, "read", cfs); +} + +criterion_group!(benches, read,); +criterion_main!(benches); diff --git a/vendor/tinystr/benches/serde.rs b/vendor/tinystr/benches/serde.rs new file mode 100644 index 000000000..b0341221d --- /dev/null +++ b/vendor/tinystr/benches/serde.rs @@ -0,0 +1,37 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +mod common; +use common::*; + +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Bencher; +use criterion::Criterion; + +use tinystr::TinyAsciiStr; + +fn deserialize(c: &mut Criterion) { + macro_rules! cfs { + ($r:ty, $inputs:expr) => { + |b: &mut Bencher| { + let serialized: Vec<Vec<u8>> = $inputs + .iter() + .map(|s| postcard::to_stdvec(&s.parse::<$r>().unwrap()).unwrap()) + .collect(); + b.iter(|| { + for bytes in &serialized { + let _: Result<$r, _> = black_box(postcard::from_bytes(bytes)); + } + }) + } + }; + } + + bench_block!(c, "deserialize", cfs); +} + +criterion_group!(benches, deserialize,); +criterion_main!(benches); diff --git a/vendor/tinystr/benches/tinystr.rs b/vendor/tinystr/benches/tinystr.rs deleted file mode 100644 index 83b26a30a..000000000 --- a/vendor/tinystr/benches/tinystr.rs +++ /dev/null @@ -1,176 +0,0 @@ -use criterion::black_box; -use criterion::criterion_group; -use criterion::criterion_main; -use criterion::Bencher; -use criterion::Criterion; -use criterion::Fun; - -use tinystr::{TinyStr16, TinyStr4, TinyStr8}; - -static STRINGS_4: &[&str] = &[ - "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK", -]; - -static STRINGS_8: &[&str] = &[ - "Latn", "windows", "AR", "Hans", "macos", "AT", "pl", "FR", "en", "Cyrl", "SR", "NO", "419", - "und", "UK", -]; - -static STRINGS_16: &[&str] = &[ - "Latn", - "windows", - "AR", - "Hans", - "macos", - "AT", - "infiniband", - "FR", - "en", - "Cyrl", - "FromIntegral", - "NO", - "419", - "MacintoshOSX2019", - "UK", -]; - -macro_rules! 
bench_block { - ($c:expr, $name:expr, $action:ident) => { - let funcs = vec![ - Fun::new("String", $action!(String)), - Fun::new("TinyStr4", $action!(TinyStr4)), - Fun::new("TinyStr8", $action!(TinyStr8)), - Fun::new("TinyStr16", $action!(TinyStr16)), - ]; - - $c.bench_functions(&format!("{}/4", $name), funcs, STRINGS_4); - - let funcs = vec![ - Fun::new("String", $action!(String)), - Fun::new("TinyStr8", $action!(TinyStr8)), - Fun::new("TinyStr16", $action!(TinyStr16)), - ]; - - $c.bench_functions(&format!("{}/8", $name), funcs, STRINGS_8); - - let funcs = vec![ - Fun::new("String", $action!(String)), - Fun::new("TinyStr16", $action!(TinyStr16)), - ]; - - $c.bench_functions(&format!("{}/16", $name), funcs, STRINGS_16); - }; -} - -macro_rules! convert_to_ascii { - ($ty:ty, $action:ident) => { - |b: &mut Bencher, inputs: &&[&str]| { - let raw: Vec<$ty> = inputs.iter().map(|s| s.parse::<$ty>().unwrap()).collect(); - b.iter(move || { - for s in &raw { - let _ = black_box(s.$action()); - } - }) - } - }; -} - -fn convert_to_ascii_lowercase(c: &mut Criterion) { - macro_rules! ctal { - ($ty:ty) => { - convert_to_ascii!($ty, to_ascii_lowercase) - }; - } - - bench_block!(c, "convert_to_ascii_lowercase", ctal); -} - -fn convert_to_ascii_uppercase(c: &mut Criterion) { - macro_rules! ctau { - ($ty:ty) => { - convert_to_ascii!($ty, to_ascii_uppercase) - }; - } - - bench_block!(c, "convert_to_ascii_uppercase", ctau); -} - -trait ExtToAsciiTitlecase { - #[inline(always)] - fn to_ascii_titlecase(&self) -> String; -} - -impl ExtToAsciiTitlecase for str { - fn to_ascii_titlecase(&self) -> String { - let mut result = self.to_ascii_lowercase(); - result[0..1].make_ascii_uppercase(); - result - } -} - -fn convert_to_ascii_titlecase(c: &mut Criterion) { - macro_rules! 
ctat { - ($ty:ty) => { - convert_to_ascii!($ty, to_ascii_titlecase) - }; - } - - bench_block!(c, "convert_to_ascii_titlecase", ctat); -} - -trait ExtIsAsciiAlphanumeric { - #[inline(always)] - fn is_ascii_alphanumeric(&self) -> bool; -} - -impl ExtIsAsciiAlphanumeric for str { - fn is_ascii_alphanumeric(&self) -> bool { - self.chars().all(|c| c.is_ascii_alphanumeric()) - } -} - -fn test_is_ascii_alphanumeric(c: &mut Criterion) { - macro_rules! tiaa { - ($ty:ty) => { - |b: &mut Bencher, inputs: &&[&str]| { - let raw: Vec<$ty> = inputs.iter().map(|s| s.parse::<$ty>().unwrap()).collect(); - b.iter(move || { - for s in &raw { - let _ = black_box(s.is_ascii_alphanumeric()); - } - }) - } - }; - } - - bench_block!(c, "test_is_ascii_alphanumeric", tiaa); -} - -fn test_eq(c: &mut Criterion) { - macro_rules! te { - ($ty:ty) => { - |b: &mut Bencher, inputs: &&[&str]| { - let raw: Vec<$ty> = inputs.iter().map(|s| s.parse::<$ty>().unwrap()).collect(); - b.iter(move || { - for s in &raw { - for l in &raw { - let _ = black_box(s == l); - } - } - }) - } - }; - } - - bench_block!(c, "test_eq", te); -} - -criterion_group!( - benches, - convert_to_ascii_lowercase, - convert_to_ascii_uppercase, - convert_to_ascii_titlecase, - test_is_ascii_alphanumeric, - test_eq, -); -criterion_main!(benches); diff --git a/vendor/tinystr/examples/main.rs b/vendor/tinystr/examples/main.rs deleted file mode 100644 index 2e0d2a10b..000000000 --- a/vendor/tinystr/examples/main.rs +++ /dev/null @@ -1,18 +0,0 @@ -use tinystr::{TinyStr4, TinyStr8}; - -fn main() { - let s1: TinyStr4 = "tEsT".parse().expect("Failed to parse."); - - assert_eq!(s1, "tEsT"); - assert_eq!(s1.to_ascii_uppercase(), "TEST"); - assert_eq!(s1.to_ascii_lowercase(), "test"); - assert_eq!(s1.to_ascii_titlecase(), "Test"); - assert_eq!(s1.is_ascii_alphanumeric(), true); - - let s2: TinyStr8 = "New York".parse().expect("Failed to parse."); - - assert_eq!(s2, "New York"); - assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); - 
assert_eq!(s2.to_ascii_lowercase(), "new york"); - assert_eq!(s2.is_ascii_alphanumeric(), false); -} diff --git a/vendor/tinystr/src/ascii.rs b/vendor/tinystr/src/ascii.rs new file mode 100644 index 000000000..0be1125e3 --- /dev/null +++ b/vendor/tinystr/src/ascii.rs @@ -0,0 +1,987 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::asciibyte::AsciiByte; +use crate::int_ops::{Aligned4, Aligned8}; +use crate::TinyStrError; +use core::fmt; +use core::ops::Deref; +use core::str::{self, FromStr}; + +#[repr(transparent)] +#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)] +pub struct TinyAsciiStr<const N: usize> { + bytes: [AsciiByte; N], +} + +impl<const N: usize> TinyAsciiStr<N> { + /// Creates a `TinyAsciiStr<N>` from the given byte slice. + /// `bytes` may contain at most `N` non-null ASCII bytes. + pub const fn from_bytes(bytes: &[u8]) -> Result<Self, TinyStrError> { + Self::from_bytes_inner(bytes, 0, bytes.len(), false) + } + + /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`. + /// + /// The byte array may contain trailing NUL bytes. + /// + /// # Example + /// + /// ``` + /// use tinystr::tinystr; + /// use tinystr::TinyAsciiStr; + /// + /// assert_eq!( + /// TinyAsciiStr::<3>::try_from_raw(*b"GB\0"), + /// Ok(tinystr!(3, "GB")) + /// ); + /// assert_eq!( + /// TinyAsciiStr::<3>::try_from_raw(*b"USD"), + /// Ok(tinystr!(3, "USD")) + /// ); + /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_))); + /// ``` + pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> { + Self::from_bytes_inner(&raw, 0, N, true) + } + + /// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes), + /// but callable in a `const` context (which range indexing is not). 
+ pub const fn from_bytes_manual_slice( + bytes: &[u8], + start: usize, + end: usize, + ) -> Result<Self, TinyStrError> { + Self::from_bytes_inner(bytes, start, end, false) + } + + #[inline] + pub(crate) const fn from_bytes_inner( + bytes: &[u8], + start: usize, + end: usize, + allow_trailing_null: bool, + ) -> Result<Self, TinyStrError> { + let len = end - start; + if len > N { + return Err(TinyStrError::TooLarge { max: N, len }); + } + + let mut out = [0; N]; + let mut i = 0; + let mut found_null = false; + // Indexing is protected by TinyStrError::TooLarge + #[allow(clippy::indexing_slicing)] + while i < len { + let b = bytes[start + i]; + + if b == 0 { + found_null = true; + } else if b >= 0x80 { + return Err(TinyStrError::NonAscii); + } else if found_null { + // Error if there are contentful bytes after null + return Err(TinyStrError::ContainsNull); + } + out[i] = b; + + i += 1; + } + + if !allow_trailing_null && found_null { + // We found some trailing nulls, error + return Err(TinyStrError::ContainsNull); + } + + Ok(Self { + // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` + bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, + }) + } + + // TODO: This function shadows the FromStr trait. Rename? 
+ #[inline] + pub const fn from_str(s: &str) -> Result<Self, TinyStrError> { + Self::from_bytes_inner(s.as_bytes(), 0, s.len(), false) + } + + #[inline] + pub const fn as_str(&self) -> &str { + // as_bytes is valid utf8 + unsafe { str::from_utf8_unchecked(self.as_bytes()) } + } + + #[inline] + #[must_use] + pub const fn len(&self) -> usize { + if N <= 4 { + Aligned4::from_ascii_bytes(&self.bytes).len() + } else if N <= 8 { + Aligned8::from_ascii_bytes(&self.bytes).len() + } else { + let mut i = 0; + #[allow(clippy::indexing_slicing)] // < N is safe + while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 { + i += 1 + } + i + } + } + + #[inline] + #[must_use] + pub const fn is_empty(&self) -> bool { + self.bytes[0] as u8 == AsciiByte::B0 as u8 + } + + #[inline] + #[must_use] + pub const fn as_bytes(&self) -> &[u8] { + /// core::slice::from_raw_parts(a, b) = core::mem::transmute((a, b)) hack + /// ```compile_fail + /// const unsafe fn canary() { core::slice::from_raw_parts(0 as *const u8, 0); } + /// ``` + const _: () = (); + // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`, + // and changing the length of that slice to self.len() < N is safe. + unsafe { core::mem::transmute((self.bytes.as_slice().as_ptr(), self.len())) } + } + + #[inline] + #[must_use] + pub const fn all_bytes(&self) -> &[u8; N] { + // SAFETY: `self.bytes` has same size as [u8; N] + unsafe { core::mem::transmute(&self.bytes) } + } + + #[inline] + #[must_use] + /// Resizes a TinyAsciiStr<N> to a TinyAsciiStr<M>. + /// + /// If M < len() the string gets truncated, otherwise only the + /// memory representation changes. 
+ pub const fn resize<const M: usize>(self) -> TinyAsciiStr<M> { + let mut bytes = [0; M]; + let mut i = 0; + // Indexing is protected by the loop guard + #[allow(clippy::indexing_slicing)] + while i < M && i < N { + bytes[i] = self.bytes[i] as u8; + i += 1; + } + // `self.bytes` only contains ASCII bytes, with no null bytes between + // ASCII characters, so this also holds for `bytes`. + unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) } + } + + /// # Safety + /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes + /// between ASCII characters + #[must_use] + pub const unsafe fn from_bytes_unchecked(bytes: [u8; N]) -> Self { + Self { + bytes: AsciiByte::to_ascii_byte_array(&bytes), + } + } +} + +macro_rules! check_is { + ($self:ident, $check_int:ident, $check_u8:ident) => { + if N <= 4 { + Aligned4::from_ascii_bytes(&$self.bytes).$check_int() + } else if N <= 8 { + Aligned8::from_ascii_bytes(&$self.bytes).$check_int() + } else { + let mut i = 0; + // Won't panic because self.bytes has length N + #[allow(clippy::indexing_slicing)] + while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { + if !($self.bytes[i] as u8).$check_u8() { + return false; + } + i += 1; + } + true + } + }; + ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => { + if N <= 4 { + Aligned4::from_ascii_bytes(&$self.bytes).$check_int() + } else if N <= 8 { + Aligned8::from_ascii_bytes(&$self.bytes).$check_int() + } else { + // Won't panic because N is > 8 + if ($self.bytes[0] as u8).$check_u8_0_inv() { + return false; + } + let mut i = 1; + // Won't panic because self.bytes has length N + #[allow(clippy::indexing_slicing)] + while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { + if ($self.bytes[i] as u8).$check_u8_1_inv() { + return false; + } + i += 1; + } + true + } + }; + ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => { + if N <= 4 { + 
Aligned4::from_ascii_bytes(&$self.bytes).$check_int() + } else if N <= 8 { + Aligned8::from_ascii_bytes(&$self.bytes).$check_int() + } else { + // Won't panic because N is > 8 + if !($self.bytes[0] as u8).$check_u8_0_inv() { + return false; + } + let mut i = 1; + // Won't panic because self.bytes has length N + #[allow(clippy::indexing_slicing)] + while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { + if !($self.bytes[i] as u8).$check_u8_1_inv() { + return false; + } + i += 1; + } + true + } + }; +} + +impl<const N: usize> TinyAsciiStr<N> { + /// Checks if the value is composed of ASCII alphabetic characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphabetic()); + /// assert!(!s2.is_ascii_alphabetic()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_alphabetic(&self) -> bool { + check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic) + } + + /// Checks if the value is composed of ASCII alphanumeric characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', or + /// * U+0061 'a' ..= U+007A 'z', or + /// * U+0030 '0' ..= U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphanumeric()); + /// assert!(!s2.is_ascii_alphanumeric()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_alphanumeric(&self) -> bool { + check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric) + } + + /// Checks if the value is composed of ASCII decimal digits: + /// + /// * U+0030 '0' ..= U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_numeric()); + /// assert!(!s2.is_ascii_numeric()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_numeric(&self) -> bool { + check_is!(self, is_ascii_numeric, is_ascii_digit) + } + + /// Checks if the value is in ASCII lower case. + /// + /// All letter characters are checked for case. Non-letter characters are ignored. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse."); + /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); + /// + /// assert!(!s1.is_ascii_lowercase()); + /// assert!(s2.is_ascii_lowercase()); + /// assert!(s3.is_ascii_lowercase()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_lowercase(&self) -> bool { + check_is!( + self, + is_ascii_lowercase, + !is_ascii_uppercase, + !is_ascii_uppercase + ) + } + + /// Checks if the value is in ASCII title case. + /// + /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase. + /// Non-letter characters are ignored. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); + /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); + /// + /// assert!(!s1.is_ascii_titlecase()); + /// assert!(s2.is_ascii_titlecase()); + /// assert!(s3.is_ascii_titlecase()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_titlecase(&self) -> bool { + check_is!( + self, + is_ascii_titlecase, + !is_ascii_lowercase, + !is_ascii_uppercase + ) + } + + /// Checks if the value is in ASCII upper case. + /// + /// All letter characters are checked for case. Non-letter characters are ignored. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse."); + /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); + /// + /// assert!(!s1.is_ascii_uppercase()); + /// assert!(s2.is_ascii_uppercase()); + /// assert!(!s3.is_ascii_uppercase()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_uppercase(&self) -> bool { + check_is!( + self, + is_ascii_uppercase, + !is_ascii_lowercase, + !is_ascii_lowercase + ) + } + + /// Checks if the value is composed of ASCII alphabetic lower case characters: + /// + /// * U+0061 'a' ..= U+007A 'z', + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); + /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse."); + /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); + /// + /// 
assert!(!s1.is_ascii_alphabetic_lowercase()); + /// assert!(!s2.is_ascii_alphabetic_lowercase()); + /// assert!(!s3.is_ascii_alphabetic_lowercase()); + /// assert!(s4.is_ascii_alphabetic_lowercase()); + /// assert!(!s5.is_ascii_alphabetic_lowercase()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { + check_is!( + self, + is_ascii_alphabetic_lowercase, + is_ascii_lowercase, + is_ascii_lowercase + ) + } + + /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); + /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse."); + /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); + /// + /// assert!(s1.is_ascii_alphabetic_titlecase()); + /// assert!(!s2.is_ascii_alphabetic_titlecase()); + /// assert!(!s3.is_ascii_alphabetic_titlecase()); + /// assert!(!s4.is_ascii_alphabetic_titlecase()); + /// assert!(!s5.is_ascii_alphabetic_titlecase()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { + check_is!( + self, + is_ascii_alphabetic_titlecase, + is_ascii_uppercase, + is_ascii_lowercase + ) + } + + /// Checks if the value is composed of ASCII alphabetic upper case characters: + /// + /// * U+0041 'A' ..= U+005A 'Z', + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); + /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); + /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// let s4: TinyAsciiStr<4> = 
"TEST".parse().expect("Failed to parse."); + /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); + /// + /// assert!(!s1.is_ascii_alphabetic_uppercase()); + /// assert!(!s2.is_ascii_alphabetic_uppercase()); + /// assert!(!s3.is_ascii_alphabetic_uppercase()); + /// assert!(s4.is_ascii_alphabetic_uppercase()); + /// assert!(!s5.is_ascii_alphabetic_uppercase()); + /// ``` + #[inline] + #[must_use] + pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { + check_is!( + self, + is_ascii_alphabetic_uppercase, + is_ascii_uppercase, + is_ascii_uppercase + ) + } +} + +macro_rules! to { + ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{ + let mut i = 0; + if N <= 4 { + let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes(); + // Won't panic because self.bytes has length N and aligned has length >= N + #[allow(clippy::indexing_slicing)] + while i < N { + $self.bytes[i] = aligned[i]; + i += 1; + } + } else if N <= 8 { + let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes(); + // Won't panic because self.bytes has length N and aligned has length >= N + #[allow(clippy::indexing_slicing)] + while i < N { + $self.bytes[i] = aligned[i]; + i += 1; + } + } else { + // Won't panic because self.bytes has length N + #[allow(clippy::indexing_slicing)] + while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { + // SAFETY: AsciiByte is repr(u8) and has same size as u8 + unsafe { + $self.bytes[i] = core::mem::transmute( + ($self.bytes[i] as u8).$later_char_to() + ); + } + i += 1; + } + // SAFETY: AsciiByte is repr(u8) and has same size as u8 + $( + $self.bytes[0] = unsafe { + core::mem::transmute(($self.bytes[0] as u8).$first_char_to()) + }; + )? + } + $self + }}; +} + +impl<const N: usize> TinyAsciiStr<N> { + /// Converts this type to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse."); + /// + /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3"); + /// ``` + #[inline] + #[must_use] + pub const fn to_ascii_lowercase(mut self) -> Self { + to!(self, to_ascii_lowercase, to_ascii_lowercase) + } + + /// Converts this type to its ASCII title case equivalent in-place. + /// + /// The first character is converted to ASCII uppercase; the remaining characters + /// are converted to ASCII lowercase. + /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); + /// + /// assert_eq!(&*s1.to_ascii_titlecase(), "Test"); + /// ``` + #[inline] + #[must_use] + pub const fn to_ascii_titlecase(mut self) -> Self { + to!( + self, + to_ascii_titlecase, + to_ascii_lowercase, + to_ascii_uppercase + ) + } + + /// Converts this type to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged. 
+ /// + /// # Examples + /// + /// ``` + /// use tinystr::TinyAsciiStr; + /// + /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse."); + /// + /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3"); + /// ``` + #[inline] + #[must_use] + pub const fn to_ascii_uppercase(mut self) -> Self { + to!(self, to_ascii_uppercase, to_ascii_uppercase) + } +} + +impl<const N: usize> fmt::Debug for TinyAsciiStr<N> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl<const N: usize> fmt::Display for TinyAsciiStr<N> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +impl<const N: usize> Deref for TinyAsciiStr<N> { + type Target = str; + #[inline] + fn deref(&self) -> &str { + self.as_str() + } +} + +impl<const N: usize> FromStr for TinyAsciiStr<N> { + type Err = TinyStrError; + #[inline] + fn from_str(s: &str) -> Result<Self, TinyStrError> { + Self::from_str(s) + } +} + +impl<const N: usize> PartialEq<str> for TinyAsciiStr<N> { + fn eq(&self, other: &str) -> bool { + self.deref() == other + } +} + +impl<const N: usize> PartialEq<&str> for TinyAsciiStr<N> { + fn eq(&self, other: &&str) -> bool { + self.deref() == *other + } +} + +#[cfg(feature = "alloc")] +impl<const N: usize> PartialEq<alloc::string::String> for TinyAsciiStr<N> { + fn eq(&self, other: &alloc::string::String) -> bool { + self.deref() == other.deref() + } +} + +#[cfg(feature = "alloc")] +impl<const N: usize> PartialEq<TinyAsciiStr<N>> for alloc::string::String { + fn eq(&self, other: &TinyAsciiStr<N>) -> bool { + self.deref() == other.deref() + } +} + +#[cfg(test)] +mod test { + use super::*; + use rand::distributions::Distribution; + use rand::distributions::Standard; + use rand::rngs::SmallRng; + use rand::seq::SliceRandom; + use rand::SeedableRng; + + const STRINGS: &[&str] = &[ + "Latn", + "laTn", + "windows", + "AR", + "Hans", + "macos", + "AT", + "infiniband", + "FR", + "en", + "Cyrl", + 
"FromIntegral", + "NO", + "419", + "MacintoshOSX2019", + "a3z", + "A3z", + "A3Z", + "a3Z", + "3A", + "3Z", + "3a", + "3z", + "@@[`{", + "UK", + "E12", + ]; + + fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec<String> { + let mut rng = SmallRng::seed_from_u64(2022); + // Need to do this in 2 steps since the RNG is needed twice + let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap()) + .take(num_strings) + .collect::<Vec<usize>>(); + string_lengths + .iter() + .map(|len| { + Standard + .sample_iter(&mut rng) + .filter(|b: &u8| *b > 0 && *b < 0x80) + .take(*len) + .collect::<Vec<u8>>() + }) + .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII")) + .collect() + } + + fn check_operation<T, F1, F2, const N: usize>(reference_f: F1, tinystr_f: F2) + where + F1: Fn(&str) -> T, + F2: Fn(TinyAsciiStr<N>) -> T, + T: core::fmt::Debug + core::cmp::PartialEq, + { + for s in STRINGS + .iter() + .map(|s| s.to_string()) + .chain(gen_strings(100, &[3, 4, 5, 8, 12])) + { + let t = match TinyAsciiStr::<N>::from_str(&s) { + Ok(t) => t, + Err(TinyStrError::TooLarge { .. 
}) => continue, + Err(e) => panic!("{}", e), + }; + let expected = reference_f(&s); + let actual = tinystr_f(t); + assert_eq!(expected, actual, "TinyAsciiStr<{}>: {:?}", N, s); + } + } + + #[test] + fn test_is_ascii_alphabetic() { + fn check<const N: usize>() { + check_operation( + |s| s.chars().all(|c| c.is_ascii_alphabetic()), + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_alphanumeric() { + fn check<const N: usize>() { + check_operation( + |s| s.chars().all(|c| c.is_ascii_alphanumeric()), + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphanumeric(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_numeric() { + fn check<const N: usize>() { + check_operation( + |s| s.chars().all(|c| c.is_ascii_digit()), + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_numeric(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_lowercase() { + fn check<const N: usize>() { + check_operation( + |s| { + s == TinyAsciiStr::<16>::from_str(s) + .unwrap() + .to_ascii_lowercase() + .as_str() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_lowercase(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_titlecase() { + fn check<const N: usize>() { + check_operation( + |s| { + s == TinyAsciiStr::<16>::from_str(s) + .unwrap() + .to_ascii_titlecase() + .as_str() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_titlecase(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_uppercase() { + fn check<const N: usize>() { + check_operation( + |s| { + s == 
TinyAsciiStr::<16>::from_str(s) + .unwrap() + .to_ascii_uppercase() + .as_str() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_uppercase(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_alphabetic_lowercase() { + fn check<const N: usize>() { + check_operation( + |s| { + // Check alphabetic + s.chars().all(|c| c.is_ascii_alphabetic()) && + // Check lowercase + s == TinyAsciiStr::<16>::from_str(s) + .unwrap() + .to_ascii_lowercase() + .as_str() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_alphabetic_titlecase() { + fn check<const N: usize>() { + check_operation( + |s| { + // Check alphabetic + s.chars().all(|c| c.is_ascii_alphabetic()) && + // Check titlecase + s == TinyAsciiStr::<16>::from_str(s) + .unwrap() + .to_ascii_titlecase() + .as_str() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_is_ascii_alphabetic_uppercase() { + fn check<const N: usize>() { + check_operation( + |s| { + // Check alphabetic + s.chars().all(|c| c.is_ascii_alphabetic()) && + // Check uppercase + s == TinyAsciiStr::<16>::from_str(s) + .unwrap() + .to_ascii_uppercase() + .as_str() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_to_ascii_lowercase() { + fn check<const N: usize>() { + check_operation( + |s| { + s.chars() + .map(|c| c.to_ascii_lowercase()) + .collect::<String>() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_lowercase(t).to_string(), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + 
check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_to_ascii_titlecase() { + fn check<const N: usize>() { + check_operation( + |s| { + let mut r = s + .chars() + .map(|c| c.to_ascii_lowercase()) + .collect::<String>(); + // Safe because the string is nonempty and an ASCII string + unsafe { r.as_bytes_mut()[0].make_ascii_uppercase() }; + r + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_titlecase(t).to_string(), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } + + #[test] + fn test_to_ascii_uppercase() { + fn check<const N: usize>() { + check_operation( + |s| { + s.chars() + .map(|c| c.to_ascii_uppercase()) + .collect::<String>() + }, + |t: TinyAsciiStr<N>| TinyAsciiStr::to_ascii_uppercase(t).to_string(), + ) + } + check::<2>(); + check::<3>(); + check::<4>(); + check::<5>(); + check::<8>(); + check::<16>(); + } +} diff --git a/vendor/tinystr/src/asciibyte.rs b/vendor/tinystr/src/asciibyte.rs new file mode 100644 index 000000000..f41a03341 --- /dev/null +++ b/vendor/tinystr/src/asciibyte.rs @@ -0,0 +1,145 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +#[repr(u8)] +#[allow(dead_code)] +#[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)] +pub enum AsciiByte { + B0 = 0, + B1 = 1, + B2 = 2, + B3 = 3, + B4 = 4, + B5 = 5, + B6 = 6, + B7 = 7, + B8 = 8, + B9 = 9, + B10 = 10, + B11 = 11, + B12 = 12, + B13 = 13, + B14 = 14, + B15 = 15, + B16 = 16, + B17 = 17, + B18 = 18, + B19 = 19, + B20 = 20, + B21 = 21, + B22 = 22, + B23 = 23, + B24 = 24, + B25 = 25, + B26 = 26, + B27 = 27, + B28 = 28, + B29 = 29, + B30 = 30, + B31 = 31, + B32 = 32, + B33 = 33, + B34 = 34, + B35 = 35, + B36 = 36, + B37 = 37, + B38 = 38, + B39 = 39, + B40 = 40, + B41 = 41, + B42 = 42, + B43 = 43, + B44 = 44, + B45 = 45, + B46 = 46, + B47 = 47, + B48 = 48, + B49 = 49, + B50 = 50, + B51 = 51, + B52 = 52, + B53 = 53, + B54 = 54, + B55 = 55, + B56 = 56, + B57 = 57, + B58 = 58, + B59 = 59, + B60 = 60, + B61 = 61, + B62 = 62, + B63 = 63, + B64 = 64, + B65 = 65, + B66 = 66, + B67 = 67, + B68 = 68, + B69 = 69, + B70 = 70, + B71 = 71, + B72 = 72, + B73 = 73, + B74 = 74, + B75 = 75, + B76 = 76, + B77 = 77, + B78 = 78, + B79 = 79, + B80 = 80, + B81 = 81, + B82 = 82, + B83 = 83, + B84 = 84, + B85 = 85, + B86 = 86, + B87 = 87, + B88 = 88, + B89 = 89, + B90 = 90, + B91 = 91, + B92 = 92, + B93 = 93, + B94 = 94, + B95 = 95, + B96 = 96, + B97 = 97, + B98 = 98, + B99 = 99, + B100 = 100, + B101 = 101, + B102 = 102, + B103 = 103, + B104 = 104, + B105 = 105, + B106 = 106, + B107 = 107, + B108 = 108, + B109 = 109, + B110 = 110, + B111 = 111, + B112 = 112, + B113 = 113, + B114 = 114, + B115 = 115, + B116 = 116, + B117 = 117, + B118 = 118, + B119 = 119, + B120 = 120, + B121 = 121, + B122 = 122, + B123 = 123, + B124 = 124, + B125 = 125, + B126 = 126, + B127 = 127, +} + +impl AsciiByte { + // Convert [u8; N] to [AsciiByte; N] + #[inline] + pub const unsafe fn to_ascii_byte_array<const N: usize>(bytes: &[u8; N]) -> [AsciiByte; N] { + *(bytes as *const [u8; N] as *const [AsciiByte; N]) + } +} diff --git a/vendor/tinystr/src/databake.rs b/vendor/tinystr/src/databake.rs 
new file mode 100644 index 000000000..e10c194f8 --- /dev/null +++ b/vendor/tinystr/src/databake.rs @@ -0,0 +1,21 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::TinyAsciiStr; +use databake::*; + +impl<const N: usize> Bake for TinyAsciiStr<N> { + fn bake(&self, env: &CrateEnv) -> TokenStream { + env.insert("tinystr"); + let string = self.as_str(); + quote! { + ::tinystr::tinystr!(#N, #string) + } + } +} + +#[test] +fn test() { + test_bake!(TinyAsciiStr<10>, const: crate::tinystr!(10usize, "foo"), tinystr); +} diff --git a/vendor/tinystr/src/error.rs b/vendor/tinystr/src/error.rs new file mode 100644 index 000000000..03901431c --- /dev/null +++ b/vendor/tinystr/src/error.rs @@ -0,0 +1,16 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use displaydoc::Display; + +#[derive(Display, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum TinyStrError { + #[displaydoc("found string of larger length {len} when constructing string of length {max}")] + TooLarge { max: usize, len: usize }, + #[displaydoc("tinystr types do not support strings with null bytes")] + ContainsNull, + #[displaydoc("attempted to construct TinyStrAuto from a non-ascii string")] + NonAscii, +} diff --git a/vendor/tinystr/src/helpers.rs b/vendor/tinystr/src/helpers.rs deleted file mode 100644 index c3d17d028..000000000 --- a/vendor/tinystr/src/helpers.rs +++ /dev/null @@ -1,32 +0,0 @@ -use std::num::NonZeroU32; -use std::ptr::copy_nonoverlapping; - -use super::Error; - -#[cfg(any(feature = "std", test))] -pub use std::string::String; - -#[cfg(all(not(feature = "std"), not(test)))] -extern crate alloc; - -#[cfg(all(not(feature = "std"), not(test)))] -pub use alloc::string::String; - -#[inline(always)] -pub(crate) unsafe fn make_4byte_bytes( - bytes: &[u8], - len: usize, - mask: u32, -) -> Result<NonZeroU32, Error> { - // Mask is always supplied as little-endian. - let mask = u32::from_le(mask); - let mut word: u32 = 0; - copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u32 as *mut u8, len); - if (word & mask) != 0 { - return Err(Error::NonAscii); - } - if ((mask - word) & mask) != 0 { - return Err(Error::InvalidNull); - } - Ok(NonZeroU32::new_unchecked(word)) -} diff --git a/vendor/tinystr/src/int_ops.rs b/vendor/tinystr/src/int_ops.rs new file mode 100644 index 000000000..102b052f2 --- /dev/null +++ b/vendor/tinystr/src/int_ops.rs @@ -0,0 +1,315 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::asciibyte::AsciiByte; + +/// Internal helper struct that performs operations on aligned integers. +/// Supports strings up to 4 bytes long. 
+#[repr(transparent)] +pub struct Aligned4(u32); + +impl Aligned4 { + /// # Panics + /// Panics if N is greater than 4 + #[inline] + pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self { + let mut bytes = [0; 4]; + let mut i = 0; + // The function documentation defines when panics may occur + #[allow(clippy::indexing_slicing)] + while i < N { + bytes[i] = src[i]; + i += 1; + } + Self(u32::from_ne_bytes(bytes)) + } + + #[inline] + pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self { + Self::from_bytes::<N>(unsafe { core::mem::transmute(src) }) + } + + #[inline] + pub const fn to_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } + + #[inline] + pub const fn to_ascii_bytes(&self) -> [AsciiByte; 4] { + unsafe { core::mem::transmute(self.to_bytes()) } + } + + pub const fn len(&self) -> usize { + let word = self.0; + #[cfg(target_endian = "little")] + let len = (4 - word.leading_zeros() / 8) as usize; + #[cfg(target_endian = "big")] + let len = (4 - word.trailing_zeros() / 8) as usize; + len + } + + pub const fn is_ascii_alphabetic(&self) -> bool { + let word = self.0; + // Each of the following bitmasks set *the high bit* (0x8) to 0 for valid and 1 for invalid. + // `mask` sets all NUL bytes to 0. + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + // `lower` converts the string to lowercase. It may also change the value of non-alpha + // characters, but this does not matter for the alphabetic test that follows. + let lower = word | 0x2020_2020; + // `alpha` sets all alphabetic bytes to 0. We only need check for lowercase characters. + let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); + // The overall string is valid if every character passes at least one test. + // We performed two tests here: non-NUL (`mask`) and alphabetic (`alpha`). 
+ (alpha & mask) == 0 + } + + pub const fn is_ascii_alphanumeric(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_alphabetic + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); + let lower = word | 0x2020_2020; + let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); + (alpha & numeric & mask) == 0 + } + + pub const fn is_ascii_numeric(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_alphabetic + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); + (numeric & mask) == 0 + } + + pub const fn is_ascii_lowercase(&self) -> bool { + let word = self.0; + // For efficiency, this function tests for an invalid string rather than a valid string. + // A string is ASCII lowercase iff it contains no uppercase ASCII characters. + // `invalid_case` sets all uppercase ASCII characters to 0 and all others to 1. + let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525); + // The string is valid if it contains no invalid characters (if all high bits are 1). + (invalid_case & 0x8080_8080) == 0x8080_8080 + } + + pub const fn is_ascii_titlecase(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_lowercase + let invalid_case = if cfg!(target_endian = "little") { + !(word + 0x3f3f_3f1f) | (word + 0x2525_2505) + } else { + !(word + 0x1f3f_3f3f) | (word + 0x0525_2525) + }; + (invalid_case & 0x8080_8080) == 0x8080_8080 + } + + pub const fn is_ascii_uppercase(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_lowercase + let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505); + (invalid_case & 0x8080_8080) == 0x8080_8080 + } + + pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { + let word = self.0; + // `mask` sets all NUL bytes to 0. 
+ let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1. + let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505); + // The overall string is valid if every character passes at least one test. + // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`lower_alpha`). + (lower_alpha & mask) == 0 + } + + pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_alphabetic_lowercase + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let title_case = if cfg!(target_endian = "little") { + !(word + 0x1f1f_1f3f) | (word + 0x0505_0525) + } else { + !(word + 0x3f1f_1f1f) | (word + 0x2505_0505) + }; + (title_case & mask) == 0 + } + + pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_alphabetic_lowercase + let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; + let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525); + (upper_alpha & mask) == 0 + } + + pub const fn to_ascii_lowercase(&self) -> Self { + let word = self.0; + let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2); + Self(result) + } + + pub const fn to_ascii_titlecase(&self) -> Self { + let word = self.0.to_le(); + let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2; + let result = (word | mask) & !(0x20 & mask); + Self(u32::from_le(result)) + } + + pub const fn to_ascii_uppercase(&self) -> Self { + let word = self.0; + let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2); + Self(result) + } +} + +/// Internal helper struct that performs operations on aligned integers. +/// Supports strings up to 8 bytes long.
+#[repr(transparent)] +pub struct Aligned8(u64); + +impl Aligned8 { + /// # Panics + /// Panics if N is greater than 8 + #[inline] + pub const fn from_bytes<const N: usize>(src: &[u8; N]) -> Self { + let mut bytes = [0; 8]; + let mut i = 0; + // The function documentation defines when panics may occur + #[allow(clippy::indexing_slicing)] + while i < N { + bytes[i] = src[i]; + i += 1; + } + Self(u64::from_ne_bytes(bytes)) + } + + #[inline] + pub const fn from_ascii_bytes<const N: usize>(src: &[AsciiByte; N]) -> Self { + Self::from_bytes::<N>(unsafe { core::mem::transmute(src) }) + } + + #[inline] + pub const fn to_bytes(&self) -> [u8; 8] { + self.0.to_ne_bytes() + } + + #[inline] + pub const fn to_ascii_bytes(&self) -> [AsciiByte; 8] { + unsafe { core::mem::transmute(self.to_bytes()) } + } + + pub const fn len(&self) -> usize { + let word = self.0; + #[cfg(target_endian = "little")] + let len = (8 - word.leading_zeros() / 8) as usize; + #[cfg(target_endian = "big")] + let len = (8 - word.trailing_zeros() / 8) as usize; + len + } + + pub const fn is_ascii_alphabetic(&self) -> bool { + let word = self.0; + let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; + let lower = word | 0x2020_2020_2020_2020; + let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); + (alpha & mask) == 0 + } + + pub const fn is_ascii_alphanumeric(&self) -> bool { + let word = self.0; + let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; + let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); + let lower = word | 0x2020_2020_2020_2020; + let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); + (alpha & numeric & mask) == 0 + } + + pub const fn is_ascii_numeric(&self) -> bool { + let word = self.0; + let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; + let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); + (numeric & mask) == 0 + } + + pub const fn 
is_ascii_lowercase(&self) -> bool { + let word = self.0; + let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); + (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 + } + + pub const fn is_ascii_titlecase(&self) -> bool { + let word = self.0; + let invalid_case = if cfg!(target_endian = "little") { + !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505) + } else { + !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525) + }; + (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 + } + + pub const fn is_ascii_uppercase(&self) -> bool { + let word = self.0; + let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); + (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 + } + + pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { + let word = self.0; + // `mask` sets all NUL bytes to 0. + let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; + // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1. + let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); + // The overall string is valid if every character passes at least one test. + // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`lower_alpha`).
+ (lower_alpha & mask) == 0 + } + + pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_alphabetic_lowercase + let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; + let title_case = if cfg!(target_endian = "little") { + !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525) + } else { + !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505) + }; + (title_case & mask) == 0 + } + + pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { + let word = self.0; + // See explanatory comments in is_ascii_alphabetic_lowercase + let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; + let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); + (upper_alpha & mask) == 0 + } + + pub const fn to_ascii_lowercase(&self) -> Self { + let word = self.0; + let result = word + | (((word + 0x3f3f_3f3f_3f3f_3f3f) + & !(word + 0x2525_2525_2525_2525) + & 0x8080_8080_8080_8080) + >> 2); + Self(result) + } + + pub const fn to_ascii_titlecase(&self) -> Self { + let word = self.0.to_le(); + let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f) + & !(word + 0x2525_2525_2525_2505) + & 0x8080_8080_8080_8080) + >> 2; + let result = (word | mask) & !(0x20 & mask); + Self(u64::from_le(result)) + } + + pub const fn to_ascii_uppercase(&self) -> Self { + let word = self.0; + let result = word + & !(((word + 0x1f1f_1f1f_1f1f_1f1f) + & !(word + 0x0505_0505_0505_0505) + & 0x8080_8080_8080_8080) + >> 2); + Self(result) + } +} diff --git a/vendor/tinystr/src/lib.rs b/vendor/tinystr/src/lib.rs index 6f4c59658..96018b8b2 100644 --- a/vendor/tinystr/src/lib.rs +++ b/vendor/tinystr/src/lib.rs @@ -1,105 +1,116 @@ -//! `tinystr` is a small ASCII-only bounded length string representation. -//! -//! The crate is meant to be used for scenarios where one needs a fast -//! and memory efficient way to store and manipulate short ASCII-only strings. -//! -//! 
`tinystr` converts each string into an unsigned integer, and uses bitmasking -//! to compare, convert cases and test for common characteristics of strings. -//! -//! # Details -//! -//! The crate provides three structs and an enum: -//! * `TinyStr4` an ASCII-only string limited to 4 characters. -//! * `TinyStr8` an ASCII-only string limited to 8 characters. -//! * `TinyStr16` an ASCII-only string limited to 16 characters. -//! * `TinyStrAuto` (enum): -//! * `Tiny` when the string is 16 characters or less. -//! * `Heap` when the string is 17 or more characters. -//! -//! `TinyStrAuto` stores the string as a TinyStr16 when it is short enough, or else falls back to a -//! standard `String`. You should use TinyStrAuto when you expect most strings to be 16 characters -//! or smaller, but occasionally you receive one that exceeds that length. Unlike the structs, -//! `TinyStrAuto` does not implement `Copy`. +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! `tinystr` is a utility crate of the [`ICU4X`] project. //! -//! # no_std +//! It includes [`TinyAsciiStr`], a core API for representing small ASCII-only bounded length strings. //! -//! Disable the `std` feature of this crate to make it `#[no_std]`. Doing so disables `TinyStrAuto`. -//! You can re-enable `TinyStrAuto` in `#[no_std]` mode by enabling the `alloc` feature. +//! It is optimized for operations on strings of size 8 or smaller. When use cases involve comparison +//! and conversion of strings for lowercase/uppercase/titlecase, or checking +//! numeric/alphabetic/alphanumeric, `TinyAsciiStr` is the edge performance library. //! -//! # Example +//! # Examples //! -//! ``` -//! use tinystr::{TinyStr4, TinyStr8, TinyStr16, TinyStrAuto}; +//! ```rust +//! use tinystr::TinyAsciiStr; //! -//! let s1: TinyStr4 = "tEsT".parse() -//! 
.expect("Failed to parse."); +//! let s1: TinyAsciiStr<4> = "tEsT".parse().expect("Failed to parse."); //! //! assert_eq!(s1, "tEsT"); //! assert_eq!(s1.to_ascii_uppercase(), "TEST"); //! assert_eq!(s1.to_ascii_lowercase(), "test"); //! assert_eq!(s1.to_ascii_titlecase(), "Test"); //! assert_eq!(s1.is_ascii_alphanumeric(), true); +//! assert_eq!(s1.is_ascii_numeric(), false); //! -//! let s2: TinyStr8 = "New York".parse() -//! .expect("Failed to parse."); +//! let s2 = TinyAsciiStr::<8>::try_from_raw(*b"New York") +//! .expect("Failed to parse."); //! //! assert_eq!(s2, "New York"); //! assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); //! assert_eq!(s2.to_ascii_lowercase(), "new york"); //! assert_eq!(s2.to_ascii_titlecase(), "New york"); //! assert_eq!(s2.is_ascii_alphanumeric(), false); +//! ``` //! -//! let s3: TinyStr16 = "metaMoRphosis123".parse() -//! .expect("Failed to parse."); -//! -//! assert_eq!(s3, "metaMoRphosis123"); -//! assert_eq!(s3.to_ascii_uppercase(), "METAMORPHOSIS123"); -//! assert_eq!(s3.to_ascii_lowercase(), "metamorphosis123"); -//! assert_eq!(s3.to_ascii_titlecase(), "Metamorphosis123"); -//! assert_eq!(s3.is_ascii_alphanumeric(), true); +//! # Details //! -//! let s4: TinyStrAuto = "shortNoAlloc".parse().unwrap(); -//! assert!(matches!(s4, TinyStrAuto::Tiny { .. })); -//! assert_eq!(s4, "shortNoAlloc"); +//! When strings are of size 8 or smaller, the struct transforms the strings as `u32`/`u64` and uses +//! bitmasking to provide basic string manipulation operations: +//! * `is_ascii_numeric` +//! * `is_ascii_alphabetic` +//! * `is_ascii_alphanumeric` +//! * `to_ascii_lowercase` +//! * `to_ascii_uppercase` +//! * `to_ascii_titlecase` +//! * `PartialEq` +//! +//! `TinyAsciiStr` will fall back to `u8` character manipulation for strings of length greater than 8. + //! -//! let s5: TinyStrAuto = "longFallbackToHeap".parse().unwrap(); -//! assert!(matches!(s5, TinyStrAuto::Heap { .. })); -//! assert_eq!(s5, "longFallbackToHeap"); -//! ``` +//! 
[`ICU4X`]: ../icu/index.html + +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(test), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + missing_debug_implementations, + ) +)] -#![no_std] +mod macros; -#[cfg(any(feature = "std", test))] -extern crate std; +mod ascii; +mod asciibyte; +mod error; +mod int_ops; -#[cfg(all(not(feature = "std"), not(test)))] -extern crate core as std; +#[cfg(feature = "serde")] +mod serde; -mod helpers; -mod tinystr16; -mod tinystr4; -mod tinystr8; +#[cfg(feature = "databake")] +mod databake; -#[cfg(any(feature = "std", feature = "alloc"))] -mod tinystrauto; +#[cfg(feature = "zerovec")] +mod ule; -pub use tinystr16::TinyStr16; -pub use tinystr4::TinyStr4; -pub use tinystr8::TinyStr8; +#[cfg(any(feature = "serde", feature = "alloc"))] +extern crate alloc; -#[cfg(any(feature = "std", feature = "alloc"))] -pub use tinystrauto::TinyStrAuto; +pub use ascii::TinyAsciiStr; +pub use error::TinyStrError; -#[cfg(feature = "macros")] -pub use tinystr_macros as macros; +/// These are temporary compatibility reexports that will be removed +/// in a future version. +pub type TinyStr4 = TinyAsciiStr<4>; +/// These are temporary compatibility reexports that will be removed +/// in a future version. +pub type TinyStr8 = TinyAsciiStr<8>; +/// These are temporary compatibility reexports that will be removed +/// in a future version. +pub type TinyStr16 = TinyAsciiStr<16>; -/// Enum to store the various types of errors that can cause parsing a TinyStr to fail. -#[derive(PartialEq, Eq, Debug)] -pub enum Error { - /// String is too large or too small to store as TinyStr. - InvalidSize, - /// String is empty. - InvalidNull, - /// String contains non-ASCII character(s).
- NonAscii, +#[test] +fn test_size() { + assert_eq!( + core::mem::size_of::<TinyStr4>(), + core::mem::size_of::<Option<TinyStr4>>() + ); + assert_eq!( + core::mem::size_of::<TinyStr8>(), + core::mem::size_of::<Option<TinyStr8>>() + ); } +// /// Allows unit tests to use the macro +// #[cfg(test)] +// mod tinystr { +// pub use super::{TinyAsciiStr, TinyStrError}; +// } diff --git a/vendor/tinystr/src/macros.rs b/vendor/tinystr/src/macros.rs new file mode 100644 index 000000000..b00185238 --- /dev/null +++ b/vendor/tinystr/src/macros.rs @@ -0,0 +1,32 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#[macro_export] +macro_rules! tinystr { + ($n:literal, $s:literal) => {{ + // Force it into a const context; otherwise it may get evaluated at runtime instead. + const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> = { + match $crate::TinyAsciiStr::from_bytes($s.as_bytes()) { + Ok(s) => s, + // We are okay with panicking here because this is in a const context + #[allow(clippy::panic)] + // Cannot format the error since formatting isn't const yet + Err(_) => panic!(concat!("Failed to construct tinystr from ", $s)), + } + }; + TINYSTR_MACRO_CONST + }}; +} + +#[cfg(test)] +mod tests { + #[test] + fn test_macro_construction() { + let s1 = tinystr!(8, "foobar"); + assert_eq!(&*s1, "foobar"); + + let s1 = tinystr!(12, "foobarbaz"); + assert_eq!(&*s1, "foobarbaz"); + } +} diff --git a/vendor/tinystr/src/serde.rs b/vendor/tinystr/src/serde.rs new file mode 100644 index 000000000..933491f17 --- /dev/null +++ b/vendor/tinystr/src/serde.rs @@ -0,0 +1,91 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::TinyAsciiStr; +use alloc::borrow::Cow; +use alloc::string::ToString; +use core::fmt; +use core::marker::PhantomData; +use core::ops::Deref; +use serde::de::{Error, SeqAccess, Visitor}; +use serde::ser::SerializeTuple; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +impl<const N: usize> Serialize for TinyAsciiStr<N> { + #[inline] + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + if serializer.is_human_readable() { + self.deref().serialize(serializer) + } else { + let mut seq = serializer.serialize_tuple(N)?; + for byte in self.all_bytes() { + seq.serialize_element(byte)?; + } + seq.end() + } + } +} + +struct TinyAsciiStrVisitor<const N: usize> { + marker: PhantomData<TinyAsciiStr<N>>, +} + +impl<const N: usize> TinyAsciiStrVisitor<N> { + fn new() -> Self { + TinyAsciiStrVisitor { + marker: PhantomData, + } + } +} + +impl<'de, const N: usize> Visitor<'de> for TinyAsciiStrVisitor<N> { + type Value = TinyAsciiStr<N>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a TinyAsciiStr<{}>", N) + } + + #[inline] + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let mut bytes = [0u8; N]; + let mut zeroes = false; + for out in &mut bytes.iter_mut().take(N) { + let byte = seq + .next_element()? 
+ .ok_or_else(|| Error::invalid_length(N, &self))?; + if byte == 0 { + zeroes = true; + } else if zeroes { + return Err(Error::custom("TinyAsciiStr cannot contain null bytes")); + } + + if byte >= 0x80 { + return Err(Error::custom("TinyAsciiStr cannot contain non-ascii bytes")); + } + *out = byte; + } + + Ok(unsafe { TinyAsciiStr::from_bytes_unchecked(bytes) }) + } +} + +impl<'de, const N: usize> Deserialize<'de> for TinyAsciiStr<N> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + let x: Cow<'de, str> = Deserialize::deserialize(deserializer)?; + TinyAsciiStr::from_str(&x).map_err(|e| Error::custom(e.to_string())) + } else { + deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::<N>::new()) + } + } +} diff --git a/vendor/tinystr/src/tinystr16.rs b/vendor/tinystr/src/tinystr16.rs deleted file mode 100644 index 7403813f2..000000000 --- a/vendor/tinystr/src/tinystr16.rs +++ /dev/null @@ -1,327 +0,0 @@ -use std::cmp::Ordering; -use std::convert::Into; -use std::fmt; -use std::num::NonZeroU128; -use std::ops::Deref; -use std::ptr::copy_nonoverlapping; -use std::str::FromStr; - -use crate::Error; - -/// A tiny string that is from 1 to 16 non-NUL ASCII characters. -/// -/// # Examples -/// -/// ``` -/// use tinystr::TinyStr16; -/// -/// let s1: TinyStr16 = "Metamorphosis".parse() -/// .expect("Failed to parse."); -/// -/// assert_eq!(s1, "Metamorphosis"); -/// assert!(s1.is_ascii_alphabetic()); -/// ``` -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct TinyStr16(NonZeroU128); - -impl TinyStr16 { - /// Creates a TinyStr16 from a byte slice. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1 = TinyStr16::from_bytes("Testing".as_bytes()) - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1, "Testing"); - /// ``` - #[inline(always)] - pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { - let len = bytes.len(); - if len < 1 || len > 16 { - return Err(Error::InvalidSize); - } - unsafe { - let mut word: u128 = 0; - copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u128 as *mut u8, len); - let mask = 0x80808080_80808080_80808080_80808080u128 >> (8 * (16 - len)); - // TODO: could do this with #cfg(target_endian), but this is clearer and - // more confidence-inspiring. - let mask = u128::from_le(mask); - if (word & mask) != 0 { - return Err(Error::NonAscii); - } - if ((mask - word) & mask) != 0 { - return Err(Error::InvalidNull); - } - Ok(Self(NonZeroU128::new_unchecked(word))) - } - } - - /// An unsafe constructor intended for cases where the consumer - /// guarantees that the input is a little endian integer which - /// is a correct representation of a `TinyStr16` string. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "Metamorphosis".parse() - /// .expect("Failed to parse."); - /// - /// let num: u128 = s1.into(); - /// - /// let s2 = unsafe { TinyStr16::new_unchecked(num) }; - /// - /// assert_eq!(s1, s2); - /// assert_eq!(s2.as_str(), "Metamorphosis"); - /// ``` - /// - /// # Safety - /// - /// The method does not validate the `u128` to be properly encoded - /// value for `TinyStr16`. - /// The value can be retrieved via `Into<u128> for TinyStr16`. - #[inline(always)] - pub const unsafe fn new_unchecked(text: u128) -> Self { - Self(NonZeroU128::new_unchecked(u128::from_le(text))) - } - - /// Extracts a string slice containing the entire `TinyStr16`. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "Metamorphosis".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.as_str(), "Metamorphosis"); - /// ``` - #[inline(always)] - pub fn as_str(&self) -> &str { - self.deref() - } - - /// Checks if the value is composed of ASCII alphabetic characters: - /// - /// * U+0041 'A' ..= U+005A 'Z', or - /// * U+0061 'a' ..= U+007A 'z'. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "Metamorphosis".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr16 = "Met3mo4pho!is".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_alphabetic()); - /// assert!(!s2.is_ascii_alphabetic()); - /// ``` - pub fn is_ascii_alphabetic(self) -> bool { - let word = self.0.get(); - let mask = - (word + 0x7f7f7f7f_7f7f7f7f_7f7f7f7f_7f7f7f7f) & 0x80808080_80808080_80808080_80808080; - let lower = word | 0x20202020_20202020_20202020_20202020; - let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f_1f1f1f1f_1f1f1f1f) - | (lower + 0x05050505_05050505_05050505_05050505); - (alpha & mask) == 0 - } - - /// Checks if the value is composed of ASCII alphanumeric characters: - /// - /// * U+0041 'A' ..= U+005A 'Z', or - /// * U+0061 'a' ..= U+007A 'z', or - /// * U+0030 '0' ..= U+0039 '9'. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "A15bingA1".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr16 = "[3@w00Fs1".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_alphanumeric()); - /// assert!(!s2.is_ascii_alphanumeric()); - /// ``` - pub fn is_ascii_alphanumeric(self) -> bool { - let word = self.0.get(); - let mask = - (word + 0x7f7f7f7f_7f7f7f7f_7f7f7f7f_7f7f7f7f) & 0x80808080_80808080_80808080_80808080; - let numeric = !(word + 0x50505050_50505050_50505050_50505050) - | (word + 0x46464646_46464646_46464646_46464646); - let lower = word | 0x20202020_20202020_20202020_20202020; - let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f_1f1f1f1f_1f1f1f1f) - | (lower + 0x05050505_05050505_05050505_05050505); - (alpha & numeric & mask) == 0 - } - - /// Checks if the value is composed of ASCII decimal digits: - /// - /// * U+0030 '0' ..= U+0039 '9'. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "31212314141".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr16 = "3d3d3d3d".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_numeric()); - /// assert!(!s2.is_ascii_numeric()); - /// ``` - pub fn is_ascii_numeric(self) -> bool { - let word = self.0.get(); - let mask = - (word + 0x7f7f7f7f_7f7f7f7f_7f7f7f7f_7f7f7f7f) & 0x80808080_80808080_80808080_80808080; - let numeric = !(word + 0x50505050_50505050_50505050_50505050) - | (word + 0x46464646_46464646_46464646_46464646); - (numeric & mask) == 0 - } - - /// Converts this type to its ASCII lower case equivalent in-place. - /// - /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "MeTAmOrpHo3sis".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_lowercase(), "metamorpho3sis"); - /// ``` - pub fn to_ascii_lowercase(self) -> Self { - let word = self.0.get(); - let result = word - | (((word + 0x3f3f3f3f_3f3f3f3f_3f3f3f3f_3f3f3f3f) - & !(word + 0x25252525_25252525_25252525_25252525) - & 0x80808080_80808080_80808080_80808080) - >> 2); - unsafe { Self(NonZeroU128::new_unchecked(result)) } - } - - /// Converts this type to its ASCII title case equivalent in-place. - /// - /// First character, if is an ASCII letter 'a' to 'z' is mapped to 'A' to 'Z', - /// other characters are unchanged. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "metamorphosis".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_titlecase(), "Metamorphosis"); - /// ``` - pub fn to_ascii_titlecase(self) -> Self { - let word = self.0.get().to_le(); - let mask = ((word + 0x3f3f3f3f_3f3f3f3f_3f3f3f3f_3f3f3f1f) - & !(word + 0x25252525_25252525_25252525_25252505) - & 0x80808080_80808080_80808080_80808080) - >> 2; - let result = (word | mask) & !(0x20 & mask); - unsafe { Self(NonZeroU128::new_unchecked(u128::from_le(result))) } - } - - /// Converts this type to its ASCII upper case equivalent in-place. - /// - /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr16; - /// - /// let s1: TinyStr16 = "Met3amorphosis".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_uppercase(), "MET3AMORPHOSIS"); - /// ``` - pub fn to_ascii_uppercase(self) -> Self { - let word = self.0.get(); - let result = word - & !(((word + 0x1f1f1f1f_1f1f1f1f_1f1f1f1f_1f1f1f1f) - & !(word + 0x05050505_05050505_05050505_05050505) - & 0x80808080_80808080_80808080_80808080) - >> 2); - unsafe { Self(NonZeroU128::new_unchecked(result)) } - } -} - -impl fmt::Display for TinyStr16 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.deref()) - } -} - -impl fmt::Debug for TinyStr16 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self.deref()) - } -} - -impl Deref for TinyStr16 { - type Target = str; - - #[inline(always)] - fn deref(&self) -> &str { - // Again, could use #cfg to hand-roll a big-endian implementation. - let word = self.0.get().to_le(); - let len = (16 - word.leading_zeros() / 8) as usize; - unsafe { - let slice = core::slice::from_raw_parts(&self.0 as *const _ as *const u8, len); - std::str::from_utf8_unchecked(slice) - } - } -} - -impl PartialEq<&str> for TinyStr16 { - fn eq(&self, other: &&str) -> bool { - self.deref() == *other - } -} - -impl PartialOrd for TinyStr16 { - fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for TinyStr16 { - fn cmp(&self, other: &Self) -> Ordering { - self.0.get().to_be().cmp(&other.0.get().to_be()) - } -} - -impl FromStr for TinyStr16 { - type Err = Error; - - #[inline(always)] - fn from_str(text: &str) -> Result<Self, Self::Err> { - Self::from_bytes(text.as_bytes()) - } -} - -impl Into<u128> for TinyStr16 { - fn into(self) -> u128 { - self.0.get().to_le() - } -} diff --git a/vendor/tinystr/src/tinystr4.rs b/vendor/tinystr/src/tinystr4.rs deleted file mode 100644 index c63d25113..000000000 --- 
a/vendor/tinystr/src/tinystr4.rs +++ /dev/null @@ -1,299 +0,0 @@ -use std::cmp::Ordering; -use std::convert::Into; -use std::fmt; -use std::num::NonZeroU32; -use std::ops::Deref; -use std::str::FromStr; - -use crate::helpers::make_4byte_bytes; -use crate::Error; - -/// A tiny string that is from 1 to 4 non-NUL ASCII characters. -/// -/// # Examples -/// -/// ``` -/// use tinystr::TinyStr4; -/// -/// let s1: TinyStr4 = "Test".parse() -/// .expect("Failed to parse."); -/// -/// assert_eq!(s1, "Test"); -/// assert!(s1.is_ascii_alphabetic()); -/// ``` -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct TinyStr4(NonZeroU32); - -impl TinyStr4 { - /// Creates a TinyStr4 from a byte slice. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1 = TinyStr4::from_bytes("Test".as_bytes()) - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1, "Test"); - /// ``` - #[inline(always)] - pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { - unsafe { - match bytes.len() { - 1 => make_4byte_bytes(bytes, 1, 0x80).map(Self), - 2 => make_4byte_bytes(bytes, 2, 0x8080).map(Self), - 3 => make_4byte_bytes(bytes, 3, 0x0080_8080).map(Self), - 4 => make_4byte_bytes(bytes, 4, 0x8080_8080).map(Self), - _ => Err(Error::InvalidSize), - } - } - } - - /// An unsafe constructor intended for cases where the consumer - /// guarantees that the input is a little endian integer which - /// is a correct representation of a `TinyStr4` string. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "Test".parse() - /// .expect("Failed to parse."); - /// - /// let num: u32 = s1.into(); - /// - /// let s2 = unsafe { TinyStr4::new_unchecked(num) }; - /// - /// assert_eq!(s1, s2); - /// assert_eq!(s2.as_str(), "Test"); - /// ``` - /// - /// # Safety - /// - /// The method does not validate the `u32` to be properly encoded - /// value for `TinyStr4`. - /// The value can be retrieved via `Into<u32> for TinyStr4`. 
- #[inline(always)] - pub const unsafe fn new_unchecked(text: u32) -> Self { - Self(NonZeroU32::new_unchecked(u32::from_le(text))) - } - - /// Extracts a string slice containing the entire `TinyStr4`. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "Test".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.as_str(), "Test"); - /// ``` - #[inline(always)] - pub fn as_str(&self) -> &str { - self.deref() - } - - /// Checks if the value is composed of ASCII alphabetic characters: - /// - /// * U+0041 'A' ..= U+005A 'Z', or - /// * U+0061 'a' ..= U+007A 'z'. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "Test".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr4 = "Te3t".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_alphabetic()); - /// assert!(!s2.is_ascii_alphabetic()); - /// ``` - pub fn is_ascii_alphabetic(self) -> bool { - let word = self.0.get(); - let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; - let lower = word | 0x2020_2020; - let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); - (alpha & mask) == 0 - } - - /// Checks if the value is composed of ASCII alphanumeric characters: - /// - /// * U+0041 'A' ..= U+005A 'Z', or - /// * U+0061 'a' ..= U+007A 'z', or - /// * U+0030 '0' ..= U+0039 '9'. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "A15b".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr4 = "[3@w".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_alphanumeric()); - /// assert!(!s2.is_ascii_alphanumeric()); - /// ``` - pub fn is_ascii_alphanumeric(self) -> bool { - let word = self.0.get(); - let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; - let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); - let lower = word | 0x2020_2020; - let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); - (alpha & numeric & mask) == 0 - } - - /// Checks if the value is composed of ASCII decimal digits: - /// - /// * U+0030 '0' ..= U+0039 '9'. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "312".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr4 = "3d".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_numeric()); - /// assert!(!s2.is_ascii_numeric()); - /// ``` - pub fn is_ascii_numeric(self) -> bool { - let word = self.0.get(); - let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; - let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); - (numeric & mask) == 0 - } - - /// Converts this type to its ASCII lower case equivalent in-place. - /// - /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "TeS3".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_lowercase(), "tes3"); - /// ``` - pub fn to_ascii_lowercase(self) -> Self { - let word = self.0.get(); - let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2); - unsafe { Self(NonZeroU32::new_unchecked(result)) } - } - - /// Converts this type to its ASCII title case equivalent in-place. 
- /// - /// First character, if is an ASCII letter 'a' to 'z' is mapped to 'A' to 'Z', - /// other characters are unchanged. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "test".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_titlecase(), "Test"); - /// ``` - pub fn to_ascii_titlecase(self) -> Self { - let word = self.0.get().to_le(); - let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2; - let result = (word | mask) & !(0x20 & mask); - unsafe { Self(NonZeroU32::new_unchecked(u32::from_le(result))) } - } - - /// Converts this type to its ASCII upper case equivalent in-place. - /// - /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr4; - /// - /// let s1: TinyStr4 = "Tes3".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_uppercase(), "TES3"); - /// ``` - pub fn to_ascii_uppercase(self) -> Self { - let word = self.0.get(); - let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2); - unsafe { Self(NonZeroU32::new_unchecked(result)) } - } -} - -impl fmt::Display for TinyStr4 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.deref()) - } -} - -impl fmt::Debug for TinyStr4 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self.deref()) - } -} - -impl Deref for TinyStr4 { - type Target = str; - - #[inline(always)] - fn deref(&self) -> &str { - // Again, could use #cfg to hand-roll a big-endian implementation. 
- let word = self.0.get().to_le(); - let len = (4 - word.leading_zeros() / 8) as usize; - unsafe { - let slice = core::slice::from_raw_parts(&self.0 as *const _ as *const u8, len); - std::str::from_utf8_unchecked(slice) - } - } -} - -impl PartialEq<&str> for TinyStr4 { - fn eq(&self, other: &&str) -> bool { - self.deref() == *other - } -} - -impl PartialOrd for TinyStr4 { - fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for TinyStr4 { - fn cmp(&self, other: &Self) -> Ordering { - self.0.get().to_be().cmp(&other.0.get().to_be()) - } -} - -impl FromStr for TinyStr4 { - type Err = Error; - - #[inline(always)] - fn from_str(text: &str) -> Result<Self, Self::Err> { - Self::from_bytes(text.as_bytes()) - } -} - -impl Into<u32> for TinyStr4 { - fn into(self) -> u32 { - self.0.get().to_le() - } -} diff --git a/vendor/tinystr/src/tinystr8.rs b/vendor/tinystr/src/tinystr8.rs deleted file mode 100644 index e121c519a..000000000 --- a/vendor/tinystr/src/tinystr8.rs +++ /dev/null @@ -1,319 +0,0 @@ -use std::cmp::Ordering; -use std::convert::Into; -use std::fmt; -use std::num::NonZeroU64; -use std::ops::Deref; -use std::ptr::copy_nonoverlapping; -use std::str::FromStr; - -use crate::Error; - -/// A tiny string that is from 1 to 8 non-NUL ASCII characters. -/// -/// # Examples -/// -/// ``` -/// use tinystr::TinyStr8; -/// -/// let s1: TinyStr8 = "Testing".parse() -/// .expect("Failed to parse."); -/// -/// assert_eq!(s1, "Testing"); -/// assert!(s1.is_ascii_alphabetic()); -/// ``` -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct TinyStr8(NonZeroU64); - -impl TinyStr8 { - /// Creates a TinyStr8 from a byte slice. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1 = TinyStr8::from_bytes("Testing".as_bytes()) - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1, "Testing"); - /// ``` - #[inline(always)] - pub fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { - let len = bytes.len(); - if len < 1 || len > 8 { - return Err(Error::InvalidSize); - } - unsafe { - let mut word: u64 = 0; - copy_nonoverlapping(bytes.as_ptr(), &mut word as *mut u64 as *mut u8, len); - let mask = 0x80808080_80808080u64 >> (8 * (8 - len)); - // TODO: could do this with #cfg(target_endian), but this is clearer and - // more confidence-inspiring. - let mask = u64::from_le(mask); - if (word & mask) != 0 { - return Err(Error::NonAscii); - } - if ((mask - word) & mask) != 0 { - return Err(Error::InvalidNull); - } - Ok(Self(NonZeroU64::new_unchecked(word))) - } - } - - /// An unsafe constructor intended for cases where the consumer - /// guarantees that the input is a little endian integer which - /// is a correct representation of a `TinyStr8` string. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "Testing".parse() - /// .expect("Failed to parse."); - /// - /// let num: u64 = s1.into(); - /// - /// let s2 = unsafe { TinyStr8::new_unchecked(num) }; - /// - /// assert_eq!(s1, s2); - /// assert_eq!(s2.as_str(), "Testing"); - /// ``` - /// - /// # Safety - /// - /// The method does not validate the `u64` to be properly encoded - /// value for `TinyStr8`. - /// The value can be retrieved via `Into<u64> for TinyStr8`. - #[inline(always)] - pub const unsafe fn new_unchecked(text: u64) -> Self { - Self(NonZeroU64::new_unchecked(u64::from_le(text))) - } - - /// Extracts a string slice containing the entire `TinyStr8`. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "Testing".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.as_str(), "Testing"); - /// ``` - #[inline(always)] - pub fn as_str(&self) -> &str { - self.deref() - } - - /// Checks if the value is composed of ASCII alphabetic characters: - /// - /// * U+0041 'A' ..= U+005A 'Z', or - /// * U+0061 'a' ..= U+007A 'z'. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "Testing".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr8 = "Te3ting".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_alphabetic()); - /// assert!(!s2.is_ascii_alphabetic()); - /// ``` - pub fn is_ascii_alphabetic(self) -> bool { - let word = self.0.get(); - let mask = (word + 0x7f7f7f7f_7f7f7f7f) & 0x80808080_80808080; - let lower = word | 0x20202020_20202020; - let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f) | (lower + 0x05050505_05050505); - (alpha & mask) == 0 - } - - /// Checks if the value is composed of ASCII alphanumeric characters: - /// - /// * U+0041 'A' ..= U+005A 'Z', or - /// * U+0061 'a' ..= U+007A 'z', or - /// * U+0030 '0' ..= U+0039 '9'. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "A15bing".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr8 = "[3@wing".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_alphanumeric()); - /// assert!(!s2.is_ascii_alphanumeric()); - /// ``` - pub fn is_ascii_alphanumeric(self) -> bool { - let word = self.0.get(); - let mask = (word + 0x7f7f7f7f_7f7f7f7f) & 0x80808080_80808080; - let numeric = !(word + 0x50505050_50505050) | (word + 0x46464646_46464646); - let lower = word | 0x20202020_20202020; - let alpha = !(lower + 0x1f1f1f1f_1f1f1f1f) | (lower + 0x05050505_05050505); - (alpha & numeric & mask) == 0 - } - - /// Checks if the value is composed of ASCII decimal digits: - /// - /// * U+0030 '0' ..= U+0039 '9'. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "3121029".parse() - /// .expect("Failed to parse."); - /// let s2: TinyStr8 = "3d212d".parse() - /// .expect("Failed to parse."); - /// - /// assert!(s1.is_ascii_numeric()); - /// assert!(!s2.is_ascii_numeric()); - /// ``` - pub fn is_ascii_numeric(self) -> bool { - let word = self.0.get(); - let mask = (word + 0x7f7f7f7f_7f7f7f7f) & 0x80808080_80808080; - let numeric = !(word + 0x50505050_50505050) | (word + 0x46464646_46464646); - (numeric & mask) == 0 - } - - /// Converts this type to its ASCII lower case equivalent in-place. - /// - /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "TeS3ing".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_lowercase(), "tes3ing"); - /// ``` - pub fn to_ascii_lowercase(self) -> Self { - let word = self.0.get(); - let result = word - | (((word + 0x3f3f3f3f_3f3f3f3f) - & !(word + 0x25252525_25252525) - & 0x80808080_80808080) - >> 2); - unsafe { Self(NonZeroU64::new_unchecked(result)) } - } - - /// Converts this type to its ASCII title case equivalent in-place. - /// - /// First character, if is an ASCII letter 'a' to 'z' is mapped to 'A' to 'Z', - /// other characters are unchanged. - /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "testing".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_titlecase(), "Testing"); - /// ``` - pub fn to_ascii_titlecase(self) -> Self { - let word = self.0.get().to_le(); - let mask = - ((word + 0x3f3f3f3f_3f3f3f1f) & !(word + 0x25252525_25252505) & 0x80808080_80808080) - >> 2; - let result = (word | mask) & !(0x20 & mask); - unsafe { Self(NonZeroU64::new_unchecked(u64::from_le(result))) } - } - - /// Converts this type to its ASCII upper case equivalent in-place. - /// - /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged. 
- /// - /// # Examples - /// - /// ``` - /// use tinystr::TinyStr8; - /// - /// let s1: TinyStr8 = "Tes3ing".parse() - /// .expect("Failed to parse."); - /// - /// assert_eq!(s1.to_ascii_uppercase(), "TES3ING"); - /// ``` - pub fn to_ascii_uppercase(self) -> Self { - let word = self.0.get(); - let result = word - & !(((word + 0x1f1f1f1f_1f1f1f1f) - & !(word + 0x05050505_05050505) - & 0x80808080_80808080) - >> 2); - unsafe { Self(NonZeroU64::new_unchecked(result)) } - } -} - -impl fmt::Display for TinyStr8 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.deref()) - } -} - -impl fmt::Debug for TinyStr8 { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:?}", self.deref()) - } -} - -impl Deref for TinyStr8 { - type Target = str; - - #[inline(always)] - fn deref(&self) -> &str { - // Again, could use #cfg to hand-roll a big-endian implementation. - let word = self.0.get().to_le(); - let len = (8 - word.leading_zeros() / 8) as usize; - unsafe { - let slice = core::slice::from_raw_parts(&self.0 as *const _ as *const u8, len); - std::str::from_utf8_unchecked(slice) - } - } -} - -impl PartialEq<&str> for TinyStr8 { - fn eq(&self, other: &&str) -> bool { - self.deref() == *other - } -} - -impl PartialOrd for TinyStr8 { - fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for TinyStr8 { - fn cmp(&self, other: &Self) -> Ordering { - self.0.get().to_be().cmp(&other.0.get().to_be()) - } -} - -impl FromStr for TinyStr8 { - type Err = Error; - - #[inline(always)] - fn from_str(text: &str) -> Result<Self, Self::Err> { - TinyStr8::from_bytes(text.as_bytes()) - } -} - -impl Into<u64> for TinyStr8 { - fn into(self) -> u64 { - self.0.get().to_le() - } -} diff --git a/vendor/tinystr/src/tinystrauto.rs b/vendor/tinystr/src/tinystrauto.rs deleted file mode 100644 index 9e2387cc1..000000000 --- a/vendor/tinystr/src/tinystrauto.rs +++ /dev/null @@ -1,72 +0,0 @@ -use std::fmt; -use 
std::ops::Deref; -use std::str::FromStr; - -use crate::helpers::String; -use crate::Error; -use crate::TinyStr16; - -/// An ASCII string that is tiny when <= 16 chars and a String otherwise. -/// -/// # Examples -/// -/// ``` -/// use tinystr::TinyStrAuto; -/// -/// let s1: TinyStrAuto = "Testing".parse() -/// .expect("Failed to parse."); -/// -/// assert_eq!(s1, "Testing"); -/// ``` -#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] -pub enum TinyStrAuto { - /// Up to 16 characters stored on the stack. - Tiny(TinyStr16), - /// 17 or more characters stored on the heap. - Heap(String), -} - -impl fmt::Display for TinyStrAuto { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.deref().fmt(f) - } -} - -impl Deref for TinyStrAuto { - type Target = str; - - fn deref(&self) -> &str { - use TinyStrAuto::*; - match self { - Tiny(value) => value.deref(), - Heap(value) => value.deref(), - } - } -} - -impl PartialEq<&str> for TinyStrAuto { - fn eq(&self, other: &&str) -> bool { - self.deref() == *other - } -} - -impl FromStr for TinyStrAuto { - type Err = Error; - - fn from_str(text: &str) -> Result<Self, Self::Err> { - if text.len() <= 16 { - match TinyStr16::from_str(text) { - Ok(result) => Ok(TinyStrAuto::Tiny(result)), - Err(err) => Err(err), - } - } else { - if !text.is_ascii() { - return Err(Error::NonAscii); - } - match String::from_str(text) { - Ok(result) => Ok(TinyStrAuto::Heap(result)), - Err(_) => unreachable!(), - } - } - } -} diff --git a/vendor/tinystr/src/ule.rs b/vendor/tinystr/src/ule.rs new file mode 100644 index 000000000..0fa212095 --- /dev/null +++ b/vendor/tinystr/src/ule.rs @@ -0,0 +1,76 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use crate::TinyAsciiStr; +use zerovec::maps::ZeroMapKV; +use zerovec::ule::*; +use zerovec::{ZeroSlice, ZeroVec}; + +// Safety (based on the safety checklist on the ULE trait): +// 1. CharULE does not include any uninitialized or padding bytes. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 2. CharULE is aligned to 1 byte. +// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. +// 5. The other ULE methods use the default impl. +// 6. CharULE byte equality is semantic equality +unsafe impl<const N: usize> ULE for TinyAsciiStr<N> { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { + if bytes.len() % N != 0 { + return Err(ZeroVecError::length::<Self>(bytes.len())); + } + // Validate the bytes + for chunk in bytes.chunks_exact(N) { + let _ = TinyAsciiStr::<N>::from_bytes_inner(chunk, 0, N, true) + .map_err(|_| ZeroVecError::parse::<Self>())?; + } + Ok(()) + } +} + +impl<const N: usize> AsULE for TinyAsciiStr<N> { + type ULE = Self; + + #[inline] + fn to_unaligned(self) -> Self::ULE { + self + } + + #[inline] + fn from_unaligned(unaligned: Self::ULE) -> Self { + unaligned + } +} + +impl<'a, const N: usize> ZeroMapKV<'a> for TinyAsciiStr<N> { + type Container = ZeroVec<'a, TinyAsciiStr<N>>; + type Slice = ZeroSlice<TinyAsciiStr<N>>; + type GetType = TinyAsciiStr<N>; + type OwnedType = TinyAsciiStr<N>; +} + +#[cfg(test)] +mod test { + use crate::*; + use zerovec::*; + + #[test] + fn test_zerovec() { + let mut vec = ZeroVec::<TinyAsciiStr<7>>::new(); + + vec.with_mut(|v| v.push("foobar".parse().unwrap())); + vec.with_mut(|v| v.push("baz".parse().unwrap())); + vec.with_mut(|v| v.push("quux".parse().unwrap())); + + let bytes = vec.as_bytes(); + + let vec: ZeroVec<TinyAsciiStr<7>> = 
ZeroVec::parse_byte_slice(bytes).unwrap(); + + assert_eq!(&*vec.get(0).unwrap(), "foobar"); + assert_eq!(&*vec.get(1).unwrap(), "baz"); + assert_eq!(&*vec.get(2).unwrap(), "quux"); + } +} diff --git a/vendor/tinystr/tests/main.rs b/vendor/tinystr/tests/main.rs deleted file mode 100644 index a9cf7be1a..000000000 --- a/vendor/tinystr/tests/main.rs +++ /dev/null @@ -1,538 +0,0 @@ -use std::fmt::Write; -use std::mem::size_of; -use std::ops::Deref; -use tinystr::{Error, TinyStr16, TinyStr4, TinyStr8}; - -#[cfg(any(feature = "std", feature = "alloc"))] -use tinystr::TinyStrAuto; - -#[test] -fn tiny_sizes() { - assert_eq!(4, size_of::<TinyStr4>()); - assert_eq!(8, size_of::<TinyStr8>()); - assert_eq!(16, size_of::<TinyStr16>()); - assert_eq!(24, size_of::<String>()); - // Note: TinyStrAuto is size 32 even when a smaller TinyStr type is used - #[cfg(any(feature = "std", feature = "alloc"))] - assert_eq!(32, size_of::<TinyStrAuto>()); -} - -#[test] -fn tiny4_basic() { - let s: TinyStr4 = "abc".parse().unwrap(); - assert_eq!(s.deref(), "abc"); -} - -#[test] -fn tiny4_from_bytes() { - let s = TinyStr4::from_bytes("abc".as_bytes()).unwrap(); - assert_eq!(s.deref(), "abc"); - - assert_eq!( - TinyStr4::from_bytes(&[0, 159, 146, 150]), - Err(Error::NonAscii) - ); - assert_eq!(TinyStr4::from_bytes(&[]), Err(Error::InvalidSize)); - assert_eq!(TinyStr4::from_bytes(&[0]), Err(Error::InvalidNull)); -} - -#[test] -fn tiny4_size() { - assert_eq!("".parse::<TinyStr4>(), Err(Error::InvalidSize)); - assert!("1".parse::<TinyStr4>().is_ok()); - assert!("12".parse::<TinyStr4>().is_ok()); - assert!("123".parse::<TinyStr4>().is_ok()); - assert!("1234".parse::<TinyStr4>().is_ok()); - assert_eq!("12345".parse::<TinyStr4>(), Err(Error::InvalidSize)); - assert_eq!("123456789".parse::<TinyStr4>(), Err(Error::InvalidSize)); -} - -#[test] -fn tiny4_null() { - assert_eq!("a\u{0}b".parse::<TinyStr4>(), Err(Error::InvalidNull)); -} - -#[test] -fn tiny4_new_unchecked() { - let reference: TinyStr4 = 
"en".parse().unwrap(); - let uval: u32 = reference.into(); - let s = unsafe { TinyStr4::new_unchecked(uval) }; - assert_eq!(s, reference); - assert_eq!(s, "en"); -} - -#[test] -fn tiny4_nonascii() { - assert_eq!("\u{4000}".parse::<TinyStr4>(), Err(Error::NonAscii)); -} - -#[test] -fn tiny4_alpha() { - let s: TinyStr4 = "@aZ[".parse().unwrap(); - assert!(!s.is_ascii_alphabetic()); - assert!(!s.is_ascii_alphanumeric()); - assert_eq!(s.to_ascii_uppercase().as_str(), "@AZ["); - assert_eq!(s.to_ascii_lowercase().as_str(), "@az["); - - assert!("abYZ".parse::<TinyStr4>().unwrap().is_ascii_alphabetic()); - assert!("abYZ".parse::<TinyStr4>().unwrap().is_ascii_alphanumeric()); - assert!("a123".parse::<TinyStr4>().unwrap().is_ascii_alphanumeric()); - assert!(!"a123".parse::<TinyStr4>().unwrap().is_ascii_alphabetic()); -} - -#[test] -fn tiny4_numeric() { - let s: TinyStr4 = "@aZ[".parse().unwrap(); - assert!(!s.is_ascii_numeric()); - - assert!("0123".parse::<TinyStr4>().unwrap().is_ascii_numeric()); -} - -#[test] -fn tiny4_titlecase() { - assert_eq!( - "abcd" - .parse::<TinyStr4>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcd" - ); - assert_eq!( - "ABCD" - .parse::<TinyStr4>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcd" - ); - assert_eq!( - "aBCD" - .parse::<TinyStr4>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcd" - ); - assert_eq!( - "A123" - .parse::<TinyStr4>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "A123" - ); - assert_eq!( - "123a" - .parse::<TinyStr4>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "123a" - ); -} - -#[test] -fn tiny4_ord() { - let mut v: Vec<TinyStr4> = vec!["zh".parse().unwrap(), "fr".parse().unwrap()]; - v.sort(); - - assert_eq!(v.get(0).unwrap().as_str(), "fr"); - assert_eq!(v.get(1).unwrap().as_str(), "zh"); -} - -#[test] -fn tiny4_eq() { - let s1: TinyStr4 = "en".parse().unwrap(); - let s2: TinyStr4 = "fr".parse().unwrap(); - let s3: TinyStr4 = "en".parse().unwrap(); - - assert_eq!(s1, s3); - 
assert_ne!(s1, s2); -} - -#[test] -fn tiny4_display() { - let s: TinyStr4 = "abcd".parse().unwrap(); - let mut result = String::new(); - write!(result, "{}", s).unwrap(); - assert_eq!(result, "abcd"); - assert_eq!(format!("{}", s), "abcd"); -} - -#[test] -fn tiny4_debug() { - let s: TinyStr4 = "abcd".parse().unwrap(); - assert_eq!(format!("{:#?}", s), "\"abcd\""); -} - -#[test] -fn tiny8_basic() { - let s: TinyStr8 = "abcde".parse().unwrap(); - assert_eq!(s.deref(), "abcde"); -} - -#[test] -fn tiny8_from_bytes() { - let s = TinyStr8::from_bytes("abcde".as_bytes()).unwrap(); - assert_eq!(s.deref(), "abcde"); - - assert_eq!( - TinyStr8::from_bytes(&[0, 159, 146, 150]), - Err(Error::NonAscii) - ); - assert_eq!(TinyStr8::from_bytes(&[]), Err(Error::InvalidSize)); - assert_eq!(TinyStr8::from_bytes(&[0]), Err(Error::InvalidNull)); -} - -#[test] -fn tiny8_size() { - assert_eq!("".parse::<TinyStr8>(), Err(Error::InvalidSize)); - assert!("1".parse::<TinyStr8>().is_ok()); - assert!("12".parse::<TinyStr8>().is_ok()); - assert!("123".parse::<TinyStr8>().is_ok()); - assert!("1234".parse::<TinyStr8>().is_ok()); - assert!("12345".parse::<TinyStr8>().is_ok()); - assert!("123456".parse::<TinyStr8>().is_ok()); - assert!("1234567".parse::<TinyStr8>().is_ok()); - assert!("12345678".parse::<TinyStr8>().is_ok()); - assert_eq!("123456789".parse::<TinyStr8>(), Err(Error::InvalidSize)); -} - -#[test] -fn tiny8_null() { - assert_eq!("a\u{0}b".parse::<TinyStr8>(), Err(Error::InvalidNull)); -} - -#[test] -fn tiny8_new_unchecked() { - let reference: TinyStr8 = "Windows".parse().unwrap(); - let uval: u64 = reference.into(); - let s = unsafe { TinyStr8::new_unchecked(uval) }; - assert_eq!(s, reference); - assert_eq!(s, "Windows"); -} - -#[test] -fn tiny8_nonascii() { - assert_eq!("\u{4000}".parse::<TinyStr8>(), Err(Error::NonAscii)); -} - -#[test] -fn tiny8_alpha() { - let s: TinyStr8 = "@abcXYZ[".parse().unwrap(); - assert!(!s.is_ascii_alphabetic()); - assert!(!s.is_ascii_alphanumeric()); - 
assert_eq!(s.to_ascii_uppercase().as_str(), "@ABCXYZ["); - assert_eq!(s.to_ascii_lowercase().as_str(), "@abcxyz["); - - assert!("abcXYZ".parse::<TinyStr8>().unwrap().is_ascii_alphabetic()); - assert!("abcXYZ" - .parse::<TinyStr8>() - .unwrap() - .is_ascii_alphanumeric()); - assert!(!"abc123".parse::<TinyStr8>().unwrap().is_ascii_alphabetic()); - assert!("abc123" - .parse::<TinyStr8>() - .unwrap() - .is_ascii_alphanumeric()); -} - -#[test] -fn tiny8_numeric() { - let s: TinyStr8 = "@abcXYZ[".parse().unwrap(); - assert!(!s.is_ascii_numeric()); - - assert!("01234567".parse::<TinyStr8>().unwrap().is_ascii_numeric()); -} - -#[test] -fn tiny8_titlecase() { - assert_eq!( - "abcdabcd" - .parse::<TinyStr8>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcdabcd" - ); - assert_eq!( - "ABCDABCD" - .parse::<TinyStr8>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcdabcd" - ); - assert_eq!( - "aBCDaBCD" - .parse::<TinyStr8>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcdabcd" - ); - assert_eq!( - "A123a123" - .parse::<TinyStr8>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "A123a123" - ); - assert_eq!( - "123a123A" - .parse::<TinyStr8>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "123a123a" - ); -} - -#[test] -fn tiny8_ord() { - let mut v: Vec<TinyStr8> = vec!["nedis".parse().unwrap(), "macos".parse().unwrap()]; - v.sort(); - - assert_eq!(v.get(0).unwrap().as_str(), "macos"); - assert_eq!(v.get(1).unwrap().as_str(), "nedis"); -} - -#[test] -fn tiny8_eq() { - let s1: TinyStr8 = "windows".parse().unwrap(); - let s2: TinyStr8 = "mac".parse().unwrap(); - let s3: TinyStr8 = "windows".parse().unwrap(); - - assert_eq!(s1, s3); - assert_ne!(s1, s2); -} - -#[test] -fn tiny8_display() { - let s: TinyStr8 = "abcdef".parse().unwrap(); - let mut result = String::new(); - write!(result, "{}", s).unwrap(); - assert_eq!(result, "abcdef"); - assert_eq!(format!("{}", s), "abcdef"); -} - -#[test] -fn tiny8_debug() { - let s: TinyStr8 = 
"abcdef".parse().unwrap(); - assert_eq!(format!("{:#?}", s), "\"abcdef\""); -} - -#[test] -fn tiny16_from_bytes() { - let s = TinyStr16::from_bytes("abcdefghijk".as_bytes()).unwrap(); - assert_eq!(s.deref(), "abcdefghijk"); - - assert_eq!( - TinyStr16::from_bytes(&[0, 159, 146, 150]), - Err(Error::NonAscii) - ); - assert_eq!(TinyStr16::from_bytes(&[]), Err(Error::InvalidSize)); - assert_eq!(TinyStr16::from_bytes(&[0]), Err(Error::InvalidNull)); -} - -#[test] -fn tiny16_size() { - assert_eq!("".parse::<TinyStr16>(), Err(Error::InvalidSize)); - assert!("1".parse::<TinyStr16>().is_ok()); - assert!("12".parse::<TinyStr16>().is_ok()); - assert!("123".parse::<TinyStr16>().is_ok()); - assert!("1234".parse::<TinyStr16>().is_ok()); - assert!("12345".parse::<TinyStr16>().is_ok()); - assert!("123456".parse::<TinyStr16>().is_ok()); - assert!("1234567".parse::<TinyStr16>().is_ok()); - assert!("12345678".parse::<TinyStr16>().is_ok()); - assert!("123456781".parse::<TinyStr16>().is_ok()); - assert!("1234567812".parse::<TinyStr16>().is_ok()); - assert!("12345678123".parse::<TinyStr16>().is_ok()); - assert!("123456781234".parse::<TinyStr16>().is_ok()); - assert!("1234567812345".parse::<TinyStr16>().is_ok()); - assert!("12345678123456".parse::<TinyStr16>().is_ok()); - assert!("123456781234567".parse::<TinyStr16>().is_ok()); - assert!("1234567812345678".parse::<TinyStr16>().is_ok()); - assert_eq!( - "12345678123456789".parse::<TinyStr16>(), - Err(Error::InvalidSize) - ); -} - -#[test] -fn tiny16_null() { - assert_eq!("a\u{0}b".parse::<TinyStr16>(), Err(Error::InvalidNull)); -} - -#[test] -fn tiny16_new_unchecked() { - let reference: TinyStr16 = "WindowsCE/ME/NT".parse().unwrap(); - let uval: u128 = reference.into(); - let s = unsafe { TinyStr16::new_unchecked(uval) }; - assert_eq!(s, reference); - assert_eq!(s, "WindowsCE/ME/NT"); -} - -#[test] -fn tiny16_nonascii() { - assert_eq!("\u{4000}".parse::<TinyStr16>(), Err(Error::NonAscii)); -} - -#[test] -fn tiny16_alpha() { - let s: 
TinyStr16 = "@abcdefgTUVWXYZ[".parse().unwrap(); - assert!(!s.is_ascii_alphabetic()); - assert!(!s.is_ascii_alphanumeric()); - assert_eq!(s.to_ascii_uppercase().as_str(), "@ABCDEFGTUVWXYZ["); - assert_eq!(s.to_ascii_lowercase().as_str(), "@abcdefgtuvwxyz["); - - assert!("abcdefgTUVWXYZ" - .parse::<TinyStr16>() - .unwrap() - .is_ascii_alphabetic()); - assert!("abcdefgTUVWXYZ" - .parse::<TinyStr16>() - .unwrap() - .is_ascii_alphanumeric()); - assert!(!"abcdefg0123456" - .parse::<TinyStr16>() - .unwrap() - .is_ascii_alphabetic()); - assert!("abcdefgTUVWXYZ" - .parse::<TinyStr16>() - .unwrap() - .is_ascii_alphanumeric()); -} - -#[test] -fn tiny16_numeric() { - let s: TinyStr16 = "@abcdefgTUVWXYZ[".parse().unwrap(); - assert!(!s.is_ascii_numeric()); - - assert!("0123456789" - .parse::<TinyStr16>() - .unwrap() - .is_ascii_numeric()); -} - -#[test] -fn tiny16_titlecase() { - assert_eq!( - "abcdabcdabcdabcd" - .parse::<TinyStr16>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcdabcdabcdabcd" - ); - assert_eq!( - "ABCDABCDABCDABCD" - .parse::<TinyStr16>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcdabcdabcdabcd" - ); - assert_eq!( - "aBCDaBCDaBCDaBCD" - .parse::<TinyStr16>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "Abcdabcdabcdabcd" - ); - assert_eq!( - "A123a123A123a123" - .parse::<TinyStr16>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "A123a123a123a123" - ); - assert_eq!( - "123a123A123a123A" - .parse::<TinyStr16>() - .unwrap() - .to_ascii_titlecase() - .as_str(), - "123a123a123a123a" - ); -} - -#[test] -fn tiny16_ord() { - let mut v: Vec<TinyStr16> = vec!["nedis_xxxx".parse().unwrap(), "macos_xxxx".parse().unwrap()]; - v.sort(); - - assert_eq!(v.get(0).unwrap().as_str(), "macos_xxxx"); - assert_eq!(v.get(1).unwrap().as_str(), "nedis_xxxx"); -} - -#[test] -fn tiny16_eq() { - let s1: TinyStr16 = "windows98SE".parse().unwrap(); - let s2: TinyStr16 = "mac".parse().unwrap(); - let s3: TinyStr16 = "windows98SE".parse().unwrap(); - - 
assert_eq!(s1, s3); - assert_ne!(s1, s2); -} - -#[test] -fn tiny16_display() { - let s: TinyStr16 = "abcdefghijkl".parse().unwrap(); - let mut result = String::new(); - write!(result, "{}", s).unwrap(); - assert_eq!(result, "abcdefghijkl"); - assert_eq!(format!("{}", s), "abcdefghijkl"); -} - -#[test] -fn tiny16_debug() { - let s: TinyStr16 = "abcdefghijkl".parse().unwrap(); - assert_eq!(format!("{:#?}", s), "\"abcdefghijkl\""); -} - -#[cfg(any(feature = "std", feature = "alloc"))] -#[test] -fn tinyauto_basic() { - let s1: TinyStrAuto = "abc".parse().unwrap(); - assert_eq!(s1, "abc"); - - let s2: TinyStrAuto = "veryveryveryveryverylong".parse().unwrap(); - assert_eq!(s2, "veryveryveryveryverylong"); -} - -#[cfg(any(feature = "std", feature = "alloc"))] -#[test] -fn tinyauto_nonascii() { - assert_eq!("\u{4000}".parse::<TinyStrAuto>(), Err(Error::NonAscii)); - assert_eq!( - "veryveryveryveryverylong\u{4000}".parse::<TinyStrAuto>(), - Err(Error::NonAscii) - ); -} - -#[cfg(feature = "macros")] -const TS: TinyStr8 = tinystr::macros::tinystr8!("test"); - -#[cfg(feature = "macros")] -#[test] -fn tinystr_macros() { - use tinystr::macros::*; - - let x: TinyStr8 = "test".parse().unwrap(); - assert_eq!(TS, x); - - let x: TinyStr4 = "foo".parse().unwrap(); - assert_eq!(tinystr4!("foo"), x); - - let x: TinyStr8 = "barbaz".parse().unwrap(); - assert_eq!(tinystr8!("barbaz"), x); - - let x: TinyStr16 = "metamorphosis".parse().unwrap(); - assert_eq!(tinystr16!("metamorphosis"), x); -} diff --git a/vendor/tinystr/tests/serde.rs b/vendor/tinystr/tests/serde.rs new file mode 100644 index 000000000..282914e6f --- /dev/null +++ b/vendor/tinystr/tests/serde.rs @@ -0,0 +1,39 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use tinystr::*; + +// Tests largely adapted from `tinystr` crate +// https://github.com/zbraniecki/tinystr/blob/4e4eab55dd6bded7f29a18b41452c506c461716c/tests/serde.rs + +macro_rules! test_roundtrip { + ($f:ident, $n:literal, $val:expr) => { + #[test] + fn $f() { + let tiny: TinyAsciiStr<$n> = $val.parse().unwrap(); + let json_string = serde_json::to_string(&tiny).unwrap(); + let expected_json = concat!("\"", $val, "\""); + assert_eq!(json_string, expected_json); + let recover: TinyAsciiStr<$n> = serde_json::from_str(&json_string).unwrap(); + assert_eq!(&*tiny, &*recover); + + let bin = bincode::serialize(&tiny).unwrap(); + assert_eq!(bin, &tiny.all_bytes()[..]); + let debin: TinyAsciiStr<$n> = bincode::deserialize(&bin).unwrap(); + assert_eq!(&*tiny, &*debin); + + let post = postcard::to_stdvec(&tiny).unwrap(); + assert_eq!(post, &tiny.all_bytes()[..]); + let unpost: TinyAsciiStr<$n> = postcard::from_bytes(&post).unwrap(); + assert_eq!(&*tiny, &*unpost); + } + }; +} + +test_roundtrip!(test_roundtrip4_1, 4, "en"); +test_roundtrip!(test_roundtrip4_2, 4, "Latn"); +test_roundtrip!(test_roundtrip8, 8, "calendar"); +test_roundtrip!(test_roundtrip16, 16, "verylongstring"); +test_roundtrip!(test_roundtrip10, 11, "shortstring"); +test_roundtrip!(test_roundtrip30, 24, "veryveryverylongstring"); |