summaryrefslogtreecommitdiffstats
path: root/third_party/rust/zerovec
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/zerovec')
-rw-r--r--third_party/rust/zerovec/.cargo-checksum.json1
-rw-r--r--third_party/rust/zerovec/Cargo.lock832
-rw-r--r--third_party/rust/zerovec/Cargo.toml162
-rw-r--r--third_party/rust/zerovec/LICENSE44
-rw-r--r--third_party/rust/zerovec/README.md197
-rw-r--r--third_party/rust/zerovec/benches/vzv.rs212
-rw-r--r--third_party/rust/zerovec/benches/zeromap.rs396
-rw-r--r--third_party/rust/zerovec/benches/zerovec.rs165
-rw-r--r--third_party/rust/zerovec/benches/zerovec_iai.rs65
-rw-r--r--third_party/rust/zerovec/benches/zerovec_serde.rs145
-rw-r--r--third_party/rust/zerovec/examples/zv_serde.rs51
-rw-r--r--third_party/rust/zerovec/src/error.rs55
-rw-r--r--third_party/rust/zerovec/src/flexzerovec/databake.rs66
-rw-r--r--third_party/rust/zerovec/src/flexzerovec/mod.rs20
-rw-r--r--third_party/rust/zerovec/src/flexzerovec/owned.rs335
-rw-r--r--third_party/rust/zerovec/src/flexzerovec/serde.rs175
-rw-r--r--third_party/rust/zerovec/src/flexzerovec/slice.rs722
-rw-r--r--third_party/rust/zerovec/src/flexzerovec/vec.rs275
-rw-r--r--third_party/rust/zerovec/src/hashmap/algorithms.rs162
-rw-r--r--third_party/rust/zerovec/src/hashmap/mod.rs240
-rw-r--r--third_party/rust/zerovec/src/hashmap/serde.rs147
-rw-r--r--third_party/rust/zerovec/src/lib.rs558
-rw-r--r--third_party/rust/zerovec/src/map/borrowed.rs325
-rw-r--r--third_party/rust/zerovec/src/map/databake.rs82
-rw-r--r--third_party/rust/zerovec/src/map/kv.rs131
-rw-r--r--third_party/rust/zerovec/src/map/map.rs653
-rw-r--r--third_party/rust/zerovec/src/map/mod.rs23
-rw-r--r--third_party/rust/zerovec/src/map/serde.rs313
-rw-r--r--third_party/rust/zerovec/src/map/serde_helpers.rs168
-rw-r--r--third_party/rust/zerovec/src/map/vecs.rs724
-rw-r--r--third_party/rust/zerovec/src/map2d/borrowed.rs339
-rw-r--r--third_party/rust/zerovec/src/map2d/cursor.rs358
-rw-r--r--third_party/rust/zerovec/src/map2d/databake.rs110
-rw-r--r--third_party/rust/zerovec/src/map2d/map.rs875
-rw-r--r--third_party/rust/zerovec/src/map2d/mod.rs18
-rw-r--r--third_party/rust/zerovec/src/map2d/serde.rs430
-rw-r--r--third_party/rust/zerovec/src/samples.rs74
-rw-r--r--third_party/rust/zerovec/src/ule/chars.rs190
-rw-r--r--third_party/rust/zerovec/src/ule/custom.rs145
-rw-r--r--third_party/rust/zerovec/src/ule/encode.rs400
-rw-r--r--third_party/rust/zerovec/src/ule/macros.rs29
-rw-r--r--third_party/rust/zerovec/src/ule/mod.rs394
-rw-r--r--third_party/rust/zerovec/src/ule/multi.rs154
-rw-r--r--third_party/rust/zerovec/src/ule/niche.rs180
-rw-r--r--third_party/rust/zerovec/src/ule/option.rs264
-rw-r--r--third_party/rust/zerovec/src/ule/plain.rs366
-rw-r--r--third_party/rust/zerovec/src/ule/slices.rs103
-rw-r--r--third_party/rust/zerovec/src/ule/tuple.rs179
-rw-r--r--third_party/rust/zerovec/src/ule/unvalidated.rs527
-rw-r--r--third_party/rust/zerovec/src/varzerovec/components.rs574
-rw-r--r--third_party/rust/zerovec/src/varzerovec/databake.rs68
-rw-r--r--third_party/rust/zerovec/src/varzerovec/mod.rs26
-rw-r--r--third_party/rust/zerovec/src/varzerovec/owned.rs662
-rw-r--r--third_party/rust/zerovec/src/varzerovec/serde.rs268
-rw-r--r--third_party/rust/zerovec/src/varzerovec/slice.rs573
-rw-r--r--third_party/rust/zerovec/src/varzerovec/vec.rs531
-rw-r--r--third_party/rust/zerovec/src/yoke_impls.rs551
-rw-r--r--third_party/rust/zerovec/src/zerofrom_impls.rs124
-rw-r--r--third_party/rust/zerovec/src/zerovec/databake.rs69
-rw-r--r--third_party/rust/zerovec/src/zerovec/mod.rs1137
-rw-r--r--third_party/rust/zerovec/src/zerovec/serde.rs221
-rw-r--r--third_party/rust/zerovec/src/zerovec/slice.rs596
62 files changed, 17979 insertions, 0 deletions
diff --git a/third_party/rust/zerovec/.cargo-checksum.json b/third_party/rust/zerovec/.cargo-checksum.json
new file mode 100644
index 0000000000..c82fd3ef49
--- /dev/null
+++ b/third_party/rust/zerovec/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.lock":"63d1ed0deb73c010e41ef6341b196ed5841e95f7fdf220c5ad795ae80d6819ce","Cargo.toml":"f71c4fd9b24bb11dbc6fff1655a56ab07729651b1e1ab3516645f21644d1fca0","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"5d88517b10c49ade5a20a9c2ef0ca70d5e0856ea8d20f9c88571619d2814adfd","benches/vzv.rs":"2b0de82b4a40d45151e7faa2d18cfcb65351059f3f391dd64f031c565212c561","benches/zeromap.rs":"09395d495aa98cd8fb8f0c5a0baef036832bb5e823a32a47c3429ef4148b0518","benches/zerovec.rs":"e32e380e653c62b1f344cb7be917016b739b2c4e722d62193aa3fa45908e77c0","benches/zerovec_iai.rs":"13028e4f1d85bad3ee5e5b783360b7ea247dd5de233d104b91ce46d19bf5765a","benches/zerovec_serde.rs":"7ed92aaaf5981755b4f02f366e1a0434b49a4a6ffa0945c96663626d787db070","examples/zv_serde.rs":"d292088928926579479f2dedd942200b2a5274cbbae9bdd88031193cdd1af297","src/error.rs":"03abb5dbae8dd371fca01700d6caeb012204f3a15b9cbd6d843c7d4261b8f868","src/flexzerovec/databake.rs":"ce388543d31b7ee4db1acca00f5ea9b69428551ce13809f2606563d6768ce1a5","src/flexzerovec/mod.rs":"647678116fcf0321463b3451462920f7c517f4cf6c8a57f4f7b13f5248c8b9b0","src/flexzerovec/owned.rs":"473b7834d04794098b16f84c164c7d4dcd08eb0468fbec78f2362c701cd2ce48","src/flexzerovec/serde.rs":"a0c7a8fcf5ef06f5ea44cbfb89cac47f173c7150082d0cd745a457de3d375b4f","src/flexzerovec/slice.rs":"78cdce6aa6e2ec6154065d2f06a98d998f908b3e7719979e19303a7e0ca5dcca","src/flexzerovec/vec.rs":"c383b8971660a1d93f9bd00c3dbccb38e991dbce7009c1c6433dfb8ac5face79","src/hashmap/algorithms.rs":"37d14b650b51b1000943f1ef7f2aa4df6f202a02947717cdaaed4e5a099e920e","src/hashmap/mod.rs":"be7adb38ba626ff86bf2ffae5231733f75451835d77578f46b631f80931ca2cf","src/hashmap/serde.rs":"c20faf5a9d2f6eb9fbca07fdbfdcba7d20d97b52d5911678e747a14de68e66f3","src/lib.rs":"aef93503cdf167b4bb50df0bf7e69f9b1e57b0527d7c2de1541705d8a4e40800","src/map/borrowed.rs":"373268aa4945b8f5382c6b8b90ce0d354de73aff1321c149ea93108234afc641","src/map/databake.rs":"cee98613d77a8cdf6730ce3b1c2
10e0a80023f704237fe6516cbe82c152da025","src/map/kv.rs":"03f3745df8d61ff399b66d32ba1ac9a7ee298ca00dfb463c3ea8d81c746e9d41","src/map/map.rs":"70a5b23921bd8d9fa3c6fbae042c19ea477683a8691d42da56af887fd03b6f6a","src/map/mod.rs":"4961ad14522fcef16b1aad804d9559c0a5eb2582fdd163bf76f9629cb765b6c4","src/map/serde.rs":"3bed09076a45774c65a8116dd21e72413f1657ed569fe32c4abb48e979fd3144","src/map/serde_helpers.rs":"72787005972b93e49b9dc17aa47d30699364e6da9dc95aadb820ce58e4bf5c54","src/map/vecs.rs":"1606310a274626b92df1d6f03e17fb0561301eff56afbd83e9a13a990413564b","src/map2d/borrowed.rs":"debc2b98b8c4e895ba470f6b96e81d713f65066b8bc34117fb81ab90ed910cba","src/map2d/cursor.rs":"e4447e7b5869e895b852124443151918bff5fe8cfb5207acfe5ff7b57b56bfdf","src/map2d/databake.rs":"295164c08bed8e6cb1a7e000c98724d9e140afc50095968590adaf0e0ec27947","src/map2d/map.rs":"4e4512cfd9419f16365b245252e11db2d410b4d45e125e3191ed6a3a68202aee","src/map2d/mod.rs":"80beae7a263f1fe39c9a06d287c9150480fe3ed43397c2a7475a50ee2b2fd37f","src/map2d/serde.rs":"5a11d53e25e3fada3807878e67dc67d8a28968eae02631ac490d70c42c0e52b0","src/samples.rs":"126b379b64f1d87e1f7479ce65bb4752574db62de38d221b4ee77c93f2c1ae7b","src/ule/chars.rs":"f5ae883916f8dfba6e2fb00b331b90a3d1a565a368e962ecad8d3a972a51c77a","src/ule/custom.rs":"836e57a5e602a56b9d26c4e3a84ec96ae8b62c0e563ce07f58bab81e1cc603fd","src/ule/encode.rs":"3188e7f0566653dcd8f76bb3ff01e41bba467ae3b4781b6ea3bd204bd86b0568","src/ule/macros.rs":"cc19f9c4c4b760377fbbe3d03f5df5a143568c282864463943bc1a7c96606a22","src/ule/mod.rs":"db089e93643640c993dc52d8e1a1bf20c1d921fe219968b9ea454dd4675c753d","src/ule/multi.rs":"11a0359459c76e73ecbe44b690afe21619a11e3c7d11e21af5af48c0c0eae233","src/ule/niche.rs":"8cfec0c7f1c1ce65e3286d2d558c02ecdbfad848beeb8798ce1d0f4ae1d7b77b","src/ule/option.rs":"557c39a42af784c7e11b602610a881186d4af4ef91104aeedc2d3e419d3cb6c7","src/ule/plain.rs":"afb6a74caf72b0331502edf862b0ef8ff9e42d049a7e7eac4def17a340dd159c","src/ule/slices.rs":"a7638535898b39be9f489f3ff9a2140b5
334113c2ddc48c4fae2bc8b86efbd14","src/ule/tuple.rs":"03c74bc849d930788a381fd33c0505f61c9762544edd1b07ec68c95d4be21714","src/ule/unvalidated.rs":"85e03499b94bf976c48676d785033a27c9312b64ffb41fcc4c9222aa04ebc136","src/varzerovec/components.rs":"d05aed702bfd28945d14292907d9d48d5cb31343f3daed9de7e1ab733bd43de8","src/varzerovec/databake.rs":"88cf5932a2a6ae0dc9d61e9d060f6f1de0d7d21e5e2451be73525240de533f44","src/varzerovec/mod.rs":"c7aaaf571f7406e666d877920966a2e39373b5cf6a038cb31dbe4192b2e75d4d","src/varzerovec/owned.rs":"023e501125796cc1a4e86f7c3c096d40c3bacca268e059f1fedb79696678e8a1","src/varzerovec/serde.rs":"e5b4737f318281eafdac55f647ce047ecb76e8cee92ada7c0ce656cf32cf18eb","src/varzerovec/slice.rs":"8c3874d7f60b042288c566f0f08b35f7e2732aa1018e0bff07ae2457abe74ee9","src/varzerovec/vec.rs":"8eff1c3a36ae51be4d86b2a53042d15e6f16c1f6ad4b2afc20a08670a5b8a5b6","src/yoke_impls.rs":"a446c91cf836684fdd46c67910171c08066f7818dda17a56dd021c8ec48ce4eb","src/zerofrom_impls.rs":"587b6e30555b259a109eb085bbc08963ec5fd28d943aa6369bfdf172a668292e","src/zerovec/databake.rs":"abc308838321c5035a80313fa76b7a3852b5d62507bcb1651b1aa8025a37b27d","src/zerovec/mod.rs":"84c096cfe9c92ac6a062dac4ebe16235fd8151ab4b1114f259883a0c01585f6a","src/zerovec/serde.rs":"9f759507dbf548c56384423b76a8bc32b1e0b52e681278a1d727aed836b709a0","src/zerovec/slice.rs":"b8b4079e4e795c950f6ea210757f9e6b128eaf85dc7996ea74be8d86798133c2"},"package":"eff4439ae91fb5c72b8abc12f3f2dbf51bd27e6eadb9f8a5bc8898dddb0e27ea"} \ No newline at end of file
diff --git a/third_party/rust/zerovec/Cargo.lock b/third_party/rust/zerovec/Cargo.lock
new file mode 100644
index 0000000000..0267e48cf9
--- /dev/null
+++ b/third_party/rust/zerovec/Cargo.lock
@@ -0,0 +1,832 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bumpalo"
+version = "3.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cfg-if"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "ciborium"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "3.2.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
+dependencies = [
+ "bitflags",
+ "clap_lex",
+ "indexmap",
+ "textwrap",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
+dependencies = [
+ "os_str_bytes",
+]
+
+[[package]]
+name = "cobs"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+
+[[package]]
+name = "criterion"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
+dependencies = [
+ "anes",
+ "atty",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "itertools",
+ "lazy_static",
+ "num-traits",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
+dependencies = [
+ "cfg-if 1.0.0",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
+dependencies = [
+ "autocfg",
+ "cfg-if 1.0.0",
+ "crossbeam-utils",
+ "memoffset",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
+dependencies = [
+ "cfg-if 1.0.0",
+]
+
+[[package]]
+name = "databake"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82175d72e69414ceafbe2b49686794d3a8bed846e0d50267355f83ea8fdd953a"
+dependencies = [
+ "databake-derive",
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "databake-derive"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "377af281d8f23663862a7c84623bc5dcf7f8c44b13c7496a590bdc157f941a43"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "either"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
+name = "getrandom"
+version = "0.2.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
+dependencies = [
+ "cfg-if 1.0.0",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "half"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "iai"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678"
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
+
+[[package]]
+name = "js-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.148"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
+
+[[package]]
+name = "libm"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
+
+[[package]]
+name = "log"
+version = "0.4.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+
+[[package]]
+name = "memchr"
+version = "2.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
+
+[[package]]
+name = "memoffset"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+
+[[package]]
+name = "oorandom"
+version = "11.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
+
+[[package]]
+name = "os_str_bytes"
+version = "6.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
+
+[[package]]
+name = "plotters"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "postcard"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d534c6e61df1c7166e636ca612d9820d486fe96ddad37f7abc671517b297488e"
+dependencies = [
+ "cobs",
+ "serde",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.67"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rand_distr"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
+dependencies = [
+ "num-traits",
+ "rand",
+]
+
+[[package]]
+name = "rand_pcg"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "rayon"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
+
+[[package]]
+name = "rustc_version"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "ryu"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "semver"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
+dependencies = [
+ "semver-parser",
+]
+
+[[package]]
+name = "semver-parser"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
+
+[[package]]
+name = "serde"
+version = "1.0.188"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.188"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.107"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
+[[package]]
+name = "syn"
+version = "2.0.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "unicode-xid",
+]
+
+[[package]]
+name = "t1ha"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa44aa51ae1a544e2c35a38831ba54ae40591f21384816f531b84f3e984b9ccc"
+dependencies = [
+ "cfg-if 0.1.10",
+ "lazy_static",
+ "num-traits",
+ "rustc_version",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
+
+[[package]]
+name = "walkdir"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
+dependencies = [
+ "cfg-if 1.0.0",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
+
+[[package]]
+name = "web-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "yoke"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65e71b2e4f287f467794c671e2b8f8a5f3716b3c829079a1c44740148eff07e4"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+ "zerofrom",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "655b0814c5c0b19ade497851070c640773304939a6c0fd5f5fb43da0696d05b7"
+
+[[package]]
+name = "zerovec"
+version = "0.10.1"
+dependencies = [
+ "bincode",
+ "criterion",
+ "databake",
+ "getrandom",
+ "iai",
+ "postcard",
+ "rand",
+ "rand_distr",
+ "rand_pcg",
+ "serde",
+ "serde_json",
+ "t1ha",
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b4e5997cbf58990550ef1f0e5124a05e47e1ebd33a84af25739be6031a62c20"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
diff --git a/third_party/rust/zerovec/Cargo.toml b/third_party/rust/zerovec/Cargo.toml
new file mode 100644
index 0000000000..b5e4369d3a
--- /dev/null
+++ b/third_party/rust/zerovec/Cargo.toml
@@ -0,0 +1,162 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.67"
+name = "zerovec"
+version = "0.10.1"
+authors = ["The ICU4X Project Developers"]
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+]
+description = "Zero-copy vector backed by a byte array"
+readme = "README.md"
+keywords = [
+ "zerocopy",
+ "serialization",
+ "zero-copy",
+ "serde",
+]
+categories = [
+ "rust-patterns",
+ "memory-management",
+ "caching",
+ "no-std",
+ "data-structures",
+]
+license-file = "LICENSE"
+repository = "https://github.com/unicode-org/icu4x"
+
+[package.metadata.cargo-all-features]
+denylist = ["bench"]
+max_combination_size = 3
+
+[package.metadata.docs.rs]
+all-features = true
+
+[package.metadata.workspaces]
+independent = true
+
+[lib]
+bench = false
+
+[[example]]
+name = "zv_serde"
+required-features = ["serde"]
+
+[[bench]]
+name = "zerovec"
+harness = false
+
+[[bench]]
+name = "zerovec_serde"
+harness = false
+required-features = ["serde"]
+
+[[bench]]
+name = "vzv"
+harness = false
+
+[[bench]]
+name = "zerovec_iai"
+harness = false
+
+[[bench]]
+name = "zeromap"
+harness = false
+required-features = [
+ "serde",
+ "hashmap",
+ "derive",
+]
+
+[dependencies.databake]
+version = "0.1.7"
+features = ["derive"]
+optional = true
+default-features = false
+
+[dependencies.serde]
+version = "1.0"
+features = ["alloc"]
+optional = true
+default-features = false
+
+[dependencies.t1ha]
+version = "0.1"
+optional = true
+
+[dependencies.yoke]
+version = ">=0.6.0, <0.8.0"
+optional = true
+
+[dependencies.zerofrom]
+version = "0.1.2"
+default-features = false
+
+[dependencies.zerovec-derive]
+version = "0.10.1"
+optional = true
+default-features = false
+
+[dev-dependencies.bincode]
+version = "1.3"
+
+[dev-dependencies.getrandom]
+version = "0.2"
+features = ["js"]
+
+[dev-dependencies.iai]
+version = "0.1"
+
+[dev-dependencies.postcard]
+version = "1.0.0"
+features = ["use-std"]
+default-features = false
+
+[dev-dependencies.rand]
+version = "0.8"
+
+[dev-dependencies.rand_distr]
+version = "0.4"
+
+[dev-dependencies.rand_pcg]
+version = "0.3"
+
+[dev-dependencies.serde]
+version = "1.0"
+features = ["derive"]
+
+[dev-dependencies.serde_json]
+version = "1.0"
+
+[features]
+bench = [
+ "serde",
+ "databake",
+]
+databake = ["dep:databake"]
+derive = ["dep:zerovec-derive"]
+hashmap = ["dep:t1ha"]
+serde = ["dep:serde"]
+std = []
+yoke = ["dep:yoke"]
+
+[target."cfg(not(target_arch = \"wasm32\"))".dev-dependencies.criterion]
+version = "0.4"
diff --git a/third_party/rust/zerovec/LICENSE b/third_party/rust/zerovec/LICENSE
new file mode 100644
index 0000000000..9845aa5f48
--- /dev/null
+++ b/third_party/rust/zerovec/LICENSE
@@ -0,0 +1,44 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2023 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/third_party/rust/zerovec/README.md b/third_party/rust/zerovec/README.md
new file mode 100644
index 0000000000..b1e2317a64
--- /dev/null
+++ b/third_party/rust/zerovec/README.md
@@ -0,0 +1,197 @@
+# zerovec [![crates.io](https://img.shields.io/crates/v/zerovec)](https://crates.io/crates/zerovec)
+
+<!-- cargo-rdme start -->
+
+Zero-copy vector abstractions for arbitrary types, backed by byte slices.
+
+`zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in
+zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with
+proc macros.
+
+Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing
+read-only data.
+
+This crate has four main types:
+
+- [`ZeroVec<'a, T>`] (and [`ZeroSlice<T>`](ZeroSlice)) for fixed-width types like `u32`
+- [`VarZeroVec<'a, T>`] (and [`VarZeroSlice<T>`](VarZeroSlice)) for variable-width types like `str`
+- [`ZeroMap<'a, K, V>`] to map from `K` to `V`
+- [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V`
+
+The first two are intended as close-to-drop-in replacements for `Vec<T>` in Serde structs. The third and fourth are
+intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap). When used with Serde derives, **be sure to apply
+`#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`].
+
+[`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like
+[`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization
+from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing
+from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from,
+avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#Performance) for more information)
+on deserialization.
+
+See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate
+works under the hood.
+
+## Cargo features
+
+This crate has several optional Cargo features:
+ - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde)
+ - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful
+ in situations involving a lot of zero-copy deserialization.
+ - `derive`: Makes it easier to use custom types in these collections by providing the `#[make_ule]` and
+ `#[make_varule]` proc macros, which generate appropriate [`ULE`](https://docs.rs/zerovec/latest/zerovec/ule/trait.ULE.html) and
+ [`VarULE`](https://docs.rs/zerovec/latest/zerovec/ule/trait.VarULE.html)-conformant types for a given "normal" type.
+ - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`.
+
+[`ZeroVec<'a, T>`]: ZeroVec
+[`VarZeroVec<'a, T>`]: VarZeroVec
+[`ZeroMap<'a, K, V>`]: ZeroMap
+[`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d
+[`Cow<'a, T>`]: alloc::borrow::Cow
+
+## Examples
+
+Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode:
+
+```rust
+use zerovec::{VarZeroVec, ZeroVec};
+
+// This example requires the "serde" feature
+#[derive(serde::Serialize, serde::Deserialize)]
+pub struct DataStruct<'data> {
+ #[serde(borrow)]
+ nums: ZeroVec<'data, u32>,
+ #[serde(borrow)]
+ chars: ZeroVec<'data, char>,
+ #[serde(borrow)]
+ strs: VarZeroVec<'data, str>,
+}
+
+let data = DataStruct {
+ nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]),
+ chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']),
+ strs: VarZeroVec::from(&["hello", "world"]),
+};
+let bincode_bytes =
+ bincode::serialize(&data).expect("Serialization should be successful");
+assert_eq!(bincode_bytes.len(), 67);
+
+let deserialized: DataStruct = bincode::deserialize(&bincode_bytes)
+ .expect("Deserialization should be successful");
+assert_eq!(deserialized.nums.first(), Some(211));
+assert_eq!(deserialized.chars.get(1), Some('冇'));
+assert_eq!(deserialized.strs.get(1), Some("world"));
+// The deserialization will not have allocated anything
+assert!(!deserialized.nums.is_owned());
+```
+
+Use custom types inside of ZeroVec:
+
+```rust
+use zerovec::{ZeroVec, VarZeroVec, ZeroMap};
+use std::borrow::Cow;
+use zerovec::ule::encode_varule_to_box;
+
+// custom fixed-size ULE type for ZeroVec
+#[zerovec::make_ule(DateULE)]
+#[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+struct Date {
+ y: u64,
+ m: u8,
+ d: u8
+}
+
+// custom variable sized VarULE type for VarZeroVec
+#[zerovec::make_varule(PersonULE)]
+#[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE
+#[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+struct Person<'a> {
+ birthday: Date,
+ favorite_character: char,
+ #[serde(borrow)]
+ name: Cow<'a, str>,
+}
+
+#[derive(serde::Serialize, serde::Deserialize)]
+struct Data<'a> {
+ #[serde(borrow)]
+ important_dates: ZeroVec<'a, Date>,
+ // note: VarZeroVec always must reference the ULE type directly
+ #[serde(borrow)]
+ important_people: VarZeroVec<'a, PersonULE>,
+ #[serde(borrow)]
+ birthdays_to_people: ZeroMap<'a, Date, PersonULE>
+}
+
+
+let person1 = Person {
+ birthday: Date { y: 1990, m: 9, d: 7},
+ favorite_character: 'π',
+ name: Cow::from("Kate")
+};
+let person2 = Person {
+ birthday: Date { y: 1960, m: 5, d: 25},
+ favorite_character: '冇',
+ name: Cow::from("Jesse")
+};
+
+let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]);
+let important_people = VarZeroVec::from(&[&person1, &person2]);
+let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new();
+// `.insert_var_v()` is slightly more convenient over `.insert()` for custom ULE types
+birthdays_to_people.insert_var_v(&person1.birthday, &person1);
+birthdays_to_people.insert_var_v(&person2.birthday, &person2);
+
+let data = Data { important_dates, important_people, birthdays_to_people };
+
+let bincode_bytes = bincode::serialize(&data)
+ .expect("Serialization should be successful");
+assert_eq!(bincode_bytes.len(), 168);
+
+let deserialized: Data = bincode::deserialize(&bincode_bytes)
+ .expect("Deserialization should be successful");
+
+assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943);
+assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse");
+assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate");
+assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate");
+```
+
+## Performance
+
+`zerovec` is designed for fast deserialization from byte buffers with zero memory allocations
+while minimizing performance regressions for common vector operations.
+
+Benchmark results on x86_64:
+
+| Operation | `Vec<T>` | `zerovec` |
+|---|---|---|
+| Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns |
+| Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns |
+| Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns |
+| Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs |
+| Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns |
+| Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns |
+
+\* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.*
+
+| Operation | `HashMap<K,V>` | `LiteMap<K,V>` | `ZeroMap<K,V>` |
+|---|---|---|---|
+| Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns |
+| Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms |
+| Look up from a small deserialized map | 49 ns | 42 ns | 54 ns |
+| Look up from a large deserialized map | 51 ns | 155 ns | 213 ns |
+
+Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`.
+
+The benches used to generate the above table can be found in the `benches` directory in the project repository.
+`zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type
+is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`.
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/third_party/rust/zerovec/benches/vzv.rs b/third_party/rust/zerovec/benches/vzv.rs
new file mode 100644
index 0000000000..94b6621a96
--- /dev/null
+++ b/third_party/rust/zerovec/benches/vzv.rs
@@ -0,0 +1,212 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::SeedableRng;
+use rand_distr::{Alphanumeric, Distribution, Uniform};
+use rand_pcg::Lcg64Xsh32;
+use std::ops::RangeInclusive;
+
+use zerovec::VarZeroVec;
+
+#[repr(align(8))]
+#[derive(Default)]
+struct AlignedBuffer(Vec<u8>);
+
+/// Generates a vector of random alphanumeric strings.
+///
+/// - lengths = inclusive range of lengths for the strings (chosen uniformly at random)
+/// - count = number of strings to generate
+/// - seed = seed for the PRNG
+///
+/// Returns a tuple of the generated strings and a u64 that can be used to seed the next PRNG.
+fn random_alphanums(lengths: RangeInclusive<usize>, count: usize, seed: u64) -> (Vec<String>, u64) {
+ // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
+ let mut rng1 = Lcg64Xsh32::seed_from_u64(seed);
+ let mut rng2 = Lcg64Xsh32::seed_from_u64(rand::Rng::gen(&mut rng1));
+ let alpha_dist = Alphanumeric;
+ let len_dist = Uniform::from(lengths);
+ let string_vec = len_dist
+ .sample_iter(&mut rng1)
+ .take(count)
+ .map(|len| {
+ (&alpha_dist)
+ .sample_iter(&mut rng2)
+ .take(len)
+ .map(char::from)
+ .collect::<String>()
+ })
+ .collect();
+ (string_vec, rand::Rng::gen(&mut rng1))
+}
+
+/// Criterion entry point for the `VarZeroVec` benchmarks.
+///
+/// Always registers `vzv/overview` (char counting over 100 random strings of length 2..=10).
+/// The char-count, binary-search and precompute groups are registered only when the `bench`
+/// feature is enabled; the serde group additionally needs the `serde` feature.
+fn overview_bench(c: &mut Criterion) {
+ // Same as vzv/char_count/vzv but with different inputs
+ let seed = 42;
+ let (string_vec, _) = random_alphanums(2..=10, 100, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+
+ c.bench_function("vzv/overview", |b| {
+ b.iter(|| {
+ black_box(&vzv)
+ .iter()
+ .fold(0, |sum, string| sum + string.chars().count())
+ });
+ });
+
+ #[cfg(feature = "bench")]
+ {
+ char_count_benches(c);
+ binary_search_benches(c);
+ vzv_precompute_bench(c);
+ }
+
+ #[cfg(all(feature = "bench", feature = "serde"))]
+ {
+ serde_benches(c);
+ }
+}
+
+#[cfg(feature = "bench")]
+fn char_count_benches(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, _) = random_alphanums(2..=20, 100, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+
+ // *** Count chars in vec of 100 strings ***
+ c.bench_function("vzv/char_count/slice", |b| {
+ b.iter(|| {
+ black_box(&string_vec)
+ .iter()
+ .fold(0, |sum, string| sum + string.chars().count())
+ });
+ });
+
+ // *** Count chars in vec of 100 strings ***
+ c.bench_function("vzv/char_count/vzv", |b| {
+ b.iter(|| {
+ black_box(&vzv)
+ .iter()
+ .fold(0, |sum, string| sum + string.chars().count())
+ });
+ });
+}
+
+/// Compares `binary_search` on a `Vec<String>` against the same data in a `VarZeroVec<str>`,
+/// both for a batch of 10 random needles and for a single fixed needle.
+///
+/// NOTE(review): the 500-string haystack comes straight from `random_alphanums` and is never
+/// sorted in this function, so the search results themselves are presumably not meaningful;
+/// only the time taken is measured. Confirm this is intentional.
+#[cfg(feature = "bench")]
+fn binary_search_benches(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
+ let (needles, _) = random_alphanums(2..=20, 10, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+ let single_needle = "lmnop".to_owned();
+
+ // *** Binary search vec of 500 strings 10 times ***
+ c.bench_function("vzv/binary_search/slice", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&string_vec).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ // *** Binary search vec of 500 strings 10 times ***
+ c.bench_function("vzv/binary_search/vzv", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&vzv).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ c.bench_function("vzv/binary_search/single/slice", |b| {
+ b.iter(|| black_box(&string_vec).binary_search(black_box(&single_needle)));
+ });
+
+ c.bench_function("vzv/binary_search/single/vzv", |b| {
+ b.iter(|| black_box(&vzv).binary_search(black_box(&single_needle)));
+ });
+}
+
+#[cfg(all(feature = "bench", feature = "serde"))]
+fn serde_benches(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, _) = random_alphanums(2..=20, 100, seed);
+ let bincode_vec = bincode::serialize(&string_vec).unwrap();
+ let vzv: VarZeroVec<str> = VarZeroVec::from(&*string_vec);
+ let bincode_vzv = bincode::serialize(&vzv).unwrap();
+
+ // *** Deserialize vec of 100 strings ***
+ c.bench_function("vzv/deserialize/string/vec_owned", |b| {
+ b.iter(|| bincode::deserialize::<Vec<String>>(black_box(&bincode_vec)));
+ });
+
+ // *** Deserialize vec of 100 strings ***
+ c.bench_function("vzv/deserialize/string/vec_borrowed", |b| {
+ b.iter(|| bincode::deserialize::<Vec<&str>>(black_box(&bincode_vec)));
+ });
+
+ // *** Deserialize vec of 100 strings ***
+ c.bench_function("vzv/deserialize/string/vzv", |b| {
+ b.iter(|| bincode::deserialize::<VarZeroVec<str>>(black_box(&bincode_vzv)));
+ });
+}
+
+#[cfg(feature = "bench")]
+// Testing differences between operating on slices with precomputed/non-precomputed indexing info
+fn vzv_precompute_bench(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
+ let (needles, _) = random_alphanums(2..=20, 10, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+ let borrowed = vzv.as_components();
+ let slice = vzv.as_slice();
+ let single_needle = "lmnop";
+
+ c.bench_function("vzv_precompute/get/precomputed", |b| {
+ b.iter(|| black_box(&borrowed).get(100));
+ });
+
+ c.bench_function("vzv_precompute/get/slice", |b| {
+ b.iter(|| black_box(&slice).get(100));
+ });
+
+ c.bench_function("vzv_precompute/search/precomputed", |b| {
+ b.iter(|| black_box(&borrowed).binary_search(single_needle));
+ });
+
+ c.bench_function("vzv_precompute/search/slice", |b| {
+ b.iter(|| black_box(&slice).binary_search(single_needle));
+ });
+
+ c.bench_function("vzv_precompute/search_multi/precomputed", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&borrowed).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ c.bench_function("vzv_precompute/search_multi/slice", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&slice).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+}
+
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/zerovec/benches/zeromap.rs b/third_party/rust/zerovec/benches/zeromap.rs
new file mode 100644
index 0000000000..5f3e87b8c0
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zeromap.rs
@@ -0,0 +1,396 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::collections::HashMap;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use zerovec::maps::ZeroMapKV;
+use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec};
+use zerovec::{ZeroHashMap, ZeroMap};
+
+const DATA: [(&str, &str); 16] = [
+ ("ar", "Arabic"),
+ ("bn", "Bangla"),
+ ("ccp", "Chakma"),
+ ("chr", "Cherokee"),
+ ("el", "Greek"),
+ ("en", "English"),
+ ("eo", "Esperanto"),
+ ("es", "Spanish"),
+ ("fr", "French"),
+ ("iu", "Inuktitut"),
+ ("ja", "Japanese"),
+ ("ru", "Russian"),
+ ("sr", "Serbian"),
+ ("th", "Thai"),
+ ("tr", "Turkish"),
+ ("zh", "Chinese"),
+];
+
+const POSTCARD: [u8; 282] = [
+ 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14,
+ 0, 0, 0, 16, 0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0,
+ 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101,
+ 110, 101, 111, 101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114,
+ 122, 104, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0,
+ 31, 0, 0, 0, 38, 0, 0, 0, 47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84,
+ 0, 0, 0, 91, 0, 0, 0, 95, 0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103,
+ 108, 97, 67, 104, 97, 107, 109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101,
+ 107, 69, 110, 103, 108, 105, 115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97,
+ 110, 105, 115, 104, 70, 114, 101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74,
+ 97, 112, 97, 110, 101, 115, 101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97,
+ 110, 84, 104, 97, 105, 84, 117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101,
+];
+
+const POSTCARD_HASHMAP: [u8; 176] = [
+ 16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 107, 109,
+ 97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114,
+ 107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101,
+ 115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101,
+ 114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83,
+ 101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9,
+ 73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98,
+ 110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65,
+ 114, 97, 98, 105, 99,
+];
+
+const POSTCARD_ZEROHASHMAP: [u8; 412] = [
+ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
+ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
+ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 11, 0,
+ 0, 0, 13, 0, 0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0,
+ 0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 101, 110, 102, 114, 106, 97, 101, 108, 99, 104, 114,
+ 98, 110, 115, 114, 105, 117, 101, 111, 116, 114, 99, 99, 112, 122, 104, 114, 117, 101, 115,
+ 116, 104, 97, 114, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 13, 0, 0, 0, 21, 0, 0, 0, 26,
+ 0, 0, 0, 34, 0, 0, 0, 40, 0, 0, 0, 47, 0, 0, 0, 56, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 78, 0,
+ 0, 0, 85, 0, 0, 0, 92, 0, 0, 0, 99, 0, 0, 0, 103, 0, 0, 0, 69, 110, 103, 108, 105, 115, 104,
+ 70, 114, 101, 110, 99, 104, 74, 97, 112, 97, 110, 101, 115, 101, 71, 114, 101, 101, 107, 67,
+ 104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 83, 101, 114, 98, 105, 97, 110,
+ 73, 110, 117, 107, 116, 105, 116, 117, 116, 69, 115, 112, 101, 114, 97, 110, 116, 111, 84, 117,
+ 114, 107, 105, 115, 104, 67, 104, 97, 107, 109, 97, 67, 104, 105, 110, 101, 115, 101, 82, 117,
+ 115, 115, 105, 97, 110, 83, 112, 97, 110, 105, 115, 104, 84, 104, 97, 105, 65, 114, 97, 98,
+ 105, 99,
+];
+
+/// Run this function to print new data to the console.
+/// Requires the optional `serde` Cargo feature.
+#[allow(dead_code)]
+fn generate_zeromap() {
+ let map = build_zeromap(false);
+ let buf = postcard::to_stdvec(&map).unwrap();
+ println!("{buf:?}");
+}
+
+/// Run this function to print new data to the console.
+/// Requires the optional `serde` Cargo feature.
+#[allow(dead_code)]
+fn generate_hashmap() {
+ let map = build_hashmap(false);
+ let buf = postcard::to_stdvec(&map).unwrap();
+ println!("{buf:?}");
+}
+
+/// Run this function to print new data to the console.
+/// Requires the optional `serde` Cargo feature.
+#[allow(dead_code)]
+fn generate_zerohashmap() {
+ let map = build_zerohashmap(false);
+ let buf = postcard::to_stdvec(&map).unwrap();
+ println!("{buf:?}");
+}
+
+fn overview_bench(c: &mut Criterion) {
+ bench_zeromap(c);
+ bench_hashmap(c);
+ bench_zerohashmap(c);
+}
+
+/// Registers the `ZeroMap` benchmarks; the `*_large` variants run only with the `bench` feature.
+fn bench_zeromap(c: &mut Criterion) {
+ // Uncomment the following line to re-generate the const data (`POSTCARD`).
+ // generate_zeromap();
+
+ bench_deserialize(c);
+ #[cfg(feature = "bench")]
+ bench_deserialize_large(c);
+ bench_lookup(c);
+ #[cfg(feature = "bench")]
+ bench_lookup_large(c);
+}
+
+/// Builds the `ZeroMap` fixture from `DATA`.
+///
+/// With `large == false` the map holds the 16 `DATA` entries as-is. With `large == true` each
+/// key is expanded into 8192 keys by appending a zero-padded 4-digit counter, all mapping to
+/// the original value.
+fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> {
+ // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes
+ // *minutes*, whereas this code runs in milliseconds
+ let mut keys = Vec::new();
+ let mut values = Vec::new();
+ let mut data = DATA.to_vec();
+ data.sort();
+ for &(key, value) in data.iter() {
+ if large {
+ for n in 0..8192 {
+ keys.push(format!("{key}{n:04}"));
+ values.push(indexify(value));
+ }
+ } else {
+ keys.push(key.to_owned());
+ values.push(indexify(value));
+ }
+ }
+
+ let keys = keys.iter().map(|s| indexify(s)).collect::<Vec<_>>();
+ // SAFETY: keys are sorted by construction: `data` is sorted above and the zero-padded suffixes are pushed in ascending order. Presumably `from_parts_unchecked` also requires unique keys and equal lengths, which hold here (one value is pushed per key) - confirm against its docs.
+ unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) }
+}
+
+fn bench_deserialize(c: &mut Criterion) {
+ c.bench_function("zeromap/deserialize/small", |b| {
+ b.iter(|| {
+ let map: ZeroMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&POSTCARD)).unwrap();
+ assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_deserialize_large(c: &mut Criterion) {
+ let buf = large_zeromap_postcard_bytes();
+ c.bench_function("zeromap/deserialize/large", |b| {
+ b.iter(|| {
+ let map: ZeroMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&buf)).unwrap();
+ assert_eq!(map.get(indexify("iu3333")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+fn bench_lookup(c: &mut Criterion) {
+ let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(black_box(&POSTCARD)).unwrap();
+ c.bench_function("zeromap/lookup/small", |b| {
+ b.iter(|| {
+ assert_eq!(
+ map.get(black_box(indexify("iu"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_lookup_large(c: &mut Criterion) {
+ let buf = large_zeromap_postcard_bytes();
+ let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
+ c.bench_function("zeromap/lookup/large", |b| {
+ b.iter(|| {
+ assert_eq!(
+ map.get(black_box(indexify("iu3333"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn large_zeromap_postcard_bytes() -> Vec<u8> {
+ postcard::to_stdvec(&build_zeromap(true)).unwrap()
+}
+
+fn bench_hashmap(c: &mut Criterion) {
+ // Uncomment the following line to re-generate the const data.
+ // generate_hashmap();
+
+ bench_deserialize_hashmap(c);
+ #[cfg(feature = "bench")]
+ bench_deserialize_large_hashmap(c);
+ bench_lookup_hashmap(c);
+ #[cfg(feature = "bench")]
+ bench_lookup_large_hashmap(c);
+}
+
+/// Builds the `HashMap<String, String>` baseline fixture from `DATA`.
+///
+/// With `large == true` each key is expanded into 8192 keys by appending a counter.
+///
+/// NOTE(review): the counter here is unpadded (`{key}{n}`), whereas `build_zeromap` pads it to
+/// four digits (`{key}{n:04}`), so the two "large" fixtures do not contain identical key sets.
+/// The benchmarked lookup key `iu3333` exists in both; confirm the difference is acceptable.
+fn build_hashmap(large: bool) -> HashMap<String, String> {
+ let mut map: HashMap<String, String> = HashMap::new();
+ for &(key, value) in DATA.iter() {
+ if large {
+ for n in 0..8192 {
+ map.insert(format!("{key}{n}"), value.to_owned());
+ }
+ } else {
+ map.insert(key.to_owned(), value.to_owned());
+ }
+ }
+ map
+}
+
+fn bench_deserialize_hashmap(c: &mut Criterion) {
+ c.bench_function("zeromap/deserialize/small/hashmap", |b| {
+ b.iter(|| {
+ let map: HashMap<String, String> =
+ postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
+ assert_eq!(map.get("iu"), Some(&"Inuktitut".to_owned()));
+ })
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_deserialize_large_hashmap(c: &mut Criterion) {
+ let buf = large_hashmap_postcard_bytes();
+ c.bench_function("zeromap/deserialize/large/hashmap", |b| {
+ b.iter(|| {
+ let map: HashMap<String, String> = postcard::from_bytes(black_box(&buf)).unwrap();
+ assert_eq!(map.get("iu3333"), Some(&"Inuktitut".to_owned()));
+ })
+ });
+}
+
+fn bench_lookup_hashmap(c: &mut Criterion) {
+ let map: HashMap<String, String> = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
+ c.bench_function("zeromap/lookup/small/hashmap", |b| {
+ b.iter(|| {
+ assert_eq!(map.get(black_box("iu")), Some(&"Inuktitut".to_owned()));
+ assert_eq!(map.get(black_box("zz")), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_lookup_large_hashmap(c: &mut Criterion) {
+ let buf = large_hashmap_postcard_bytes();
+ let map: HashMap<String, String> = postcard::from_bytes(&buf).unwrap();
+ c.bench_function("zeromap/lookup/large/hashmap", |b| {
+ b.iter(|| {
+ assert_eq!(map.get(black_box("iu3333")), Some(&"Inuktitut".to_owned()));
+ assert_eq!(map.get(black_box("zz")), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn large_hashmap_postcard_bytes() -> Vec<u8> {
+ postcard::to_stdvec(&build_hashmap(true)).unwrap()
+}
+
+fn bench_zerohashmap(c: &mut Criterion) {
+ // Uncomment the following line to re-generate the const data.
+ // generate_zerohashmap();
+
+ bench_deserialize_zerohashmap(c);
+ #[cfg(feature = "bench")]
+ bench_deserialize_large_zerohashmap(c);
+ bench_zerohashmap_lookup(c);
+ #[cfg(feature = "bench")]
+ bench_zerohashmap_lookup_large(c);
+}
+
+/// Builds the `ZeroHashMap` fixture from `DATA` via `ZeroHashMap::from_iter`.
+///
+/// With `large == true` each key is expanded into 512 keys by appending an unpadded counter.
+///
+/// NOTE(review): this is 512 keys per entry, not the 8192 used by `build_zeromap` and
+/// `build_hashmap`, so "large" results for `ZeroHashMap` are not directly comparable - confirm.
+fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> {
+ let mut kv = Vec::new();
+
+ for (key, value) in DATA.iter() {
+ if large {
+ for n in 0..512 {
+ kv.push((format!("{key}{n}"), indexify(value)));
+ }
+ } else {
+ kv.push((key.to_string(), indexify(value)));
+ }
+ }
+
+ ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1)))
+}
+
+fn bench_deserialize_zerohashmap(c: &mut Criterion) {
+ c.bench_function("zerohashmap/deserialize/small", |b| {
+ b.iter(|| {
+ let map: ZeroHashMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
+ assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+/// Benchmarks postcard deserialization of the large `ZeroHashMap` fixture, followed by one
+/// lookup to check the result.
+///
+/// Gated on the `bench` feature: the fixture builder `large_zerohashmap_postcard_bytes` and the
+/// only call site (in `bench_zerohashmap`) are both `#[cfg(feature = "bench")]`, so without this
+/// gate the function references an undefined name whenever `bench` is disabled.
+#[cfg(feature = "bench")]
+fn bench_deserialize_large_zerohashmap(c: &mut Criterion) {
+    let buf = large_zerohashmap_postcard_bytes();
+    c.bench_function("zerohashmap/deserialize/large", |b| {
+        b.iter(|| {
+            let map: ZeroHashMap<Index32Str, Index32Str> =
+                postcard::from_bytes(black_box(&buf)).unwrap();
+            assert_eq!(map.get(indexify("iu333")).map(|x| &x.0), Some("Inuktitut"));
+        })
+    });
+}
+
+fn bench_zerohashmap_lookup(c: &mut Criterion) {
+ let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
+
+ c.bench_function("zerohashmap/lookup/small", |b| {
+ b.iter(|| {
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("iu"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
+ None
+ );
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_zerohashmap_lookup_large(c: &mut Criterion) {
+ let buf = large_zerohashmap_postcard_bytes();
+ let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
+
+ c.bench_function("zerohashmap/lookup/large", |b| {
+ b.iter(|| {
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("iu333"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
+ None
+ );
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn large_zerohashmap_postcard_bytes() -> Vec<u8> {
+ postcard::to_stdvec(&build_zerohashmap(true)).unwrap()
+}
+
+criterion_group!(benches, overview_bench);
+criterion_main!(benches);
+
+/// This type lets us use a u32-index-format VarZeroVec with the ZeroMap.
+///
+/// Eventually we will have a FormatSelector type that lets us do `ZeroMap<FormatSelector<K, Index32>, V>`
+/// (https://github.com/unicode-org/icu4x/issues/2312)
+///
+/// `Index32StrBorrowed` itself isn't actually important; it's just more convenient to use make_varule to get the
+/// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually
+/// define a Serialize implementation, and that would be gnarly)
+/// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE)
+#[zerovec::make_varule(Index32Str)]
+#[zerovec::skip_derive(ZeroMapKV)]
+#[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+#[zerovec::derive(Serialize, Deserialize, Hash)]
+pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str);
+
+impl<'a> ZeroMapKV<'a> for Index32Str {
+ type Container = VarZeroVec<'a, Index32Str, Index32>;
+ type Slice = VarZeroSlice<Index32Str, Index32>;
+ type GetType = Index32Str;
+ type OwnedType = Box<Index32Str>;
+}
+
+/// Reinterprets a `&str` as a `&Index32Str` without copying.
+#[inline]
+fn indexify(s: &str) -> &Index32Str {
+ // SAFETY (assumed - confirm against the `make_varule` expansion): `Index32Str` is generated
+ // from `Index32StrBorrowed`, whose only field is a `&str`, so it is presumably a wrapper
+ // around `str` with the same layout and validity invariants, making this pointer cast sound.
+ unsafe { &*(s as *const str as *const Index32Str) }
+}
diff --git a/third_party/rust/zerovec/benches/zerovec.rs b/third_party/rust/zerovec/benches/zerovec.rs
new file mode 100644
index 0000000000..5ed9421603
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zerovec.rs
@@ -0,0 +1,165 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::SeedableRng;
+use rand_distr::{Distribution, LogNormal};
+use rand_pcg::Lcg64Xsh32;
+use std::fmt;
+
+#[path = "../src/samples.rs"]
+mod samples;
+use samples::*;
+
+use zerovec::ule::*;
+use zerovec::ZeroVec;
+
+/// Byte buffer wrapper used as backing storage for the `ZeroVec` built by
+/// `vec_to_unaligned_uvec`.
+// NOTE(review): `repr(align(8))` applies to this wrapper struct; whether the `Vec`'s heap
+// allocation is 8-byte aligned as well is not established here — confirm.
+#[repr(align(8))]
+#[derive(Default)]
+struct AlignedBuffer(Vec<u8>);
+
+/// Produces the `(needles, haystack)` pair of `u32` lists used for stress testing.
+///
+/// The haystack holds 1000 sorted samples and the needles hold the next 100 samples in
+/// generation order; both are drawn from one generator with a fixed seed.
+#[allow(dead_code)]
+fn get_needles_and_haystack() -> (Vec<u32>, Vec<u32>) {
+    // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
+    // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
+    // spirit, correlates with Unicode properties (many low values and a long tail of high values)
+    let mut rng = Lcg64Xsh32::seed_from_u64(2021);
+    let dist = LogNormal::new(10.0, 1.0).unwrap();
+
+    // The haystack is sampled first, so the needles continue the same random stream.
+    let mut haystack: Vec<u32> = (&dist)
+        .sample_iter(&mut rng)
+        .take(1000)
+        .map(|sample| sample as u32)
+        .collect();
+    haystack.sort_unstable();
+
+    let needles: Vec<u32> = (&dist)
+        .sample_iter(&mut rng)
+        .take(100)
+        .map(|sample| sample as u32)
+        .collect();
+
+    (needles, haystack)
+}
+
+/// Appends one padding byte and then the byte representation of `vec` to `buffer`, and
+/// returns a `ZeroVec` parsed from everything after the buffer's first byte.
+///
+/// The returned `ZeroVec` borrows from `buffer`. Panics if those bytes do not parse as a
+/// `ZeroVec<T>`.
+// NOTE(review): parsing starts at offset 1 of the whole buffer, so this presumably expects
+// `buffer` to be empty on entry — confirm against callers.
+#[allow(dead_code, clippy::ptr_arg)]
+fn vec_to_unaligned_uvec<'a, T>(vec: &Vec<T>, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T>
+where
+    T: EqULE + Copy + PartialEq + fmt::Debug,
+{
+    // Pad with zero to ensure it is not aligned
+    buffer.0.push(0);
+    buffer
+        .0
+        .extend(ZeroVec::from_slice_or_alloc(vec.as_slice()).as_bytes());
+    // Skip the padding byte so the parsed data begins one byte into the buffer.
+    ZeroVec::<T>::parse_byte_slice(&buffer.0[1..]).unwrap()
+}
+
+/// Criterion entry point: always runs the parse-and-sum overview benchmark and, when the
+/// `bench` feature is enabled, the detailed sum and binary search benchmarks too.
+fn overview_bench(c: &mut Criterion) {
+    c.bench_function("zerovec/overview", |b| {
+        b.iter(|| {
+            // Parsing happens inside the timed closure, so it is measured along with the sum.
+            ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
+                .unwrap()
+                .iter()
+                .sum::<u32>()
+        });
+    });
+
+    #[cfg(feature = "bench")]
+    {
+        sum_benches(c);
+        binary_search_benches(c);
+    }
+}
+
+/// Benchmarks a wrapping sum over 19 `u32` values read from a native slice and from two
+/// `ZeroVec`s whose backing bytes start at offset 0 and offset 1 of `TEST_BUFFER_LE`.
+#[cfg(feature = "bench")]
+fn sum_benches(c: &mut Criterion) {
+    let normal_slice = &TEST_SLICE[0..19];
+    // 76 bytes == 19 four-byte elements.
+    let aligned_ule_slice = <u32 as AsULE>::ULE::parse_byte_slice(&TEST_BUFFER_LE[0..76]).unwrap();
+    // Same length, but shifted by one byte relative to the slice above.
+    let unalign_ule_slice = <u32 as AsULE>::ULE::parse_byte_slice(&TEST_BUFFER_LE[1..77]).unwrap();
+
+    // All three inputs must have the same element count for the timings to be comparable.
+    assert_eq!(normal_slice.len(), aligned_ule_slice.len());
+    assert_eq!(normal_slice.len(), unalign_ule_slice.len());
+
+    c.bench_function("zerovec/sum/sample/slice", |b| {
+        b.iter(|| {
+            black_box(normal_slice)
+                .iter()
+                .copied()
+                .fold(0u32, |sum, val| sum.wrapping_add(val))
+        })
+    });
+
+    c.bench_function("zerovec/sum/sample/zerovec_aligned", |b| {
+        b.iter(|| {
+            ZeroVec::<u32>::new_borrowed(black_box(aligned_ule_slice))
+                .iter()
+                .fold(0u32, |sum, val| sum.wrapping_add(val))
+        });
+    });
+
+    c.bench_function("zerovec/sum/sample/zerovec_unaligned", |b| {
+        b.iter(|| {
+            ZeroVec::<u32>::new_borrowed(black_box(unalign_ule_slice))
+                .iter()
+                .fold(0u32, |sum, val| sum.wrapping_add(val))
+        });
+    });
+}
+
+/// Benchmarks `binary_search` on native slices against `ZeroVec`, first on the fixed sample
+/// data and then on the generated log-normal needles and haystack.
+#[cfg(feature = "bench")]
+fn binary_search_benches(c: &mut Criterion) {
+    c.bench_function("zerovec/binary_search/sample/slice", |b| {
+        b.iter(|| black_box(&TEST_SLICE).binary_search(&0x0c0d0c));
+    });
+
+    c.bench_function("zerovec/binary_search/sample/zerovec", |b| {
+        // Parsed once outside `b.iter`, so only the search itself is timed.
+        let zerovec = ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap();
+        b.iter(|| zerovec.binary_search(&0x0c0d0c));
+    });
+
+    let (needles_100, haystack) = get_needles_and_haystack();
+    // Only search for 50 needles to put all figures in nanoseconds
+    let needles_50 = &needles_100[0..50];
+
+    // *** Binary search vec of 1000 `u32` 50 times ***
+    c.bench_function("zerovec/binary_search/log_normal/slice", |b| {
+        b.iter(|| {
+            black_box(&needles_50)
+                .iter()
+                .map(|needle| black_box(&haystack).binary_search(needle))
+                .filter(|r| r.is_ok())
+                .count()
+        });
+    });
+
+    // Build the `ZeroVec` counterpart of the haystack and check it holds the same values.
+    let mut buffer = AlignedBuffer::default();
+    let zerovec = vec_to_unaligned_uvec(black_box(&haystack), &mut buffer);
+    assert_eq!(zerovec, haystack.as_slice());
+
+    // *** Binary search ZeroVec of 1000 `u32` 50 times ***
+    c.bench_function("zerovec/binary_search/log_normal/zerovec", |b| {
+        b.iter(|| {
+            black_box(&needles_50)
+                .iter()
+                .map(|needle| black_box(&zerovec).binary_search(needle))
+                .filter(|r| r.is_ok())
+                .count()
+        });
+    });
+
+    // 36315 is the distribution mean quoted in `get_needles_and_haystack`.
+    let single_needle = 36315;
+
+    c.bench_function("zerovec/binary_search/log_normal/single/slice", |b| {
+        b.iter(|| black_box(&haystack).binary_search(&single_needle));
+    });
+
+    c.bench_function("zerovec/binary_search/log_normal/single/zerovec", |b| {
+        b.iter(|| black_box(&zerovec).binary_search(&single_needle));
+    });
+}
+
+// Register `overview_bench` as the only member of the `benches` group and hand that group
+// to criterion's generated entry point.
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/zerovec/benches/zerovec_iai.rs b/third_party/rust/zerovec/benches/zerovec_iai.rs
new file mode 100644
index 0000000000..c34a6aebc0
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zerovec_iai.rs
@@ -0,0 +1,65 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use iai::black_box;
+
+#[path = "../src/samples.rs"]
+mod samples;
+use samples::*;
+
+use zerovec::ule::VarULE;
+use zerovec::VarZeroSlice;
+use zerovec::ZeroVec;
+
+/// Baseline: sums the elements of the native `TEST_SLICE`.
+fn sum_slice() -> u32 {
+    black_box(TEST_SLICE).iter().sum::<u32>()
+}
+
+/// Parses `TEST_BUFFER_LE` as a `ZeroVec<u32>` and sums its elements; the parse is part of
+/// the measured work.
+fn sum_zerovec() -> u32 {
+    ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
+        .unwrap()
+        .iter()
+        .sum::<u32>()
+}
+
+/// Baseline: binary-searches the native `TEST_SLICE` for `0x0c0d0c`.
+fn binarysearch_slice() -> Result<usize, usize> {
+    black_box(TEST_SLICE).binary_search(&0x0c0d0c)
+}
+
+/// Parses `TEST_BUFFER_LE` as a `ZeroVec<u32>` and binary-searches it for `0x0c0d0c`; the
+/// parse is part of the measured work.
+fn binarysearch_zerovec() -> Result<usize, usize> {
+    ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
+        .unwrap()
+        .binary_search(&0x0c0d0c)
+}
+
+/// Parses `TEST_VARZEROSLICE_BYTES` with validation and then fetches the element at index 1.
+fn varzeroslice_parse_get() -> Option<&'static str> {
+    let slice: &'static VarZeroSlice<str> =
+        VarZeroSlice::parse_byte_slice(black_box(TEST_VARZEROSLICE_BYTES)).unwrap();
+    slice.get(black_box(1))
+}
+
+/// Wraps `TEST_VARZEROSLICE_BYTES` without validation and then fetches the element at
+/// index 1 through the checked `get`.
+fn varzeroslice_get() -> Option<&'static str> {
+    // Safety: The bytes are valid.
+    let slice: &'static VarZeroSlice<str> =
+        unsafe { VarZeroSlice::from_byte_slice_unchecked(black_box(TEST_VARZEROSLICE_BYTES)) };
+    slice.get(black_box(1))
+}
+
+/// Wraps `TEST_VARZEROSLICE_BYTES` without validation and fetches the element at index 1
+/// through the unchecked accessor.
+fn varzeroslice_get_unchecked() -> &'static str {
+    // Safety: The bytes are valid.
+    let slice: &'static VarZeroSlice<str> =
+        unsafe { VarZeroSlice::from_byte_slice_unchecked(black_box(TEST_VARZEROSLICE_BYTES)) };
+    // Safety: The VarZeroVec has length 4.
+    unsafe { slice.get_unchecked(black_box(1)) }
+}
+
+// Every benchmark function defined in this file is registered with the iai harness here.
+iai::main!(
+    sum_slice,
+    sum_zerovec,
+    binarysearch_slice,
+    binarysearch_zerovec,
+    varzeroslice_parse_get,
+    varzeroslice_get,
+    varzeroslice_get_unchecked,
+);
diff --git a/third_party/rust/zerovec/benches/zerovec_serde.rs b/third_party/rust/zerovec/benches/zerovec_serde.rs
new file mode 100644
index 0000000000..3a9bb051a2
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zerovec_serde.rs
@@ -0,0 +1,145 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::SeedableRng;
+use rand_distr::{Distribution, LogNormal};
+use rand_pcg::Lcg64Xsh32;
+
+#[path = "../src/samples.rs"]
+mod samples;
+use samples::*;
+
+use zerovec::ZeroVec;
+
+/// Returns `count` pseudo-random `u32` values for stress testing.
+///
+/// The generator uses a fixed seed, so every call yields the same sequence.
+#[allow(dead_code)]
+fn random_numbers(count: usize) -> Vec<u32> {
+    // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
+    // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
+    // spirit, correlates with Unicode properties (many low values and a long tail of high values)
+    let mut rng = Lcg64Xsh32::seed_from_u64(2021);
+    let dist = LogNormal::new(10.0, 1.0).unwrap();
+    let samples = (&dist).sample_iter(&mut rng).take(count);
+    samples.map(|sample| sample as u32).collect()
+}
+
+/// Criterion entry point: always runs the deserialize-and-sum overview benchmark and, when
+/// the `bench` feature is enabled, the `u32`, `char` and stress benchmarks too.
+fn overview_bench(c: &mut Criterion) {
+    c.bench_function("zerovec_serde/overview", |b| {
+        // Same as "zerovec_serde/deserialize_sum/u32/zerovec"
+        // The bincode buffer is produced once here; only deserialization and the sum are timed.
+        let buffer = bincode::serialize(
+            &ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
+        )
+        .unwrap();
+        b.iter(|| {
+            bincode::deserialize::<ZeroVec<u32>>(&buffer)
+                .unwrap()
+                .iter()
+                .sum::<u32>()
+        });
+    });
+
+    #[cfg(feature = "bench")]
+    {
+        u32_benches(c);
+        char_benches(c);
+        stress_benches(c);
+    }
+}
+
+/// Benchmarks bincode serialization and deserialize-then-sum of the `u32` sample data,
+/// comparing a plain `Vec<u32>` with `ZeroVec<u32>`.
+#[cfg(feature = "bench")]
+fn u32_benches(c: &mut Criterion) {
+    c.bench_function("zerovec_serde/serialize/u32/slice", |b| {
+        b.iter(|| bincode::serialize(&Vec::from(black_box(TEST_SLICE))));
+    });
+
+    c.bench_function("zerovec_serde/deserialize_sum/u32/slice", |b| {
+        // Serialized once up front; only deserialization and the sum are timed.
+        let buffer = bincode::serialize(&Vec::from(black_box(TEST_SLICE))).unwrap();
+        b.iter(|| {
+            bincode::deserialize::<Vec<u32>>(&buffer)
+                .unwrap()
+                .iter()
+                .sum::<u32>()
+        });
+    });
+
+    c.bench_function("zerovec_serde/serialize/u32/zerovec", |b| {
+        b.iter(|| bincode::serialize(&ZeroVec::from_slice_or_alloc(black_box(TEST_SLICE))));
+    });
+
+    c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| {
+        // Serialized once up front; only deserialization and the sum are timed.
+        let buffer = bincode::serialize(
+            &ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
+        )
+        .unwrap();
+        b.iter(|| {
+            bincode::deserialize::<ZeroVec<u32>>(&buffer)
+                .unwrap()
+                .iter()
+                .sum::<u32>()
+        });
+    });
+}
+
+/// Benchmarks bincode serialization and deserialization of a fixed list of `char`s,
+/// comparing a plain `Vec<char>` with `ZeroVec<char>`.
+#[cfg(feature = "bench")]
+fn char_benches(c: &mut Criterion) {
+    const ORIGINAL_CHARS: &[char] = &[
+        'ⶢ', '⺇', 'Ⱜ', '◁', '◩', '⌂', '⼅', '⏻', '⢜', '◊', 'ⲫ', '⏷', '◢', '⟉', '℞',
+    ];
+
+    // Built once and shared by the two ZeroVec benchmarks below.
+    let char_zero_vec = &ZeroVec::alloc_from_slice(ORIGINAL_CHARS);
+
+    c.bench_function("zerovec_serde/serialize/char/slice", |b| {
+        b.iter(|| bincode::serialize(black_box(&Vec::from(ORIGINAL_CHARS))));
+    });
+
+    c.bench_function("zerovec_serde/deserialize/char/slice", |b| {
+        let buffer = bincode::serialize(black_box(&Vec::from(ORIGINAL_CHARS))).unwrap();
+        b.iter(|| bincode::deserialize::<Vec<char>>(&buffer));
+    });
+
+    c.bench_function("zerovec_serde/serialize/char/zerovec", |b| {
+        b.iter(|| bincode::serialize(black_box(char_zero_vec)));
+    });
+
+    c.bench_function("zerovec_serde/deserialize/char/zerovec", |b| {
+        let buffer = bincode::serialize(black_box(char_zero_vec)).unwrap();
+        b.iter(|| bincode::deserialize::<ZeroVec<char>>(&buffer));
+    });
+}
+
+/// Benchmarks bincode deserialization and summation of 100 generated `u32` values,
+/// comparing a plain `Vec<u32>` with `ZeroVec<u32>`.
+#[cfg(feature = "bench")]
+fn stress_benches(c: &mut Criterion) {
+    // Both bincode buffers are prepared up front so only the measured operation is timed.
+    let number_vec = random_numbers(100);
+    let bincode_vec = bincode::serialize(&number_vec).unwrap();
+    let zerovec_aligned = ZeroVec::from_slice_or_alloc(number_vec.as_slice());
+    let bincode_zerovec = bincode::serialize(&zerovec_aligned).unwrap();
+
+    // *** Deserialize vec of 100 `u32` ***
+    c.bench_function("zerovec_serde/deserialize/stress/vec", |b| {
+        b.iter(|| bincode::deserialize::<Vec<u32>>(&bincode_vec));
+    });
+
+    // *** Deserialize ZeroVec of 100 `u32` ***
+    c.bench_function("zerovec_serde/deserialize/stress/zerovec", |b| {
+        b.iter(|| bincode::deserialize::<ZeroVec<u32>>(&bincode_zerovec));
+    });
+
+    // *** Compute sum of vec of 100 `u32` ***
+    c.bench_function("zerovec_serde/sum/stress/vec", |b| {
+        b.iter(|| black_box(&number_vec).iter().sum::<u32>());
+    });
+
+    // *** Compute sum of ZeroVec of 100 `u32` ***
+    let zerovec = ZeroVec::<u32>::parse_byte_slice(zerovec_aligned.as_bytes()).unwrap();
+    c.bench_function("zerovec_serde/sum/stress/zerovec", |b| {
+        b.iter(|| black_box(&zerovec).iter().sum::<u32>());
+    });
+}
+
+// Register `overview_bench` as the only member of the `benches` group and hand that group
+// to criterion's generated entry point.
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/zerovec/examples/zv_serde.rs b/third_party/rust/zerovec/examples/zv_serde.rs
new file mode 100644
index 0000000000..e4a8ec309b
--- /dev/null
+++ b/third_party/rust/zerovec/examples/zv_serde.rs
@@ -0,0 +1,51 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// This example demonstrates zero-copy, zero-allocation deserialization of a u16 vector
+// stored in a Postcard buffer.
+
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+
+icu_benchmark_macros::static_setup!();
+
+use zerovec::ZeroVec;
+
+/// Example payload: a single `ZeroVec<u16>` field that may borrow from the deserializer's
+/// input (`#[serde(borrow)]`).
+#[derive(serde::Serialize, serde::Deserialize)]
+struct DataStruct<'s> {
+    #[serde(borrow)]
+    pub nums: ZeroVec<'s, u16>,
+}
+
+// The sixteen sample values; they add up to 8141, the sum asserted in `main`.
+const U16_SLICE: [u16; 16] = [
+    196, 989, 414, 731, 660, 217, 716, 353, 218, 730, 245, 846, 122, 294, 922, 488,
+];
+
+// Postcard encoding of a `DataStruct` holding `U16_SLICE`: a length byte (0x20 == 32)
+// followed by the sixteen values as little-endian `u16`s.
+const POSTCARD_BYTES: [u8; 33] = [
+    0x20, 0xc4, 0x0, 0xdd, 0x3, 0x9e, 0x1, 0xdb, 0x2, 0x94, 0x2, 0xd9, 0x0, 0xcc, 0x2, 0x61, 0x1,
+    0xda, 0x0, 0xda, 0x2, 0xf5, 0x0, 0x4e, 0x3, 0x7a, 0x0, 0x26, 0x1, 0x9a, 0x3, 0xe8, 0x1,
+];
+
+/// Wraps `U16_SLICE` in a `DataStruct` and prints both its postcard encoding and the raw
+/// `ZeroVec` bytes in hexadecimal.
+///
+/// Panics if postcard serialization fails.
+#[allow(dead_code)]
+fn serialize() {
+    let nums = ZeroVec::from_slice_or_alloc(&U16_SLICE);
+    let data = DataStruct { nums };
+    let postcard_bytes = postcard::to_stdvec(&data).expect("Serialization should be successful");
+    println!("Postcard bytes: {postcard_bytes:#x?}");
+    let zerovec_bytes = data.nums.as_bytes();
+    println!("ZeroVec bytes: {:#x?}", zerovec_bytes);
+}
+
+/// Entry point of this `no_main` example: deserializes `POSTCARD_BYTES` into a `DataStruct`,
+/// asserts that its elements sum to 8141, and returns 0.
+#[no_mangle]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+    icu_benchmark_macros::main_setup!();
+
+    // Un-comment the following line to generate postcard data:
+    // serialize();
+
+    let data: DataStruct = postcard::from_bytes(&POSTCARD_BYTES).expect("Valid bytes");
+    let result = data.nums.iter().sum::<u16>();
+    assert_eq!(8141, result);
+
+    0
+}
diff --git a/third_party/rust/zerovec/src/error.rs b/third_party/rust/zerovec/src/error.rs
new file mode 100644
index 0000000000..85de3ecc8d
--- /dev/null
+++ b/third_party/rust/zerovec/src/error.rs
@@ -0,0 +1,55 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::any;
+use core::fmt;
+
+/// A generic error type to be used for decoding slices of ULE types
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum ZeroVecError {
+    /// Attempted to parse a buffer into a slice of the given ULE type but its
+    /// length was not compatible
+    ///
+    /// `ty` is the name of the target type and `len` is the rejected length.
+    InvalidLength { ty: &'static str, len: usize },
+    /// The byte sequence provided for `ty` failed to parse correctly
+    ///
+    /// `ty` is the name of the target type.
+    ParseError { ty: &'static str },
+    /// The byte buffer was not in the appropriate format for VarZeroVec
+    VarZeroVecFormatError,
+}
+
+impl fmt::Display for ZeroVecError {
+    /// Writes a one-line, human-readable description of the error.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        match self {
+            Self::InvalidLength { ty, len } => {
+                write!(f, "Invalid length {len} for slice of type {ty}")
+            }
+            Self::ParseError { ty } => {
+                write!(f, "Could not parse bytes to slice of type {ty}")
+            }
+            Self::VarZeroVecFormatError => f.write_str("Invalid format for VarZeroVec buffer"),
+        }
+    }
+}
+
+impl ZeroVecError {
+    /// Builds a [`ZeroVecError::ParseError`] that names the type `T`.
+    pub fn parse<T: ?Sized + 'static>() -> ZeroVecError {
+        let ty = any::type_name::<T>();
+        Self::ParseError { ty }
+    }
+
+    /// Builds a [`ZeroVecError::InvalidLength`] that names the type `T` and records `len`.
+    pub fn length<T: ?Sized + 'static>(len: usize) -> ZeroVecError {
+        let ty = any::type_name::<T>();
+        Self::InvalidLength { ty, len }
+    }
+}
+
+// With the `std` feature enabled, `ZeroVecError` also implements the standard `Error`
+// trait, using the trait's default methods.
+#[cfg(feature = "std")]
+impl ::std::error::Error for ZeroVecError {}
diff --git a/third_party/rust/zerovec/src/flexzerovec/databake.rs b/third_party/rust/zerovec/src/flexzerovec/databake.rs
new file mode 100644
index 0000000000..bd165352e8
--- /dev/null
+++ b/third_party/rust/zerovec/src/flexzerovec/databake.rs
@@ -0,0 +1,66 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{FlexZeroSlice, FlexZeroVec};
+use databake::*;
+
+impl Bake for FlexZeroVec<'_> {
+    /// Emits tokens that rebuild this vector: `FlexZeroVec::new()` when it is empty,
+    /// otherwise the baked slice converted with `as_flexzerovec()`.
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        if !self.is_empty() {
+            let baked_slice = self.as_ref().bake(env);
+            return quote! { #baked_slice.as_flexzerovec() };
+        }
+        quote! { zerovec::vecs::FlexZeroVec::new() }
+    }
+}
+
+impl Bake for &FlexZeroSlice {
+    /// Emits tokens that rebuild this slice: `FlexZeroSlice::new_empty()` when it is empty,
+    /// otherwise an unchecked construction from the baked bytes.
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        if !self.is_empty() {
+            let baked_bytes = databake::Bake::bake(&self.as_bytes(), env);
+            return quote! { unsafe { zerovec::vecs::FlexZeroSlice::from_byte_slice_unchecked(#baked_bytes) } };
+        }
+        quote! { zerovec::vecs::FlexZeroSlice::new_empty() }
+    }
+}
+
+/// Checks the baked output of `FlexZeroVec` for an empty vector and for `[1, 22, 333, 4444]`.
+#[test]
+fn test_baked_vec() {
+    test_bake!(
+        FlexZeroVec,
+        const: crate::vecs::FlexZeroVec::new(),
+        zerovec
+    );
+    // Width byte 2 followed by four little-endian 2-byte elements. The data length must be
+    // a multiple of the width, so the 0x5C byte of 4444 (written `\\`) has to be present.
+    test_bake!(
+        FlexZeroVec,
+        const: unsafe {
+            crate::vecs::FlexZeroSlice::from_byte_slice_unchecked(
+                b"\x02\x01\0\x16\0M\x01\\\x11"
+            )
+        }.as_flexzerovec(),
+        zerovec
+    );
+}
+
+/// Checks the baked output of `&FlexZeroSlice` for an empty slice and for
+/// `[1, 22, 333, 4444]`.
+#[test]
+fn test_baked_slice() {
+    test_bake!(
+        &FlexZeroSlice,
+        const: crate::vecs::FlexZeroSlice::new_empty(),
+        zerovec
+    );
+    // Width byte 2 followed by four little-endian 2-byte elements. The data length must be
+    // a multiple of the width, so the 0x5C byte of 4444 (written `\\`) has to be present.
+    test_bake!(
+        &FlexZeroSlice,
+        const: unsafe {
+            crate::vecs::FlexZeroSlice::from_byte_slice_unchecked(
+                b"\x02\x01\0\x16\0M\x01\\\x11"
+            )
+        },
+        zerovec
+    );
+}
diff --git a/third_party/rust/zerovec/src/flexzerovec/mod.rs b/third_party/rust/zerovec/src/flexzerovec/mod.rs
new file mode 100644
index 0000000000..b6d7e780ac
--- /dev/null
+++ b/third_party/rust/zerovec/src/flexzerovec/mod.rs
@@ -0,0 +1,20 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! See [`FlexZeroVec`](crate::vecs::FlexZeroVec) for details.
+
+pub(crate) mod owned;
+pub(crate) mod slice;
+pub(crate) mod vec;
+
+#[cfg(feature = "databake")]
+mod databake;
+
+#[cfg(feature = "serde")]
+mod serde;
+
+pub use owned::FlexZeroVecOwned;
+pub(crate) use slice::chunk_to_usize;
+pub use slice::FlexZeroSlice;
+pub use vec::FlexZeroVec;
diff --git a/third_party/rust/zerovec/src/flexzerovec/owned.rs b/third_party/rust/zerovec/src/flexzerovec/owned.rs
new file mode 100644
index 0000000000..7d7bfb33d6
--- /dev/null
+++ b/third_party/rust/zerovec/src/flexzerovec/owned.rs
@@ -0,0 +1,335 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use alloc::vec;
+use alloc::vec::Vec;
+use core::fmt;
+use core::iter::FromIterator;
+use core::ops::Deref;
+
+use super::FlexZeroSlice;
+use super::FlexZeroVec;
+
+/// The fully-owned variant of [`FlexZeroVec`]. Contains all mutation methods.
+///
+/// The single field holds the raw bytes of the vector.
+// Safety invariant: the inner bytes must deref to a valid `FlexZeroSlice`
+#[derive(Clone, PartialEq, Eq)]
+pub struct FlexZeroVecOwned(Vec<u8>);
+
+impl FlexZeroVecOwned {
+    /// Creates a new, empty [`FlexZeroVecOwned`].
+    pub fn new_empty() -> Self {
+        // The only byte stored is the width byte, set to 1; there are no data bytes.
+        FlexZeroVecOwned(vec![1u8])
+    }
+
+    /// Creates a [`FlexZeroVecOwned`] by copying the bytes of a [`FlexZeroSlice`].
+    pub fn from_slice(other: &FlexZeroSlice) -> FlexZeroVecOwned {
+        // safety: the bytes originate from a valid FlexZeroSlice
+        let bytes = other.as_bytes();
+        Self(bytes.to_vec())
+    }
+
+    /// Borrows this [`FlexZeroVecOwned`] as a [`FlexZeroSlice`].
+    pub fn as_slice(&self) -> &FlexZeroSlice {
+        let bytes: &[u8] = self.0.as_slice();
+        // safety: the slice is known to come from a valid parsed FlexZeroSlice
+        unsafe { FlexZeroSlice::from_byte_slice_unchecked(bytes) }
+    }
+
+    /// Mutably borrows this `FlexZeroVecOwned` as a [`FlexZeroSlice`].
+    pub(crate) fn as_mut_slice(&mut self) -> &mut FlexZeroSlice {
+        let bytes: &mut [u8] = self.0.as_mut_slice();
+        // safety: the slice is known to come from a valid parsed FlexZeroSlice
+        unsafe { FlexZeroSlice::from_byte_slice_mut_unchecked(bytes) }
+    }
+
+    /// Wraps this `FlexZeroVecOwned` in a [`FlexZeroVec::Owned`].
+    #[inline]
+    pub fn into_flexzerovec(self) -> FlexZeroVec<'static> {
+        FlexZeroVec::Owned(self)
+    }
+
+    /// Removes every value, leaving this `FlexZeroVecOwned` empty.
+    #[inline]
+    pub fn clear(&mut self) {
+        let empty = Self::new_empty();
+        *self = empty
+    }
+
+    /// Appends an item to the end of the vector.
+    ///
+    /// # Panics
+    ///
+    /// Panics if inserting the element would require allocating more than `usize::MAX` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::vecs::FlexZeroVec;
+    ///
+    /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect();
+    /// zv.to_mut().push(33);
+    /// assert_eq!(zv.to_vec(), vec![22, 44, 66, 33]);
+    /// ```
+    pub fn push(&mut self, item: usize) {
+        let info = self.get_insert_info(item);
+        // The byte buffer is resized to its new length before the element is written.
+        self.0.resize(info.new_bytes_len, 0);
+        let last_index = info.new_count - 1;
+        self.as_mut_slice().insert_impl(info, last_index);
+    }
+
+    /// Inserts an element into the middle of the vector.
+    ///
+    /// Caution: Both arguments to this function are of type `usize`. Please be careful to pass
+    /// the index first followed by the value second.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index > len`.
+    ///
+    /// Panics if inserting the element would require allocating more than `usize::MAX` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::vecs::FlexZeroVec;
+    ///
+    /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect();
+    /// zv.to_mut().insert(2, 33);
+    /// assert_eq!(zv.to_vec(), vec![22, 44, 33, 66]);
+    /// ```
+    pub fn insert(&mut self, index: usize, item: usize) {
+        let len = self.len();
+        #[allow(clippy::panic)] // panic is documented in function contract
+        if index > len {
+            panic!("index {} out of range {}", index, len);
+        }
+        let info = self.get_insert_info(item);
+        self.0.resize(info.new_bytes_len, 0);
+        self.as_mut_slice().insert_impl(info, index);
+    }
+
+    /// Inserts an element into an ascending sorted vector
+    /// at a position that keeps the vector sorted.
+    ///
+    /// # Panics
+    ///
+    /// Panics if inserting the element would require allocating more than `usize::MAX` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::vecs::FlexZeroVecOwned;
+    ///
+    /// let mut fzv = FlexZeroVecOwned::new_empty();
+    /// fzv.insert_sorted(10);
+    /// fzv.insert_sorted(5);
+    /// fzv.insert_sorted(8);
+    ///
+    /// assert!(Iterator::eq(fzv.iter(), [5, 8, 10].iter().copied()));
+    /// ```
+    pub fn insert_sorted(&mut self, item: usize) {
+        // Whether the item was found or not, the reported index is where it gets inserted.
+        let index = self.binary_search(item).unwrap_or_else(|i| i);
+        let info = self.get_insert_info(item);
+        self.0.resize(info.new_bytes_len, 0);
+        self.as_mut_slice().insert_impl(info, index);
+    }
+
+    /// Removes and returns the element at the specified index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index >= len`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::vecs::FlexZeroVec;
+    ///
+    /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect();
+    /// let removed_item = zv.to_mut().remove(1);
+    /// assert_eq!(44, removed_item);
+    /// assert_eq!(zv.to_vec(), vec![22, 66]);
+    /// ```
+    pub fn remove(&mut self, index: usize) -> usize {
+        let len = self.len();
+        #[allow(clippy::panic)] // panic is documented in function contract
+        if index >= len {
+            panic!("index {} out of range {}", index, len);
+        }
+        let info = self.get_remove_info(index);
+        // Safety: `remove_index` is a valid index
+        let removed = unsafe { self.get_unchecked(info.remove_index) };
+        // Read the target length before `info` is moved into `remove_impl`.
+        let shrunk_len = info.new_bytes_len;
+        self.as_mut_slice().remove_impl(info);
+        self.0.truncate(shrunk_len);
+        removed
+    }
+
+    /// Removes and returns the last element from an ascending sorted vector.
+    ///
+    /// If the vector is not sorted, use [`FlexZeroVecOwned::remove()`] instead. Calling this
+    /// function would leave the FlexZeroVec in a safe, well-defined state; however, information
+    /// may be lost and/or the equality invariant might not hold.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self.is_empty()`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::vecs::FlexZeroVec;
+    ///
+    /// let mut zv: FlexZeroVec = [22, 44, 66].iter().copied().collect();
+    /// let popped_item = zv.to_mut().pop_sorted();
+    /// assert_eq!(66, popped_item);
+    /// assert_eq!(zv.to_vec(), vec![22, 44]);
+    /// ```
+    ///
+    /// Calling this function on a non-ascending vector could cause surprising results:
+    ///
+    /// ```
+    /// use zerovec::vecs::FlexZeroVec;
+    ///
+    /// let mut zv1: FlexZeroVec = [444, 222, 111].iter().copied().collect();
+    /// let popped_item = zv1.to_mut().pop_sorted();
+    /// assert_eq!(111, popped_item);
+    ///
+    /// // Oops!
+    /// assert_eq!(zv1.to_vec(), vec![188, 222]);
+    /// ```
+    pub fn pop_sorted(&mut self) -> usize {
+        #[allow(clippy::panic)] // panic is documented in function contract
+        if self.is_empty() {
+            panic!("cannot pop from an empty vector");
+        }
+        let info = self.get_sorted_pop_info();
+        // Safety: `remove_index` is a valid index
+        let popped = unsafe { self.get_unchecked(info.remove_index) };
+        // Read the target length before `info` is moved into `remove_impl`.
+        let shrunk_len = info.new_bytes_len;
+        self.as_mut_slice().remove_impl(info);
+        self.0.truncate(shrunk_len);
+        popped
+    }
+}
+
+impl Deref for FlexZeroVecOwned {
+    type Target = FlexZeroSlice;
+
+    /// Dereferences to the [`FlexZeroSlice`] view of the owned bytes.
+    fn deref(&self) -> &FlexZeroSlice {
+        FlexZeroVecOwned::as_slice(self)
+    }
+}
+
+impl fmt::Debug for FlexZeroVecOwned {
+    /// Formats the vector as the list of its decoded `usize` elements.
+    ///
+    /// Delegates to the `Debug` impl of the decoded `Vec`, so formatter flags such as the
+    /// pretty-printing `{:#?}` are honored instead of being dropped.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Debug::fmt(&self.to_vec(), f)
+    }
+}
+
+impl From<&FlexZeroSlice> for FlexZeroVecOwned {
+    /// Copies the slice into a new owned vector; same as [`FlexZeroVecOwned::from_slice`].
+    fn from(other: &FlexZeroSlice) -> Self {
+        FlexZeroVecOwned::from_slice(other)
+    }
+}
+
+impl FromIterator<usize> for FlexZeroVecOwned {
+    /// Creates a [`FlexZeroVecOwned`] by pushing each `usize` from the iterator, in order,
+    /// onto an initially empty vector.
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = usize>,
+    {
+        let mut collected = FlexZeroVecOwned::new_empty();
+        iter.into_iter().for_each(|item| collected.push(item));
+        collected
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    /// Asserts that `fzv` matches `expected` in length, emptiness, first/last element and
+    /// every indexed element, including one index past the end (which must be `None`).
+    fn check_contents(fzv: &FlexZeroSlice, expected: &[usize]) {
+        assert_eq!(fzv.len(), expected.len(), "len: {fzv:?} != {expected:?}");
+        assert_eq!(
+            fzv.is_empty(),
+            expected.is_empty(),
+            "is_empty: {fzv:?} != {expected:?}"
+        );
+        assert_eq!(
+            fzv.first(),
+            expected.first().copied(),
+            "first: {fzv:?} != {expected:?}"
+        );
+        assert_eq!(
+            fzv.last(),
+            expected.last().copied(),
+            "last: {fzv:?} != {expected:?}"
+        );
+        // The range deliberately runs one past the end to check the out-of-bounds case.
+        for i in 0..(expected.len() + 1) {
+            assert_eq!(
+                fzv.get(i),
+                expected.get(i).copied(),
+                "@{i}: {fzv:?} != {expected:?}"
+            );
+        }
+    }
+
+    /// Exercises push, insert and remove, checking that the element width grows and shrinks
+    /// with the largest stored value.
+    #[test]
+    fn test_basic() {
+        let mut fzv = FlexZeroVecOwned::new_empty();
+        assert_eq!(fzv.get_width(), 1);
+        check_contents(&fzv, &[]);
+
+        fzv.push(42);
+        assert_eq!(fzv.get_width(), 1);
+        check_contents(&fzv, &[42]);
+
+        fzv.push(77);
+        assert_eq!(fzv.get_width(), 1);
+        check_contents(&fzv, &[42, 77]);
+
+        // Scale up
+        fzv.push(300);
+        assert_eq!(fzv.get_width(), 2);
+        check_contents(&fzv, &[42, 77, 300]);
+
+        // Does not need to be sorted
+        fzv.insert(1, 325);
+        assert_eq!(fzv.get_width(), 2);
+        check_contents(&fzv, &[42, 325, 77, 300]);
+
+        fzv.remove(3);
+        assert_eq!(fzv.get_width(), 2);
+        check_contents(&fzv, &[42, 325, 77]);
+
+        // Scale down
+        fzv.remove(1);
+        assert_eq!(fzv.get_width(), 1);
+        check_contents(&fzv, &[42, 77]);
+    }
+
+    /// Builds a sorted vector with `insert_sorted`, then removes every value again and
+    /// checks that the vector ends up empty with width 1.
+    #[test]
+    fn test_build_sorted() {
+        let nums: &[usize] = &[0, 50, 0, 77, 831, 29, 89182, 931, 0, 77, 712381];
+        let mut fzv = FlexZeroVecOwned::new_empty();
+
+        for num in nums {
+            fzv.insert_sorted(*num);
+        }
+        assert_eq!(fzv.get_width(), 3);
+        check_contents(&fzv, &[0, 0, 0, 29, 50, 77, 77, 831, 931, 89182, 712381]);
+
+        for num in nums {
+            let index = fzv.binary_search(*num).unwrap();
+            fzv.remove(index);
+        }
+        assert_eq!(fzv.get_width(), 1);
+        check_contents(&fzv, &[]);
+    }
+}
diff --git a/third_party/rust/zerovec/src/flexzerovec/serde.rs b/third_party/rust/zerovec/src/flexzerovec/serde.rs
new file mode 100644
index 0000000000..fb7caa7a8b
--- /dev/null
+++ b/third_party/rust/zerovec/src/flexzerovec/serde.rs
@@ -0,0 +1,175 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{FlexZeroSlice, FlexZeroVec};
+use alloc::vec::Vec;
+use core::fmt;
+use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
+#[cfg(feature = "serde")]
+use serde::ser::{Serialize, SerializeSeq, Serializer};
+
+/// Serde visitor that builds a [`FlexZeroVec`] from either a borrowed byte buffer or a
+/// sequence of `usize` values.
+#[derive(Default)]
+struct FlexZeroVecVisitor {}
+
+impl<'de> Visitor<'de> for FlexZeroVecVisitor {
+    type Value = FlexZeroVec<'de>;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a sequence or borrowed buffer of bytes")
+    }
+
+    /// Parses the borrowed bytes into a `FlexZeroVec`, mapping a parse failure to a
+    /// custom deserialization error.
+    fn visit_borrowed_bytes<E>(self, bytes: &'de [u8]) -> Result<Self::Value, E>
+    where
+        E: de::Error,
+    {
+        FlexZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom)
+    }
+
+    /// Reads every `usize` element of the sequence and collects them into a `FlexZeroVec`.
+    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
+    where
+        A: SeqAccess<'de>,
+    {
+        // The size hint may come straight from untrusted input (e.g. a length prefix), so
+        // cap the up-front allocation; the vector still grows as elements are read.
+        const MAX_PREALLOCATED_ELEMENTS: usize = 4096;
+        let capacity = seq
+            .size_hint()
+            .map_or(0, |hint| hint.min(MAX_PREALLOCATED_ELEMENTS));
+        let mut vec: Vec<usize> = Vec::with_capacity(capacity);
+        while let Some(value) = seq.next_element::<usize>()? {
+            vec.push(value);
+        }
+        Ok(vec.into_iter().collect())
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, 'a> Deserialize<'de> for FlexZeroVec<'a>
+where
+    'de: 'a,
+{
+    /// Requests a byte buffer from non-human-readable formats and a sequence from
+    /// human-readable ones.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let visitor = FlexZeroVecVisitor::default();
+        if !deserializer.is_human_readable() {
+            return deserializer.deserialize_bytes(visitor);
+        }
+        deserializer.deserialize_seq(visitor)
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, 'a> Deserialize<'de> for &'a FlexZeroSlice
+where
+    'de: 'a,
+{
+    /// Deserializes a borrowed slice. Human-readable formats are rejected, and so is any
+    /// result that is not the borrowed variant of `FlexZeroVec`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            return Err(de::Error::custom(
+                "&FlexZeroSlice cannot be deserialized from human-readable formats",
+            ));
+        }
+        let deserialized: FlexZeroVec<'a> = FlexZeroVec::deserialize(deserializer)?;
+        match deserialized {
+            FlexZeroVec::Borrowed(borrowed) => Ok(borrowed),
+            _ => Err(de::Error::custom(
+                "&FlexZeroSlice can only deserialize in zero-copy ways",
+            )),
+        }
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl Serialize for FlexZeroVec<'_> {
+    /// Writes the raw bytes for non-human-readable formats and a sequence of the element
+    /// values for human-readable ones.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if !serializer.is_human_readable() {
+            return serializer.serialize_bytes(self.as_bytes());
+        }
+        let mut seq = serializer.serialize_seq(Some(self.len()))?;
+        self.iter()
+            .try_for_each(|value| seq.serialize_element(&value))?;
+        seq.end()
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl Serialize for FlexZeroSlice {
+    /// Serializes through the `FlexZeroVec` obtained from `as_flexzerovec()`.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let as_vec = self.as_flexzerovec();
+        as_vec.serialize(serializer)
+    }
+}
+
+#[cfg(test)]
+#[allow(non_camel_case_types)]
+mod test {
+    use super::{FlexZeroSlice, FlexZeroVec};
+
+    // The two structs below only check that the serde derives accept these field types
+    // together with `#[serde(borrow)]`.
+    #[derive(serde::Serialize, serde::Deserialize)]
+    struct DeriveTest_FlexZeroVec<'data> {
+        #[serde(borrow)]
+        _data: FlexZeroVec<'data>,
+    }
+
+    #[derive(serde::Serialize, serde::Deserialize)]
+    struct DeriveTest_FlexZeroSlice<'data> {
+        #[serde(borrow)]
+        _data: &'data FlexZeroSlice,
+    }
+
+    // [1, 22, 333, 4444];
+    // Width byte 2 followed by the four values as little-endian 2-byte elements.
+    const BYTES: &[u8] = &[2, 0x01, 0x00, 0x16, 0x00, 0x4D, 0x01, 0x5C, 0x11];
+    const JSON_STR: &str = "[1,22,333,4444]";
+    // An 8-byte little-endian length (9) followed by the same nine bytes as `BYTES`.
+    const BINCODE_BUF: &[u8] = &[9, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 22, 0, 77, 1, 92, 17];
+
+    /// JSON round trip: serializes to a number array and deserializes into an owned vector.
+    #[test]
+    fn test_serde_json() {
+        let zerovec_orig: FlexZeroVec = FlexZeroVec::parse_byte_slice(BYTES).expect("parse");
+        let json_str = serde_json::to_string(&zerovec_orig).expect("serialize");
+        assert_eq!(JSON_STR, json_str);
+        // FlexZeroVec should deserialize from JSON to either Vec or FlexZeroVec
+        let vec_new: Vec<usize> =
+            serde_json::from_str(&json_str).expect("deserialize from buffer to Vec");
+        assert_eq!(zerovec_orig.to_vec(), vec_new);
+        let zerovec_new: FlexZeroVec =
+            serde_json::from_str(&json_str).expect("deserialize from buffer to FlexZeroVec");
+        assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec());
+        assert!(matches!(zerovec_new, FlexZeroVec::Owned(_)));
+    }
+
+    /// Bincode round trip: serializes to raw bytes and deserializes into a borrowed vector.
+    #[test]
+    fn test_serde_bincode() {
+        let zerovec_orig: FlexZeroVec = FlexZeroVec::parse_byte_slice(BYTES).expect("parse");
+        let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+        assert_eq!(BINCODE_BUF, bincode_buf);
+        let zerovec_new: FlexZeroVec =
+            bincode::deserialize(&bincode_buf).expect("deserialize from buffer to FlexZeroVec");
+        assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec());
+        assert!(matches!(zerovec_new, FlexZeroVec::Borrowed(_)));
+    }
+
+    /// Bincode round trip for `&FlexZeroSlice`.
+    #[test]
+    fn test_vzv_borrowed() {
+        let zerovec_orig: &FlexZeroSlice = FlexZeroSlice::parse_byte_slice(BYTES).expect("parse");
+        let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+        assert_eq!(BINCODE_BUF, bincode_buf);
+        let zerovec_new: &FlexZeroSlice =
+            bincode::deserialize(&bincode_buf).expect("deserialize from buffer to FlexZeroSlice");
+        assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec());
+    }
+}
diff --git a/third_party/rust/zerovec/src/flexzerovec/slice.rs b/third_party/rust/zerovec/src/flexzerovec/slice.rs
new file mode 100644
index 0000000000..41cb7116f9
--- /dev/null
+++ b/third_party/rust/zerovec/src/flexzerovec/slice.rs
@@ -0,0 +1,722 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::FlexZeroVec;
+use crate::ZeroVecError;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::fmt;
+use core::mem;
+use core::ops::Range;
+
+ const USIZE_WIDTH: usize = mem::size_of::<usize>(); // size of `usize` in bytes on the compilation target
+
+ /// A zero-copy "slice" that efficiently represents `[usize]`: one `width` byte followed by little-endian elements of `width` bytes each.
+ #[repr(packed)]
+ pub struct FlexZeroSlice {
+ // Hard Invariant: 1 <= width <= USIZE_WIDTH (the size of `usize` in bytes on this target)
+ // Soft Invariant: width == the width of the largest element
+ width: u8,
+ // Hard Invariant: data.len() % width == 0
+ data: [u8],
+ }
+
+ impl fmt::Debug for FlexZeroSlice {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.to_vec().fmt(f) // formats the decoded elements (as a Vec<usize>), not the raw bytes
+ }
+ }
+
+ impl PartialEq for FlexZeroSlice {
+ fn eq(&self, other: &Self) -> bool {
+ self.width == other.width && self.data == other.data // byte-wise: the same elements stored at different widths compare unequal
+ }
+ }
+ impl Eq for FlexZeroSlice {}
+
+ /// Helper function to decode a little-endian "chunk" (byte slice of a specific length)
+ /// into a `usize`. We cannot call `usize::from_le_bytes` directly because that function
+ /// takes exactly `USIZE_WIDTH` bytes, so the chunk is first zero-extended in its high bytes.
+ #[inline]
+ pub(crate) fn chunk_to_usize(chunk: &[u8], width: usize) -> usize {
+ debug_assert_eq!(chunk.len(), width);
+ let mut bytes = [0; USIZE_WIDTH];
+ #[allow(clippy::indexing_slicing)] // panics if width > USIZE_WIDTH or chunk.len() != width (the debug_assert only covers the latter, in debug builds)
+ bytes[0..width].copy_from_slice(chunk);
+ usize::from_le_bytes(bytes)
+ }
+
+impl FlexZeroSlice {
+ /// Constructs a new empty [`FlexZeroSlice`].
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroSlice;
+ ///
+ /// const EMPTY_SLICE: &FlexZeroSlice = FlexZeroSlice::new_empty();
+ ///
+ /// assert!(EMPTY_SLICE.is_empty());
+ /// assert_eq!(EMPTY_SLICE.len(), 0);
+ /// assert_eq!(EMPTY_SLICE.first(), None);
+ /// ```
+ #[inline]
+ pub const fn new_empty() -> &'static Self {
+ const ARR: &[u8] = &[1u8]; // a lone width byte (1) with no element data after it
+ // Safety: The slice is a valid empty `FlexZeroSlice`: width 1 satisfies 1 <= width <= USIZE_WIDTH and the data is empty
+ unsafe { Self::from_byte_slice_unchecked(ARR) }
+ }
+
+ /// Safely constructs a [`FlexZeroSlice`] from a byte array.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroSlice;
+ ///
+ /// const FZS: &FlexZeroSlice = match FlexZeroSlice::parse_byte_slice(&[
+ /// 2, // width
+ /// 0x42, 0x00, // first value
+ /// 0x07, 0x09, // second value
+ /// 0xFF, 0xFF, // third value
+ /// ]) {
+ /// Ok(v) => v,
+ /// Err(_) => panic!("invalid bytes"),
+ /// };
+ ///
+ /// assert!(!FZS.is_empty());
+ /// assert_eq!(FZS.len(), 3);
+ /// assert_eq!(FZS.first(), Some(0x0042));
+ /// assert_eq!(FZS.get(0), Some(0x0042));
+ /// assert_eq!(FZS.get(1), Some(0x0907));
+ /// assert_eq!(FZS.get(2), Some(0xFFFF));
+ /// assert_eq!(FZS.get(3), None);
+ /// assert_eq!(FZS.last(), Some(0xFFFF));
+ /// ```
+ pub const fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ let (width_u8, data) = match bytes.split_first() { // first byte is the width, the rest is element data
+ Some(v) => v,
+ None => { // empty input: there is not even a width byte
+ return Err(ZeroVecError::InvalidLength {
+ ty: "FlexZeroSlice",
+ len: 0,
+ })
+ }
+ };
+ let width = *width_u8 as usize;
+ if width < 1 || width > USIZE_WIDTH { // hard invariant on width
+ return Err(ZeroVecError::ParseError {
+ ty: "FlexZeroSlice",
+ });
+ }
+ if data.len() % width != 0 { // hard invariant: data must hold a whole number of elements
+ return Err(ZeroVecError::InvalidLength {
+ ty: "FlexZeroSlice",
+ len: bytes.len(),
+ });
+ }
+ // Safety: All hard invariants have been checked.
+ // Note: The soft invariant requires a linear search that we don't do here.
+ Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
+ }
+
+ /// Constructs a [`FlexZeroSlice`] without checking invariants.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `bytes` is empty.
+ ///
+ /// # Safety
+ ///
+ /// `bytes` must satisfy the hard invariants: `1 <= bytes[0] <= size_of::<usize>()` and `(bytes.len() - 1) % bytes[0] == 0`.
+ #[inline]
+ pub const unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ // Safety: The DST of FlexZeroSlice is a pointer to the `width` element and has a metadata
+ // equal to the length of the `data` field, which will be one less than the length of the
+ // overall array.
+ #[allow(clippy::panic)] // panic is documented in function contract
+ if bytes.is_empty() {
+ panic!("from_byte_slice_unchecked called with empty slice")
+ }
+ let slice = core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1); // length metadata counts only the `data` bytes
+ &*(slice as *const Self)
+ }
+
+ #[inline]
+ pub(crate) unsafe fn from_byte_slice_mut_unchecked(bytes: &mut [u8]) -> &mut Self { // mutable counterpart of `from_byte_slice_unchecked`
+ // Safety: See comments in `from_byte_slice_unchecked`
+ let remainder = core::ptr::slice_from_raw_parts_mut(bytes.as_mut_ptr(), bytes.len() - 1); // NOTE(review): no emptiness check here, so an empty `bytes` makes `len() - 1` underflow
+ &mut *(remainder as *mut Self)
+ }
+
+ /// Returns this slice as its underlying `&[u8]` byte buffer representation.
+ ///
+ /// Useful for serialization.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroSlice;
+ ///
+ /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let fzv = FlexZeroSlice::parse_byte_slice(bytes).expect("valid bytes");
+ ///
+ /// assert_eq!(bytes, fzv.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ // Safety: See comments in `from_byte_slice_unchecked`
+ unsafe {
+ core::slice::from_raw_parts(self as *const Self as *const u8, self.data.len() + 1) // + 1 accounts for the leading width byte
+ }
+ }
+
+ /// Borrows this `FlexZeroSlice` as a [`FlexZeroVec::Borrowed`]; the reference is wrapped, not copied.
+ #[inline]
+ pub const fn as_flexzerovec(&self) -> FlexZeroVec {
+ FlexZeroVec::Borrowed(self)
+ }
+
+ /// Returns the number of elements in the `FlexZeroSlice`.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.data.len() / self.get_width() // exact, since data.len() is a multiple of width (hard invariant)
+ }
+
+ #[inline]
+ pub(crate) fn get_width(&self) -> usize { // number of bytes used to store each element
+ usize::from(self.width)
+ }
+
+ /// Returns whether there are zero elements in the `FlexZeroSlice`.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.data.len() == 0 // no data bytes means no elements, whatever the width
+ }
+
+ /// Gets the element at `index`, or `None` if `index >= self.len()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
+ /// assert_eq!(fzv.get(0), Some(22));
+ /// assert_eq!(fzv.get(1), Some(33));
+ /// assert_eq!(fzv.get(2), None);
+ /// ```
+ #[inline]
+ pub fn get(&self, index: usize) -> Option<usize> {
+ if index >= self.len() {
+ None
+ } else {
+ Some(unsafe { self.get_unchecked(index) }) // Safety: index < self.len() was established by the check above
+ }
+ }
+
+ /// Gets the element at `index` as a chunk of `width` raw little-endian bytes, or `None` if `index >= self.len()`.
+ #[inline]
+ pub(crate) fn get_chunk(&self, index: usize) -> Option<&[u8]> {
+ let w = self.get_width();
+ self.data.get(index * w..index * w + w) // byte range of element `index`; NOTE(review): the multiplication is unchecked for very large `index`
+ }
+
+ /// Gets the element at `index` without checking bounds.
+ ///
+ /// # Safety
+ ///
+ /// `index` must be in-range, i.e. `index < self.len()`.
+ #[inline]
+ pub unsafe fn get_unchecked(&self, index: usize) -> usize {
+ match self.width {
+ 1 => *self.data.get_unchecked(index) as usize, // one byte per element: the byte is the value
+ 2 => {
+ let ptr = self.data.as_ptr().add(index * 2);
+ u16::from_le_bytes(core::ptr::read(ptr as *const [u8; 2])) as usize // [u8; 2] has alignment 1, so any byte address is acceptable
+ }
+ _ => {
+ let mut bytes = [0; USIZE_WIDTH];
+ let w = self.get_width();
+ assert!(w <= USIZE_WIDTH); // keeps the copy below inside `bytes` even if the width invariant were broken
+ let ptr = self.data.as_ptr().add(index * w);
+ core::ptr::copy_nonoverlapping(ptr, bytes.as_mut_ptr(), w); // low `w` bytes come from data; the high bytes stay zero
+ usize::from_le_bytes(bytes)
+ }
+ }
+ }
+
+ /// Gets the first element of the slice, or `None` if the slice is empty.
+ #[inline]
+ pub fn first(&self) -> Option<usize> {
+ let w = self.get_width();
+ self.data.get(0..w).map(|chunk| chunk_to_usize(chunk, w)) // `get` yields None when data is shorter than one element, i.e. empty
+ }
+
+ /// Gets the last element of the slice, or `None` if the slice is empty.
+ #[inline]
+ pub fn last(&self) -> Option<usize> {
+ let l = self.data.len();
+ if l == 0 {
+ None
+ } else {
+ let w = self.get_width();
+ self.data
+ .get(l - w..l) // final `w` bytes; l >= w here because l is a non-zero multiple of w
+ .map(|chunk| chunk_to_usize(chunk, w))
+ }
+ }
+
+ /// Gets an iterator over the elements of the slice as `usize`, decoding each element as it is yielded.
+ #[inline]
+ pub fn iter(
+ &self,
+ ) -> impl DoubleEndedIterator<Item = usize> + '_ + ExactSizeIterator<Item = usize> {
+ let w = self.get_width();
+ self.data
+ .chunks_exact(w) // leaves no remainder, since data.len() is a multiple of w
+ .map(move |chunk| chunk_to_usize(chunk, w))
+ }
+
+ /// Gets an iterator over pairs of elements.
+ ///
+ /// The second element of the final pair is `None`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let nums: &[usize] = &[211, 281, 421, 461];
+ /// let fzv: FlexZeroVec = nums.iter().copied().collect();
+ ///
+ /// let mut pairs_it = fzv.iter_pairs();
+ ///
+ /// assert_eq!(pairs_it.next(), Some((211, Some(281))));
+ /// assert_eq!(pairs_it.next(), Some((281, Some(421))));
+ /// assert_eq!(pairs_it.next(), Some((421, Some(461))));
+ /// assert_eq!(pairs_it.next(), Some((461, None)));
+ /// assert_eq!(pairs_it.next(), None);
+ /// ```
+ pub fn iter_pairs(&self) -> impl Iterator<Item = (usize, Option<usize>)> + '_ {
+ self.iter().zip(self.iter().skip(1).map(Some).chain([None])) // right side is the sequence shifted by one, padded with a trailing None
+ }
+
+ /// Creates a `Vec<usize>` from a [`FlexZeroSlice`] (or `FlexZeroVec`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let nums: &[usize] = &[211, 281, 421, 461];
+ /// let fzv: FlexZeroVec = nums.iter().copied().collect();
+ /// let vec: Vec<usize> = fzv.to_vec();
+ ///
+ /// assert_eq!(nums, vec.as_slice());
+ /// ```
+ #[inline]
+ pub fn to_vec(&self) -> Vec<usize> {
+ self.iter().collect() // decodes every element into a newly collected Vec
+ }
+
+ /// Binary searches a sorted `FlexZeroSlice` for the given `usize` value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let nums: &[usize] = &[211, 281, 421, 461];
+ /// let fzv: FlexZeroVec = nums.iter().copied().collect();
+ ///
+ /// assert_eq!(fzv.binary_search(0), Err(0));
+ /// assert_eq!(fzv.binary_search(211), Ok(0));
+ /// assert_eq!(fzv.binary_search(250), Err(1));
+ /// assert_eq!(fzv.binary_search(281), Ok(1));
+ /// assert_eq!(fzv.binary_search(300), Err(2));
+ /// assert_eq!(fzv.binary_search(421), Ok(2));
+ /// assert_eq!(fzv.binary_search(450), Err(3));
+ /// assert_eq!(fzv.binary_search(461), Ok(3));
+ /// assert_eq!(fzv.binary_search(462), Err(4));
+ /// ```
+ #[inline]
+ pub fn binary_search(&self, needle: usize) -> Result<usize, usize> {
+ self.binary_search_by(|probe| probe.cmp(&needle)) // probe is a decoded element; Ok/Err carry element indices
+ }
+
+ /// Binary searches a sorted range of a `FlexZeroSlice` for the given `usize` value.
+ ///
+ /// The indices in the return value are relative to the start of the range.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// // Make a FlexZeroVec with two sorted ranges: 0..3 and 3..5
+ /// let nums: &[usize] = &[111, 222, 444, 333, 555];
+ /// let fzv: FlexZeroVec = nums.iter().copied().collect();
+ ///
+ /// // Search in the first range:
+ /// assert_eq!(fzv.binary_search_in_range(0, 0..3), Some(Err(0)));
+ /// assert_eq!(fzv.binary_search_in_range(111, 0..3), Some(Ok(0)));
+ /// assert_eq!(fzv.binary_search_in_range(199, 0..3), Some(Err(1)));
+ /// assert_eq!(fzv.binary_search_in_range(222, 0..3), Some(Ok(1)));
+ /// assert_eq!(fzv.binary_search_in_range(399, 0..3), Some(Err(2)));
+ /// assert_eq!(fzv.binary_search_in_range(444, 0..3), Some(Ok(2)));
+ /// assert_eq!(fzv.binary_search_in_range(999, 0..3), Some(Err(3)));
+ ///
+ /// // Search in the second range:
+ /// assert_eq!(fzv.binary_search_in_range(0, 3..5), Some(Err(0)));
+ /// assert_eq!(fzv.binary_search_in_range(333, 3..5), Some(Ok(0)));
+ /// assert_eq!(fzv.binary_search_in_range(399, 3..5), Some(Err(1)));
+ /// assert_eq!(fzv.binary_search_in_range(555, 3..5), Some(Ok(1)));
+ /// assert_eq!(fzv.binary_search_in_range(999, 3..5), Some(Err(2)));
+ ///
+ /// // Out-of-bounds range:
+ /// assert_eq!(fzv.binary_search_in_range(0, 4..6), None);
+ /// ```
+ #[inline]
+ pub fn binary_search_in_range(
+ &self,
+ needle: usize,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ self.binary_search_in_range_by(|probe| probe.cmp(&needle), range) // None is passed through when `range` is out of bounds
+ }
+
+ /// Binary searches a sorted `FlexZeroSlice` according to a predicate function, which is called with decoded elements.
+ #[inline]
+ pub fn binary_search_by(
+ &self,
+ predicate: impl FnMut(usize) -> Ordering,
+ ) -> Result<usize, usize> {
+ debug_assert!(self.len() <= self.data.len());
+ // Safety: self.len() <= self.data.len()
+ let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) }; // one byte per element; only byte positions are used, not byte values
+ self.binary_search_impl(predicate, scaled_slice)
+ }
+
+ /// Binary searches a sorted range of a `FlexZeroSlice` according to a predicate function.
+ ///
+ /// The indices in the return value are relative to the start of the range; `None` means the range is out of bounds.
+ #[inline]
+ pub fn binary_search_in_range_by(
+ &self,
+ predicate: impl FnMut(usize) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ // Note: We need to check bounds separately: `range` counts elements, but `self.data.get(range)`
+ // indexes bytes, and the byte array can be longer than `self.len()`
+ if range.start > self.len() || range.end > self.len() {
+ return None;
+ }
+ let scaled_slice = self.data.get(range)?; // also yields None for an inverted range (start > end)
+ Some(self.binary_search_impl(predicate, scaled_slice))
+ }
+
+ /// Binary searches a `FlexZeroSlice` by its indices.
+ ///
+ /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`, not element values.
+ #[inline]
+ pub fn binary_search_with_index(
+ &self,
+ predicate: impl FnMut(usize) -> Ordering,
+ ) -> Result<usize, usize> {
+ debug_assert!(self.len() <= self.data.len());
+ // Safety: self.len() <= self.data.len()
+ let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) }; // one byte per element; only byte positions are used, not byte values
+ self.binary_search_with_index_impl(predicate, scaled_slice)
+ }
+
+ /// Binary searches a range of a `FlexZeroSlice` by its indices.
+ ///
+ /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`, which are
+ /// relative to the start of the entire slice.
+ ///
+ /// The indices in the return value are relative to the start of the range; `None` means the range is out of bounds.
+ #[inline]
+ pub fn binary_search_in_range_with_index(
+ &self,
+ predicate: impl FnMut(usize) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ // Note: We need to check bounds separately: `range` counts elements, but `self.data.get(range)`
+ // indexes bytes, and the byte array can be longer than `self.len()`
+ if range.start > self.len() || range.end > self.len() {
+ return None;
+ }
+ let scaled_slice = self.data.get(range)?; // also yields None for an inverted range (start > end)
+ Some(self.binary_search_with_index_impl(predicate, scaled_slice))
+ }
+
+ /// # Safety
+ ///
+ /// `scaled_slice` must be a subslice of `self.data` lying within its first `self.len()` bytes, so that every byte offset is a valid element index
+ #[inline]
+ fn binary_search_impl(
+ &self,
+ mut predicate: impl FnMut(usize) -> Ordering,
+ scaled_slice: &[u8],
+ ) -> Result<usize, usize> {
+ self.binary_search_with_index_impl(
+ |index| {
+ // Safety: The contract of `binary_search_with_index_impl` says `index` is in bounds
+ let actual_probe = unsafe { self.get_unchecked(index) }; // decode the element the search is currently probing
+ predicate(actual_probe)
+ },
+ scaled_slice,
+ )
+ }
+
+ /// `predicate` is passed a valid index as an argument.
+ ///
+ /// # Safety
+ ///
+ /// `scaled_slice` must be a subslice of `self.data`
+ fn binary_search_with_index_impl(
+ &self,
+ mut predicate: impl FnMut(usize) -> Ordering,
+ scaled_slice: &[u8],
+ ) -> Result<usize, usize> {
+ // This code is an absolute atrocity. This code is not a place of honor. This
+ // code is known to the State of California to cause cancer.
+ //
+ // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
+ // We do not have a slice. We have something we can .get() and index on, but that is not
+ // a slice.
+ //
+ // The `binary_search*` functions also do not have a variant where they give you the element's
+ // index, which we could otherwise use to directly index `self`.
+ // We do have `self.data`, but it is a byte buffer whose individual bytes are pieces of
+ // elements, so searching it by value would be meaningless.
+ //
+ // However, `binary_search_by()` provides references to the elements of the slice being iterated.
+ // Since the layout of Rust slices is well-defined, we can do pointer arithmetic on these references
+ // to obtain the index being used by the search.
+ //
+ // It's worth noting that the slice we choose to search is irrelevant, as long as it has the appropriate
+ // length. Callers pass a subslice of `self.data` with one byte per logical element, so it is
+ // convenient to use here and does not require additional allocations.
+ //
+ // The alternative to doing this is to implement our own binary search. This is significantly less fun.
+
+ // Note: We always use zero_index relative to the whole data array, even if we are
+ // only searching a subslice of it.
+ let zero_index = self.data.as_ptr() as *const _ as usize;
+ scaled_slice.binary_search_by(|probe: &_| {
+ // Note: `scaled_slice` is a slice of u8
+ let index = probe as *const _ as usize - zero_index; // byte offset of `probe` from the start of `self.data`, used as the element index
+ predicate(index)
+ })
+ }
+}
+
+ #[inline]
+ pub(crate) fn get_item_width(item_bytes: &[u8; USIZE_WIDTH]) -> usize { // minimal byte count of a little-endian value; 0 when every byte is zero
+ USIZE_WIDTH - item_bytes.iter().rev().take_while(|b| **b == 0).count() // subtract the run of zero bytes at the high end
+ }
+
+ /// Pre-computed information about a pending insertion operation.
+ ///
+ /// Do not create one of these directly; call `get_insert_info()`.
+ pub(crate) struct InsertInfo {
+ /// The bytes to be inserted: the new item in little-endian order, zero-filled in the high bytes.
+ pub item_bytes: [u8; USIZE_WIDTH],
+ /// The new item width after insertion.
+ pub new_width: usize,
+ /// The new number of items in the vector: self.len() after insertion.
+ pub new_count: usize,
+ /// The new number of bytes required for the entire slice after insertion: `new_count * new_width` data bytes plus the width byte.
+ pub new_bytes_len: usize,
+ }
+
+impl FlexZeroSlice {
+ /// Compute the [`InsertInfo`] for inserting the specified item anywhere into the vector.
+ ///
+ /// # Panics
+ ///
+ /// Panics if inserting the element would require allocating more than `usize::MAX` bytes.
+ pub(crate) fn get_insert_info(&self, new_item: usize) -> InsertInfo {
+ let item_bytes = new_item.to_le_bytes();
+ let item_width = get_item_width(&item_bytes);
+ let old_width = self.get_width();
+ let new_width = core::cmp::max(old_width, item_width); // the width only grows on insertion, so it stays >= 1
+ let new_count = 1 + (self.data.len() / old_width);
+ #[allow(clippy::unwrap_used)] // panic is documented in function contract
+ let new_bytes_len = new_count
+ .checked_mul(new_width)
+ .unwrap()
+ .checked_add(1) // the width byte
+ .unwrap();
+ InsertInfo {
+ item_bytes,
+ new_width,
+ new_count,
+ new_bytes_len,
+ }
+ }
+
+ /// This function should be called on a slice with a data array `new_bytes_len - 1` bytes long
+ /// which previously held `new_count - 1` elements.
+ ///
+ /// After calling this function, all bytes in the slice will have been written.
+ pub(crate) fn insert_impl(&mut self, insert_info: InsertInfo, insert_index: usize) {
+ let InsertInfo {
+ item_bytes,
+ new_width,
+ new_count,
+ new_bytes_len,
+ } = insert_info;
+ debug_assert!(new_width <= USIZE_WIDTH);
+ debug_assert!(new_width >= self.get_width());
+ debug_assert!(insert_index < new_count);
+ debug_assert_eq!(new_bytes_len, new_count * new_width + 1);
+ debug_assert_eq!(new_bytes_len, self.data.len() + 1);
+ // For efficiency, calculate how many items we can skip copying: with an unchanged width, items before insert_index stay in place.
+ let lower_i = if new_width == self.get_width() {
+ insert_index
+ } else {
+ 0
+ };
+ // Copy elements starting from the end into the new empty section of the vector.
+ // Note: We could copy fully in place, but we need to set 0 bytes for the high bytes,
+ // so we stage the new value on the stack.
+ for i in (lower_i..new_count).rev() {
+ let bytes_to_write = if i == insert_index {
+ item_bytes
+ } else {
+ let j = if i > insert_index { i - 1 } else { i }; // old index of the element that ends up at new index i
+ debug_assert!(j < new_count - 1);
+ // Safety: j is in range (assertion on previous line), and it has not been
+ // overwritten yet since we are walking backwards.
+ unsafe { self.get_unchecked(j).to_le_bytes() }
+ };
+ // Safety: The vector has room for `new_width` bytes at the new index, which is
+ // not earlier in the array than the bytes that we read above.
+ unsafe {
+ core::ptr::copy_nonoverlapping(
+ bytes_to_write.as_ptr(),
+ self.data.as_mut_ptr().add(new_width * i),
+ new_width,
+ );
+ }
+ }
+ self.width = new_width as u8; // updated last: the loop above decodes old elements using the old width
+ }
+}
+
+ /// Pre-computed information about a pending removal operation.
+ ///
+ /// Do not create one of these directly; call `get_remove_info()` or `get_sorted_pop_info()`.
+ pub(crate) struct RemoveInfo {
+ /// The index of the item to be removed.
+ pub remove_index: usize,
+ /// The new item width after removal.
+ pub new_width: usize,
+ /// The new number of items in the vector: self.len() after removal.
+ pub new_count: usize,
+ /// The new number of bytes required for the entire slice after removal: `new_count * new_width` data bytes plus the width byte.
+ pub new_bytes_len: usize,
+ }
+
+impl FlexZeroSlice {
+ /// Compute the [`RemoveInfo`] for removing the item at the specified index, which must be less than `self.len()`.
+ pub(crate) fn get_remove_info(&self, remove_index: usize) -> RemoveInfo {
+ debug_assert!(remove_index < self.len());
+ // Safety: callers must pass an in-range remove_index (only verified by the debug assertion above)
+ let item_bytes = unsafe { self.get_unchecked(remove_index).to_le_bytes() };
+ let item_width = get_item_width(&item_bytes);
+ let old_width = self.get_width();
+ let old_count = self.data.len() / old_width;
+ let new_width = if item_width < old_width {
+ old_width // the removed item is not the widest one, so the width cannot shrink
+ } else {
+ debug_assert_eq!(old_width, item_width);
+ // We might be removing the widest element. If so, we need to scale down.
+ let mut largest_width = 1; // start at 1 so the result respects 1 <= width even if every remaining item is 0
+ for i in 0..old_count {
+ if i == remove_index {
+ continue;
+ }
+ // Safety: i is in range (between 0 and old_count)
+ let curr_bytes = unsafe { self.get_unchecked(i).to_le_bytes() };
+ let curr_width = get_item_width(&curr_bytes);
+ largest_width = core::cmp::max(curr_width, largest_width);
+ }
+ largest_width
+ };
+ let new_count = old_count - 1;
+ // Note: the following line won't overflow because we are making the slice shorter.
+ let new_bytes_len = new_count * new_width + 1;
+ RemoveInfo {
+ remove_index,
+ new_width,
+ new_count,
+ new_bytes_len,
+ }
+ }
+
+ /// Returns the [`RemoveInfo`] for removing the last element. Should be called
+ /// on a non-empty slice sorted in ascending order.
+ ///
+ /// This is more efficient than `get_remove_info()` because it doesn't require a
+ /// linear traversal of the vector in order to calculate `new_width`: in a sorted
+ /// slice the second-to-last element is the largest one that remains.
+ ///
+ /// The returned `new_width` is always at least 1, even when the remaining elements
+ /// are all zero, so that the `1 <= width` hard invariant still holds after removal.
+ pub(crate) fn get_sorted_pop_info(&self) -> RemoveInfo {
+ debug_assert!(!self.is_empty());
+ let remove_index = self.len() - 1;
+ let old_count = self.len();
+ let new_width = if old_count == 1 {
+ 1
+ } else {
+ // Safety: the FlexZeroSlice has at least two elements
+ let largest_item = unsafe { self.get_unchecked(remove_index - 1).to_le_bytes() };
+ // `get_item_width` returns 0 for the value 0 (e.g. when popping from `[0, 5]`).
+ // Clamp to 1: a width of 0 would break the hard invariant and make `len()`
+ // divide by zero.
+ core::cmp::max(get_item_width(&largest_item), 1)
+ };
+ let new_count = old_count - 1;
+ // Note: the following line won't overflow because we are making the slice shorter.
+ let new_bytes_len = new_count * new_width + 1;
+ RemoveInfo {
+ remove_index,
+ new_width,
+ new_count,
+ new_bytes_len,
+ }
+ }
+
+ /// This function should be called on a valid slice.
+ ///
+ /// After calling this function, the backing bytes should be truncated to `new_bytes_len` bytes (the width byte plus the remaining data).
+ pub(crate) fn remove_impl(&mut self, remove_info: RemoveInfo) {
+ let RemoveInfo {
+ remove_index,
+ new_width,
+ new_count,
+ ..
+ } = remove_info;
+ debug_assert!(new_width <= self.get_width());
+ debug_assert!(new_count < self.len());
+ // For efficiency, calculate how many items we can skip copying: with an unchanged width, items before remove_index stay in place.
+ let lower_i = if new_width == self.get_width() {
+ remove_index
+ } else {
+ 0
+ };
+ // Copy elements starting from the beginning to compress the vector to fewer bytes.
+ for i in lower_i..new_count {
+ let j = if i < remove_index { i } else { i + 1 }; // old index of the element that ends up at new index i
+ // Safety: j is in range because j <= new_count < self.len()
+ let bytes_to_write = unsafe { self.get_unchecked(j).to_le_bytes() };
+ // Safety: The bytes are being copied to a section of the array that is not after
+ // the section of the array that currently holds the bytes.
+ unsafe {
+ core::ptr::copy_nonoverlapping(
+ bytes_to_write.as_ptr(),
+ self.data.as_mut_ptr().add(new_width * i),
+ new_width,
+ );
+ }
+ }
+ self.width = new_width as u8; // updated last: the loop above decodes old elements using the old width
+ }
+}
diff --git a/third_party/rust/zerovec/src/flexzerovec/vec.rs b/third_party/rust/zerovec/src/flexzerovec/vec.rs
new file mode 100644
index 0000000000..d83f600b57
--- /dev/null
+++ b/third_party/rust/zerovec/src/flexzerovec/vec.rs
@@ -0,0 +1,275 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::FlexZeroSlice;
+use super::FlexZeroVecOwned;
+use crate::ZeroVecError;
+use core::cmp::Ordering;
+use core::iter::FromIterator;
+use core::ops::Deref;
+
+/// A zero-copy data structure that efficiently stores integer values.
+///
+/// `FlexZeroVec` automatically increases or decreases its storage capacity based on the largest
+/// integer stored in the vector. It therefore results in lower memory usage when smaller numbers
+/// are usually stored, but larger values must sometimes also be stored.
+///
+/// The maximum value that can be stored in `FlexZeroVec` is `usize::MAX` on the current platform.
+///
+/// `FlexZeroVec` is the data structure for storing `usize` in a `ZeroMap`.
+///
+/// `FlexZeroVec` derefs to [`FlexZeroSlice`], which contains most of the methods.
+///
+/// # Examples
+///
+/// Storing a vec of `usize`s in a zero-copy way:
+///
+/// ```
+/// use zerovec::vecs::FlexZeroVec;
+///
+/// // Create a FlexZeroVec and add a few numbers to it
+/// let mut zv1 = FlexZeroVec::new();
+/// zv1.to_mut().push(55);
+/// zv1.to_mut().push(33);
+/// zv1.to_mut().push(999);
+/// assert_eq!(zv1.to_vec(), vec![55, 33, 999]);
+///
+/// // Convert it to bytes and back
+/// let bytes = zv1.as_bytes();
+/// let zv2 =
+/// FlexZeroVec::parse_byte_slice(bytes).expect("bytes should round-trip");
+/// assert_eq!(zv2.to_vec(), vec![55, 33, 999]);
+///
+/// // Verify the compact storage
+/// assert_eq!(7, bytes.len());
+/// assert!(matches!(zv2, FlexZeroVec::Borrowed(_)));
+/// ```
+///
+/// Storing a map of `usize` to `usize` in a zero-copy way:
+///
+/// ```
+/// use zerovec::ZeroMap;
+///
+/// // Append some values to the ZeroMap
+/// let mut zm = ZeroMap::<usize, usize>::new();
+/// assert!(zm.try_append(&29, &92).is_none());
+/// assert!(zm.try_append(&38, &83).is_none());
+/// assert!(zm.try_append(&56, &65).is_none());
+/// assert_eq!(zm.len(), 3);
+///
+/// // Insert another value into the middle
+/// assert!(zm.try_append(&47, &74).is_some());
+/// assert!(zm.insert(&47, &74).is_none());
+/// assert_eq!(zm.len(), 4);
+///
+/// // Verify that the values are correct
+/// assert_eq!(zm.get_copied(&0), None);
+/// assert_eq!(zm.get_copied(&29), Some(92));
+/// assert_eq!(zm.get_copied(&38), Some(83));
+/// assert_eq!(zm.get_copied(&47), Some(74));
+/// assert_eq!(zm.get_copied(&56), Some(65));
+/// assert_eq!(zm.get_copied(&usize::MAX), None);
+/// ```
+ #[derive(Debug)]
+ #[non_exhaustive]
+ pub enum FlexZeroVec<'a> {
+ Owned(FlexZeroVecOwned), // owns its storage; this is what `to_mut` and `into_owned` produce
+ Borrowed(&'a FlexZeroSlice), // refers to a `FlexZeroSlice` stored elsewhere, e.g. inside a parsed byte buffer
+ }
+
+ impl<'a> Deref for FlexZeroVec<'a> {
+ type Target = FlexZeroSlice;
+ fn deref(&self) -> &Self::Target { // both variants expose the same read-only slice view
+ match self {
+ FlexZeroVec::Owned(v) => v.deref(),
+ FlexZeroVec::Borrowed(v) => v,
+ }
+ }
+ }
+
+ impl<'a> AsRef<FlexZeroSlice> for FlexZeroVec<'a> {
+ fn as_ref(&self) -> &FlexZeroSlice {
+ self.deref() // same view as `Deref`, offered through the `AsRef` trait
+ }
+ }
+
+ impl Eq for FlexZeroVec<'_> {}
+
+ impl<'a, 'b> PartialEq<FlexZeroVec<'b>> for FlexZeroVec<'a> {
+ #[inline]
+ fn eq(&self, other: &FlexZeroVec<'b>) -> bool {
+ self.iter().eq(other.iter()) // compares decoded elements, so neither the variant nor the storage width matters
+ }
+ }
+
+ impl<'a> Default for FlexZeroVec<'a> {
+ #[inline]
+ fn default() -> Self {
+ Self::new() // the default is the empty borrowed vector
+ }
+ }
+
+ impl<'a> PartialOrd for FlexZeroVec<'a> {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other)) // total order, delegated to `Ord`
+ }
+ }
+
+ impl<'a> Ord for FlexZeroVec<'a> {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.iter().cmp(other.iter()) // lexicographic comparison of the decoded elements
+ }
+ }
+
+impl<'a> FlexZeroVec<'a> {
+ #[inline]
+ /// Creates a new, borrowed, empty `FlexZeroVec`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let zv: FlexZeroVec = FlexZeroVec::new();
+ /// assert!(zv.is_empty());
+ /// ```
+ pub const fn new() -> Self {
+ Self::Borrowed(FlexZeroSlice::new_empty()) // borrows the `'static` empty slice
+ }
+
+ /// Parses a `&[u8]` buffer into a `FlexZeroVec`.
+ ///
+ /// The bytes within the byte buffer must remain constant for the life of the FlexZeroVec.
+ ///
+ /// # Endianness
+ ///
+ /// The byte buffer must be encoded in little-endian, even if running in a big-endian
+ /// environment. This ensures a consistent representation of data across platforms.
+ ///
+ /// # Max Value
+ ///
+ /// The bytes will fail to parse if the element width stored in the first byte is greater
+ /// than the size of `usize` on this platform. For example, a `FlexZeroVec` created on a
+ /// 64-bit platform might fail to deserialize on a 32-bit platform.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zv = FlexZeroVec::parse_byte_slice(bytes).expect("valid slice");
+ ///
+ /// assert!(matches!(zv, FlexZeroVec::Borrowed(_)));
+ /// assert_eq!(zv.get(2), Some(421));
+ /// ```
+ pub fn parse_byte_slice(bytes: &'a [u8]) -> Result<Self, ZeroVecError> {
+ let slice: &'a FlexZeroSlice = FlexZeroSlice::parse_byte_slice(bytes)?; // validation happens here; the result borrows `bytes`
+ Ok(Self::Borrowed(slice))
+ }
+
+ /// Converts a borrowed FlexZeroVec to an owned FlexZeroVec. No-op if already owned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zv = FlexZeroVec::parse_byte_slice(bytes).expect("valid bytes");
+ /// assert!(matches!(zv, FlexZeroVec::Borrowed(_)));
+ ///
+ /// let owned = zv.into_owned();
+ /// assert!(matches!(owned, FlexZeroVec::Owned(_)));
+ /// ```
+ pub fn into_owned(self) -> FlexZeroVec<'static> {
+ match self {
+ Self::Owned(owned) => FlexZeroVec::Owned(owned), // already owned: moved through unchanged
+ Self::Borrowed(slice) => FlexZeroVec::Owned(FlexZeroVecOwned::from_slice(slice)), // build an owned vector from the borrowed slice
+ }
+ }
+
+ /// Allows the FlexZeroVec to be mutated by converting it to an owned variant, and producing
+ /// a mutable [`FlexZeroVecOwned`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let mut zv = FlexZeroVec::parse_byte_slice(bytes).expect("valid bytes");
+ /// assert!(matches!(zv, FlexZeroVec::Borrowed(_)));
+ ///
+ /// zv.to_mut().push(12);
+ /// assert!(matches!(zv, FlexZeroVec::Owned(_)));
+ /// assert_eq!(zv.get(4), Some(12));
+ /// ```
+ pub fn to_mut(&mut self) -> &mut FlexZeroVecOwned {
+ match self {
+ Self::Owned(ref mut owned) => owned,
+ Self::Borrowed(slice) => {
+ *self = FlexZeroVec::Owned(FlexZeroVecOwned::from_slice(slice)); // replace the borrow with an owned vector built from it
+ // recursion is limited since we are guaranteed to hit the Owned branch
+ self.to_mut()
+ }
+ }
+ }
+
+ /// Remove all elements from this FlexZeroVec and reset it to an empty borrowed state.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::vecs::FlexZeroVec;
+ ///
+ /// let mut zv: FlexZeroVec = [1, 2, 3].iter().copied().collect();
+ /// assert!(!zv.is_empty());
+ /// zv.clear();
+ /// assert!(zv.is_empty());
+ /// ```
+ pub fn clear(&mut self) {
+ *self = Self::Borrowed(FlexZeroSlice::new_empty()) // any previously owned storage is dropped by the assignment
+ }
+}
+
+ impl FromIterator<usize> for FlexZeroVec<'_> {
+ /// Creates a [`FlexZeroVec::Owned`] from an iterator of `usize`.
+ fn from_iter<I>(iter: I) -> Self
+ where
+ I: IntoIterator<Item = usize>,
+ {
+ FlexZeroVecOwned::from_iter(iter).into_flexzerovec() // collect into the owned type first, then wrap it
+ }
+ }
+
+ #[test]
+ fn test_zeromap_usize() {
+ use crate::ZeroMap;
+
+ let mut zm = ZeroMap::<usize, usize>::new();
+ assert!(zm.try_append(&29, &92).is_none());
+ assert!(zm.try_append(&38, &83).is_none());
+ assert!(zm.try_append(&47, &74).is_none());
+ assert!(zm.try_append(&56, &65).is_none());
+
+ assert_eq!(zm.keys.get_width(), 1); // every key and value so far fits in one byte
+ assert_eq!(zm.values.get_width(), 1);
+
+ assert_eq!(zm.insert(&47, &744), Some(74)); // 744 needs two bytes, so the values widen
+ assert_eq!(zm.values.get_width(), 2);
+ assert_eq!(zm.insert(&47, &774), Some(744));
+ assert_eq!(zm.values.get_width(), 2);
+ assert!(zm.try_append(&1100, &1).is_none()); // 1100 needs two bytes, so the keys widen
+ assert_eq!(zm.keys.get_width(), 2);
+ assert_eq!(zm.remove(&1100), Some(1)); // removing the only wide key shrinks the keys again
+ assert_eq!(zm.keys.get_width(), 1);
+
+ assert_eq!(zm.get_copied(&0), None);
+ assert_eq!(zm.get_copied(&29), Some(92));
+ assert_eq!(zm.get_copied(&38), Some(83));
+ assert_eq!(zm.get_copied(&47), Some(774));
+ assert_eq!(zm.get_copied(&56), Some(65));
+ assert_eq!(zm.get_copied(&usize::MAX), None);
+ }
diff --git a/third_party/rust/zerovec/src/hashmap/algorithms.rs b/third_party/rust/zerovec/src/hashmap/algorithms.rs
new file mode 100644
index 0000000000..58ffc48f4c
--- /dev/null
+++ b/third_party/rust/zerovec/src/hashmap/algorithms.rs
@@ -0,0 +1,162 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use alloc::vec;
+use alloc::vec::Vec;
+use core::hash::{Hash, Hasher};
+use t1ha::T1haHasher;
+
+// Const seed to be used with [`T1haHasher::with_seed`].
+const SEED: u64 = 0xaabbccdd;
+
+/// Split the 64bit `hash` into (g, f0, f1).
+/// g denotes the highest 16bits of the hash modulo `m`, and is referred to as first level hash.
+/// (f0, f1) denotes the middle, and lower 24bits of the hash respectively.
+/// (f0, f1) are used to distribute the keys with same g, into distinct slots.
+///
+/// # Arguments
+///
+/// * `hash` - The hash to split.
+/// * `m` - The modulo used to split the hash.
+pub const fn split_hash64(hash: u64, m: usize) -> (usize, u32, u32) {
+ (
+ ((hash >> 48) as usize % m),
+ ((hash >> 24) as u32 & 0xffffff),
+ ((hash & 0xffffff) as u32),
+ )
+}
+
+/// Compute hash using [`T1haHasher`].
+pub fn compute_hash<K: Hash + ?Sized>(key: &K) -> u64 {
+ let mut hasher = T1haHasher::with_seed(SEED);
+ key.hash(&mut hasher);
+ hasher.finish()
+}
+
+/// Calculate the index using (f0, f1), (d0, d1) in modulo m.
+/// Returns [`None`] if d is (0, 0) or modulo is 0
+/// else returns the index computed using (f0 + f1 * d0 + d1) mod m.
+pub fn compute_index(f: (u32, u32), d: (u32, u32), m: u32) -> Option<usize> {
+ if d == (0, 0) || m == 0 {
+ None
+ } else {
+ Some((f.1.wrapping_mul(d.0).wrapping_add(f.0).wrapping_add(d.1) % m) as usize)
+ }
+}
+
+/// Compute displacements for the given `key_hashes`, which split the keys into distinct slots by a
+/// two-level hashing schema.
+ /// Returns a tuple where the first item is the displacement array and the second item is the
+/// reverse mapping used to permute keys, values into their slots.
+///
+/// 1. Split the hashes into (g, f0, f1).
+/// 2. Bucket and sort the split hash on g in descending order.
+/// 3. In decreasing order of bucket size, try until a (d0, d1) is found that splits the keys
+/// in the bucket into distinct slots.
+/// 4. Mark the slots for current bucket as occupied and store the reverse mapping.
+ /// 5. Repeat until all the keys have been assigned distinct slots.
+///
+/// # Arguments
+///
+/// * `key_hashes` - [`ExactSizeIterator`] over the hashed key values
+#[allow(clippy::indexing_slicing, clippy::unwrap_used)]
+pub fn compute_displacements(
+ key_hashes: impl ExactSizeIterator<Item = u64>,
+) -> (Vec<(u32, u32)>, Vec<usize>) {
+ let len = key_hashes.len();
+
+ // A vector to track the size of buckets for sorting.
+ let mut bucket_sizes = vec![0; len];
+
+ // A flattened representation of items in the buckets after applying first level hash function
+ let mut bucket_flatten = Vec::with_capacity(len);
+
+ // Compute initial displacement and bucket sizes
+
+ key_hashes.into_iter().enumerate().for_each(|(i, kh)| {
+ let h = split_hash64(kh, len);
+ bucket_sizes[h.0] += 1;
+ bucket_flatten.push((h, i))
+ });
+
+ // Sort by decreasing order of bucket_sizes.
+ bucket_flatten.sort_by(|&(ha, _), &(hb, _)| {
+ // ha.0, hb.0 are always within bounds of `bucket_sizes`
+ (bucket_sizes[hb.0], hb).cmp(&(bucket_sizes[ha.0], ha))
+ });
+
+ // Generation count while iterating buckets.
+ // Each trial of ((d0, d1), bucket chain) is a new generation.
+ // We use this to track which slots are assigned for the current bucket chain.
+ let mut generation = 0;
+
+ // Whether a slot has been occupied by previous buckets with a different first level hash (different
+ // bucket chain).
+ let mut occupied = vec![false; len];
+
+ // Track generation count for the slots.
+ // A slot is free only if it is unoccupied by the previous bucket chains and its
+ // assignment is not equal to the current generation.
+ let mut assignments = vec![0; len];
+
+ // Vec to store the displacements (saves us a recomputation of hash while assigning slots).
+ let mut current_displacements = Vec::with_capacity(16);
+
+ // (d0, d1) which splits the bucket into different slots
+ let mut displacements = vec![(0, 0); len];
+
+ // Vec to store mapping to the original order of keys.
+ // This is a permutation which will be applied to keys, values at the end.
+ let mut reverse_mapping = vec![0; len];
+
+ let mut start = 0;
+ while start < len {
+ // Bucket span with the same first level hash
+ // start is always within bounds of `bucket_flatten`
+ let g = bucket_flatten[start].0 .0;
+ // g is always within bounds of `bucket_sizes`
+ let end = start + bucket_sizes[g];
+ // start, end - 1 are always within bounds of `bucket_flatten`
+ let buckets = &bucket_flatten[start..end];
+
+ 'd0: for d0 in 0..len as u32 {
+ 'd1: for d1 in 0..len as u32 {
+ if (d0, d1) == (0, 0) {
+ continue;
+ }
+ current_displacements.clear();
+ generation += 1;
+
+ for ((_, f0, f1), _) in buckets {
+ let displacement_idx = compute_index((*f0, *f1), (d0, d1), len as u32).unwrap();
+
+ // displacement_idx is always within bounds
+ if occupied[displacement_idx] || assignments[displacement_idx] == generation {
+ continue 'd1;
+ }
+ assignments[displacement_idx] = generation;
+ current_displacements.push(displacement_idx);
+ }
+
+ // Successfully found a (d0, d1), store it at index g.
+ // g < displacements.len() due to modulo operation
+ displacements[g] = (d0, d1);
+
+ for (i, displacement_idx) in current_displacements.iter().enumerate() {
+ // `current_displacements` has same size as `buckets`
+ let (_, idx) = &buckets[i];
+
+ // displacement_idx is always within bounds
+ occupied[*displacement_idx] = true;
+ reverse_mapping[*displacement_idx] = *idx;
+ }
+ break 'd0;
+ }
+ }
+
+ start = end;
+ }
+
+ (displacements, reverse_mapping)
+}
diff --git a/third_party/rust/zerovec/src/hashmap/mod.rs b/third_party/rust/zerovec/src/hashmap/mod.rs
new file mode 100644
index 0000000000..e3aed11980
--- /dev/null
+++ b/third_party/rust/zerovec/src/hashmap/mod.rs
@@ -0,0 +1,240 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike};
+use crate::ZeroVec;
+use alloc::borrow::Borrow;
+use alloc::vec;
+use core::hash::Hash;
+
+pub mod algorithms;
+use algorithms::*;
+
+#[cfg(feature = "serde")]
+mod serde;
+
+/// A perfect zerohashmap optimized for lookups over immutable keys.
+///
+/// # Examples
+/// ```
+/// use zerovec::ZeroHashMap;
+///
+/// let hashmap =
+/// ZeroHashMap::<i32, str>::from_iter([(0, "a"), (1, "b"), (2, "c")]);
+/// assert_eq!(hashmap.get(&0), Some("a"));
+/// assert_eq!(hashmap.get(&2), Some("c"));
+/// assert_eq!(hashmap.get(&4), None);
+/// ```
+#[derive(Debug)]
+pub struct ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ /// Array of (d0, d1) which splits the keys with same first level hash into distinct
+ /// slots.
+ /// The ith index of the array splits the keys with first level hash i.
+ /// If no key with first level hash i is found in the original keys, (0, 0) is used as an empty
+ /// placeholder.
+ displacements: ZeroVec<'a, (u32, u32)>,
+ keys: K::Container,
+ values: V::Container,
+}
+
+impl<'a, K, V> ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ /// The number of elements in the [`ZeroHashMap`].
+ pub fn len(&self) -> usize {
+ self.values.zvl_len()
+ }
+
+ /// Whether the [`ZeroHashMap`] is empty.
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+}
+
+impl<'a, K, V> ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized + Hash + Eq,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ /// Given a `key` return the index for the key or [`None`] if the key is absent.
+ fn index<A>(&self, key: &A) -> Option<usize>
+ where
+ A: Borrow<K> + ?Sized,
+ {
+ let hash = compute_hash(key.borrow());
+ let (g, f0, f1) = split_hash64(hash, self.len());
+
+ #[allow(clippy::unwrap_used)] // g is in-range
+ let (d0, d1) = self.displacements.get(g).unwrap();
+ let index = compute_index((f0, f1), (d0, d1), self.displacements.len() as u32)?;
+
+ #[allow(clippy::unwrap_used)] // index is in 0..self.keys.len()
+ let found = self.keys.zvl_get(index).unwrap();
+ if K::Container::zvl_get_as_t(found, |found| found == key.borrow()) {
+ Some(index)
+ } else {
+ None
+ }
+ }
+
+ /// Get the value corresponding to `key`.
+ /// If absent [`None`] is returned.
+ ///
+ /// # Example
+ /// ```
+ /// use zerovec::ZeroHashMap;
+ ///
+ /// let hashmap = ZeroHashMap::<str, str>::from_iter([("a", "A"), ("z", "Z")]);
+ ///
+ /// assert_eq!(hashmap.get("a"), Some("A"));
+ /// assert_eq!(hashmap.get("z"), Some("Z"));
+ /// assert_eq!(hashmap.get("0"), None);
+ /// ```
+ pub fn get<'b, A>(&'b self, key: &A) -> Option<&'b V::GetType>
+ where
+ A: Borrow<K> + ?Sized + 'b,
+ {
+ self.index(key).and_then(|i| self.values.zvl_get(i))
+ }
+
+ /// Returns whether `key` is contained in this hashmap
+ ///
+ /// # Example
+ /// ```rust
+ /// use zerovec::ZeroHashMap;
+ ///
+ /// let hashmap = ZeroHashMap::<str, str>::from_iter([("a", "A"), ("z", "Z")]);
+ ///
+ /// assert!(hashmap.contains_key("a"));
+ /// assert!(!hashmap.contains_key("p"));
+ /// ```
+ pub fn contains_key(&self, key: &K) -> bool {
+ self.index(key).is_some()
+ }
+}
+
+impl<'a, K, V> ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ // Produce an iterator over (key, value) pairs.
+ pub fn iter<'b>(
+ &'b self,
+ ) -> impl ExactSizeIterator<
+ Item = (
+ &'b <K as ZeroMapKV<'a>>::GetType,
+ &'b <V as ZeroMapKV<'a>>::GetType,
+ ),
+ > {
+ (0..self.len()).map(|index| {
+ (
+ #[allow(clippy::unwrap_used)] // index is in range
+ self.keys.zvl_get(index).unwrap(),
+ #[allow(clippy::unwrap_used)] // index is in range
+ self.values.zvl_get(index).unwrap(),
+ )
+ })
+ }
+
+ // Produce an iterator over keys.
+ pub fn iter_keys<'b>(
+ &'b self,
+ ) -> impl ExactSizeIterator<Item = &'b <K as ZeroMapKV<'a>>::GetType> {
+ #[allow(clippy::unwrap_used)] // index is in range
+ (0..self.len()).map(|index| self.keys.zvl_get(index).unwrap())
+ }
+
+ // Produce an iterator over values.
+ pub fn iter_values<'b>(
+ &'b self,
+ ) -> impl ExactSizeIterator<Item = &'b <V as ZeroMapKV<'a>>::GetType> {
+ #[allow(clippy::unwrap_used)] // index is in range
+ (0..self.len()).map(|index| self.values.zvl_get(index).unwrap())
+ }
+}
+
+impl<'a, K, V, A, B> FromIterator<(A, B)> for ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized + Hash + Eq,
+ V: ZeroMapKV<'a> + ?Sized,
+ B: Borrow<V>,
+ A: Borrow<K>,
+{
+ /// Build a [`ZeroHashMap`] from an iterator returning (K, V) tuples.
+ ///
+ /// # Example
+ /// ```
+ /// use zerovec::ZeroHashMap;
+ ///
+ /// let hashmap = ZeroHashMap::<i32, str>::from_iter([
+ /// (1, "a"),
+ /// (2, "b"),
+ /// (3, "c"),
+ /// (4, "d"),
+ /// ]);
+ /// assert_eq!(hashmap.get(&1), Some("a"));
+ /// assert_eq!(hashmap.get(&2), Some("b"));
+ /// assert_eq!(hashmap.get(&3), Some("c"));
+ /// assert_eq!(hashmap.get(&4), Some("d"));
+ /// ```
+ fn from_iter<T: IntoIterator<Item = (A, B)>>(iter: T) -> Self {
+ let iter = iter.into_iter();
+ let size_hint = match iter.size_hint() {
+ (_, Some(upper)) => upper,
+ (lower, None) => lower,
+ };
+
+ let mut key_hashes = vec![];
+ key_hashes.reserve(size_hint);
+ let mut keys = K::Container::zvl_with_capacity(size_hint);
+ let mut values = V::Container::zvl_with_capacity(size_hint);
+ for (k, v) in iter {
+ keys.zvl_push(k.borrow());
+ key_hashes.push(compute_hash(k.borrow()));
+ values.zvl_push(v.borrow());
+ }
+
+ let (displacements, mut reverse_mapping) = compute_displacements(key_hashes.into_iter());
+
+ keys.zvl_permute(&mut reverse_mapping.clone());
+ values.zvl_permute(&mut reverse_mapping);
+
+ Self {
+ displacements: ZeroVec::alloc_from_slice(&displacements),
+ values,
+ keys,
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::ule::AsULE;
+ use rand::{distributions::Standard, Rng, SeedableRng};
+ use rand_pcg::Lcg64Xsh32;
+
+ #[test]
+ fn test_zhms_u64k_u64v() {
+ const N: usize = 65530;
+ let seed = u64::from_le_bytes(*b"testseed");
+ let rng = Lcg64Xsh32::seed_from_u64(seed);
+ let kv: Vec<(u64, u64)> = rng.sample_iter(&Standard).take(N).collect();
+ let hashmap: ZeroHashMap<u64, u64> =
+ ZeroHashMap::from_iter(kv.iter().map(|e| (&e.0, &e.1)));
+ for (k, v) in kv {
+ assert_eq!(
+ hashmap.get(&k).copied().map(<u64 as AsULE>::from_unaligned),
+ Some(v),
+ );
+ }
+ }
+}
diff --git a/third_party/rust/zerovec/src/hashmap/serde.rs b/third_party/rust/zerovec/src/hashmap/serde.rs
new file mode 100644
index 0000000000..7a4941205c
--- /dev/null
+++ b/third_party/rust/zerovec/src/hashmap/serde.rs
@@ -0,0 +1,147 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::ZeroHashMap;
+use crate::{
+ map::{ZeroMapKV, ZeroVecLike},
+ ZeroVec,
+};
+
+use serde::{de, Deserialize, Serialize};
+
+impl<'a, K, V> Serialize for ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + Serialize + ?Sized,
+ V: ZeroMapKV<'a> + Serialize + ?Sized,
+ K::Container: Serialize,
+ V::Container: Serialize,
+{
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ (&self.displacements, &self.keys, &self.values).serialize(serializer)
+ }
+}
+
+impl<'de, 'a, K, V> Deserialize<'de> for ZeroHashMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+ K::Container: Deserialize<'de>,
+ V::Container: Deserialize<'de>,
+ 'de: 'a,
+{
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ let (displacements, keys, values): (ZeroVec<(u32, u32)>, K::Container, V::Container) =
+ Deserialize::deserialize(deserializer)?;
+ if keys.zvl_len() != values.zvl_len() {
+ return Err(de::Error::custom(
+ "Mismatched key and value sizes in ZeroHashMap",
+ ));
+ }
+ if displacements.zvl_len() != keys.zvl_len() {
+ return Err(de::Error::custom(
+ "Mismatched displacements and key, value sizes in ZeroHashMap",
+ ));
+ }
+ Ok(Self {
+ displacements,
+ keys,
+ values,
+ })
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use crate::{VarZeroVec, ZeroHashMap, ZeroVec};
+ use serde::{Deserialize, Serialize};
+
+ const JSON_STR: &str = "[[[0,1],[0,0],[0,1]],[2,1,0],[\"c\",\"b\",\"a\"]]";
+
+ const BINCODE_BYTES: &[u8] = &[
+ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0,
+ 3, 0, 0, 0, 0, 0, 1, 0, 2, 0, 99, 98, 97,
+ ];
+
+ #[derive(Serialize, Deserialize)]
+ struct DeriveTestZeroHashMap<'data> {
+ #[serde(borrow)]
+ _data: ZeroHashMap<'data, str, [u8]>,
+ }
+
+ fn make_zerohashmap() -> ZeroHashMap<'static, u32, str> {
+ ZeroHashMap::from_iter([(0, "a"), (1, "b"), (2, "c")])
+ }
+
+ fn build_invalid_hashmap_str(
+ displacements: Vec<(u32, u32)>,
+ keys: Vec<u32>,
+ values: Vec<&str>,
+ ) -> String {
+ let invalid_hm: ZeroHashMap<u32, str> = ZeroHashMap {
+ displacements: ZeroVec::alloc_from_slice(&displacements),
+ keys: ZeroVec::alloc_from_slice(&keys),
+ values: VarZeroVec::<str>::from(&values),
+ };
+ serde_json::to_string(&invalid_hm).expect("serialize")
+ }
+
+ #[test]
+ fn test_invalid_deser_zhm() {
+ // Invalid hashmap |keys| != |values|
+ let mut invalid_hm_str =
+ build_invalid_hashmap_str(vec![(0, 1), (0, 0)], vec![1, 2], vec!["a", "b", "c"]);
+
+ assert_eq!(
+ serde_json::from_str::<ZeroHashMap<u32, str>>(&invalid_hm_str)
+ .unwrap_err()
+ .to_string(),
+ "Mismatched key and value sizes in ZeroHashMap"
+ );
+
+ // Invalid hashmap |displacements| != |keys| == |values|
+ // |displacements| = 2, |keys| = 3, |values| = 3
+ invalid_hm_str =
+ build_invalid_hashmap_str(vec![(0, 1), (0, 0)], vec![2, 1, 0], vec!["a", "b", "c"]);
+
+ assert_eq!(
+ serde_json::from_str::<ZeroHashMap<u32, str>>(&invalid_hm_str)
+ .unwrap_err()
+ .to_string(),
+ "Mismatched displacements and key, value sizes in ZeroHashMap"
+ );
+ }
+
+ #[test]
+ fn test_serde_valid_deser_zhm() {
+ let hm = make_zerohashmap();
+ let json_str = serde_json::to_string(&hm).expect("serialize");
+ assert_eq!(json_str, JSON_STR);
+ let deserialized_hm: ZeroHashMap<u32, str> =
+ serde_json::from_str(JSON_STR).expect("deserialize");
+ assert_eq!(
+ hm.iter().collect::<Vec<_>>(),
+ deserialized_hm.iter().collect::<Vec<_>>()
+ );
+ }
+
+ #[test]
+ fn test_bincode_zhm() {
+ let hm = make_zerohashmap();
+ let bincode_bytes = bincode::serialize(&hm).expect("serialize");
+ assert_eq!(bincode_bytes, BINCODE_BYTES);
+ let deserialized_hm: ZeroHashMap<u32, str> =
+ bincode::deserialize(BINCODE_BYTES).expect("deserialize");
+ assert_eq!(
+ hm.iter().collect::<Vec<_>>(),
+ deserialized_hm.iter().collect::<Vec<_>>()
+ );
+ }
+}
diff --git a/third_party/rust/zerovec/src/lib.rs b/third_party/rust/zerovec/src/lib.rs
new file mode 100644
index 0000000000..961d62f34c
--- /dev/null
+++ b/third_party/rust/zerovec/src/lib.rs
@@ -0,0 +1,558 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Zero-copy vector abstractions for arbitrary types, backed by byte slices.
+//!
+//! `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in
+//! zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with
+ //! proc macros.
+//!
+//! Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing
+//! read-only data.
+//!
+//! This crate has four main types:
+//!
+//! - [`ZeroVec<'a, T>`] (and [`ZeroSlice<T>`](ZeroSlice)) for fixed-width types like `u32`
+ //! - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice<T>`](VarZeroSlice)) for variable-width types like `str`
+//! - [`ZeroMap<'a, K, V>`] to map from `K` to `V`
+//! - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V`
+//!
+//! The first two are intended as close-to-drop-in replacements for `Vec<T>` in Serde structs. The third and fourth are
+ //! intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap). When used with Serde derives, **be sure to apply
+//! `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`].
+//!
+//! [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like
+//! [`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization
+//! from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing
+//! from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from,
+ //! avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#performance) for more information)
+//! on deserialization.
+//!
+//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate
+//! works under the hood.
+//!
+//! # Cargo features
+//!
+//! This crate has several optional Cargo features:
+//! - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde)
+//! - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful
+//! in situations involving a lot of zero-copy deserialization.
+//! - `derive`: Makes it easier to use custom types in these collections by providing the [`#[make_ule]`](crate::make_ule) and
+//! [`#[make_varule]`](crate::make_varule) proc macros, which generate appropriate [`ULE`](crate::ule::ULE) and
+//! [`VarULE`](crate::ule::VarULE)-conformant types for a given "normal" type.
+ //! - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`.
+//!
+//! [`ZeroVec<'a, T>`]: ZeroVec
+//! [`VarZeroVec<'a, T>`]: VarZeroVec
+//! [`ZeroMap<'a, K, V>`]: ZeroMap
+//! [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d
+//! [`Cow<'a, T>`]: alloc::borrow::Cow
+//!
+//! # Examples
+//!
+//! Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode:
+//!
+//! ```
+//! # #[cfg(feature = "serde")] {
+//! use zerovec::{VarZeroVec, ZeroVec};
+//!
+//! // This example requires the "serde" feature
+//! #[derive(serde::Serialize, serde::Deserialize)]
+//! pub struct DataStruct<'data> {
+//! #[serde(borrow)]
+//! nums: ZeroVec<'data, u32>,
+//! #[serde(borrow)]
+//! chars: ZeroVec<'data, char>,
+//! #[serde(borrow)]
+//! strs: VarZeroVec<'data, str>,
+//! }
+//!
+//! let data = DataStruct {
+//! nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]),
+//! chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']),
+//! strs: VarZeroVec::from(&["hello", "world"]),
+//! };
+//! let bincode_bytes =
+//! bincode::serialize(&data).expect("Serialization should be successful");
+//! assert_eq!(bincode_bytes.len(), 67);
+//!
+//! let deserialized: DataStruct = bincode::deserialize(&bincode_bytes)
+//! .expect("Deserialization should be successful");
+//! assert_eq!(deserialized.nums.first(), Some(211));
+//! assert_eq!(deserialized.chars.get(1), Some('冇'));
+//! assert_eq!(deserialized.strs.get(1), Some("world"));
+//! // The deserialization will not have allocated anything
+//! assert!(!deserialized.nums.is_owned());
+//! # } // feature = "serde"
+//! ```
+//!
+//! Use custom types inside of ZeroVec:
+//!
+//! ```rust
+//! # #[cfg(all(feature = "serde", feature = "derive"))] {
+//! use zerovec::{ZeroVec, VarZeroVec, ZeroMap};
+//! use std::borrow::Cow;
+//! use zerovec::ule::encode_varule_to_box;
+//!
+//! // custom fixed-size ULE type for ZeroVec
+//! #[zerovec::make_ule(DateULE)]
+//! #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+//! struct Date {
+//! y: u64,
+//! m: u8,
+//! d: u8
+//! }
+//!
+//! // custom variable sized VarULE type for VarZeroVec
+//! #[zerovec::make_varule(PersonULE)]
+//! #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE
+//! #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+//! struct Person<'a> {
+//! birthday: Date,
+//! favorite_character: char,
+//! #[serde(borrow)]
+//! name: Cow<'a, str>,
+//! }
+//!
+//! #[derive(serde::Serialize, serde::Deserialize)]
+//! struct Data<'a> {
+//! #[serde(borrow)]
+//! important_dates: ZeroVec<'a, Date>,
+//! // note: VarZeroVec always must reference the ULE type directly
+//! #[serde(borrow)]
+//! important_people: VarZeroVec<'a, PersonULE>,
+//! #[serde(borrow)]
+//! birthdays_to_people: ZeroMap<'a, Date, PersonULE>
+//! }
+//!
+//!
+//! let person1 = Person {
+//! birthday: Date { y: 1990, m: 9, d: 7},
+//! favorite_character: 'π',
+//! name: Cow::from("Kate")
+//! };
+//! let person2 = Person {
+//! birthday: Date { y: 1960, m: 5, d: 25},
+//! favorite_character: '冇',
+//! name: Cow::from("Jesse")
+//! };
+//!
+//! let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]);
+//! let important_people = VarZeroVec::from(&[&person1, &person2]);
+//! let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new();
+//! // `.insert_var_v()` is slightly more convenient over `.insert()` for custom ULE types
+//! birthdays_to_people.insert_var_v(&person1.birthday, &person1);
+//! birthdays_to_people.insert_var_v(&person2.birthday, &person2);
+//!
+//! let data = Data { important_dates, important_people, birthdays_to_people };
+//!
+//! let bincode_bytes = bincode::serialize(&data)
+//! .expect("Serialization should be successful");
+//! assert_eq!(bincode_bytes.len(), 168);
+//!
+//! let deserialized: Data = bincode::deserialize(&bincode_bytes)
+//! .expect("Deserialization should be successful");
+//!
+//! assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943);
+//! assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse");
+//! assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate");
+//! assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate");
+//!
+ //! # } // feature = serde and derive
+//! ```
+//!
+//! # Performance
+//!
+//! `zerovec` is designed for fast deserialization from byte buffers with zero memory allocations
+//! while minimizing performance regressions for common vector operations.
+//!
+//! Benchmark results on x86_64:
+//!
+//! | Operation | `Vec<T>` | `zerovec` |
+//! |---|---|---|
+//! | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns |
+//! | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns |
+//! | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns |
+//! | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs |
+//! | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns |
+//! | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns |
+//!
+//! \* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.*
+//!
+//! | Operation | `HashMap<K,V>` | `LiteMap<K,V>` | `ZeroMap<K,V>` |
+//! |---|---|---|---|
+//! | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns |
+//! | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms |
+//! | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns |
+//! | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns |
+//!
+//! Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`.
+//!
+//! The benches used to generate the above table can be found in the `benches` directory in the project repository.
+//! `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type
+//! is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`.
+
+// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, feature = "std")), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::indexing_slicing,
+ clippy::unwrap_used,
+ clippy::expect_used,
+ clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums,
+ missing_debug_implementations,
+ )
+)]
+// this crate does a lot of nuanced lifetime manipulation, being explicit
+// is better here.
+#![allow(clippy::needless_lifetimes)]
+
+extern crate alloc;
+
+mod error;
+mod flexzerovec;
+#[cfg(feature = "hashmap")]
+pub mod hashmap;
+mod map;
+mod map2d;
+#[cfg(test)]
+pub mod samples;
+mod varzerovec;
+mod zerovec;
+
+// This must be after `mod zerovec` for some impls on `ZeroSlice<RawBytesULE>`
+// to show up in the right spot in the docs
+pub mod ule;
+
+#[cfg(feature = "yoke")]
+mod yoke_impls;
+mod zerofrom_impls;
+
+pub use crate::error::ZeroVecError;
+#[cfg(feature = "hashmap")]
+pub use crate::hashmap::ZeroHashMap;
+pub use crate::map::map::ZeroMap;
+pub use crate::map2d::map::ZeroMap2d;
+pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec};
+pub use crate::zerovec::{ZeroSlice, ZeroVec};
+
+pub(crate) use flexzerovec::chunk_to_usize;
+
+#[doc(hidden)]
+pub mod __zerovec_internal_reexport {
+ pub use zerofrom::ZeroFrom;
+
+ pub use alloc::boxed;
+
+ #[cfg(feature = "serde")]
+ pub use serde;
+}
+
+pub mod maps {
+ //! This module contains additional utility types and traits for working with
+ //! [`ZeroMap`] and [`ZeroMap2d`]. See their docs for more details on the general purpose
+ //! of these types.
+ //!
+ //! [`ZeroMapBorrowed`] and [`ZeroMap2dBorrowed`] are versions of [`ZeroMap`] and [`ZeroMap2d`]
+ //! that can be used when you wish to guarantee that the map data is always borrowed, leading to
+ //! relaxed lifetime constraints.
+ //!
+ //! The [`ZeroMapKV`] trait is required to be implemented on any type that needs to be used
+ //! within a map type. [`ZeroVecLike`] and [`MutableZeroVecLike`] are traits used in the
+ //! internal workings of the map types, and should typically not be used or implemented by
+ //! users of this crate.
+ #[doc(no_inline)]
+ pub use crate::map::ZeroMap;
+ pub use crate::map::ZeroMapBorrowed;
+
+ #[doc(no_inline)]
+ pub use crate::map2d::ZeroMap2d;
+ pub use crate::map2d::ZeroMap2dBorrowed;
+
+ pub use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike};
+
+ pub use crate::map2d::ZeroMap2dCursor;
+}
+
+pub mod vecs {
+ //! This module contains additional utility types for working with
+ //! [`ZeroVec`] and [`VarZeroVec`]. See their docs for more details on the general purpose
+ //! of these types.
+ //!
+ //! [`ZeroSlice`] and [`VarZeroSlice`] provide slice-like versions of the vector types
+ //! for use behind references and in custom ULE types.
+ //!
+ //! [`VarZeroVecOwned`] is a special owned/mutable version of [`VarZeroVec`], allowing
+ //! direct manipulation of the backing buffer.
+
+ #[doc(no_inline)]
+ pub use crate::zerovec::{ZeroSlice, ZeroVec};
+
+ #[doc(no_inline)]
+ pub use crate::varzerovec::{VarZeroSlice, VarZeroVec};
+
+ pub use crate::varzerovec::{Index16, Index32, VarZeroVecFormat, VarZeroVecOwned};
+
+ pub use crate::flexzerovec::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned};
+}
+
+// Proc macro reexports
+//
+// These exist so that our docs can use intra-doc links.
+// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from
+// a submodule
+
+/// Generate a corresponding [`ULE`] type and the relevant [`AsULE`] implementations for this type
+///
+/// This can be attached to structs containing only [`AsULE`] types, or C-like enums that have `#[repr(u8)]`
+/// and all explicit discriminants.
+///
+/// The type must be [`Copy`], [`PartialEq`], and [`Eq`].
+///
+/// `#[make_ule]` will automatically derive the following traits on the [`ULE`] type:
+///
+/// - [`Ord`] and [`PartialOrd`]
+/// - [`ZeroMapKV`]
+///
+/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`.
+/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`.
+///
+/// The following traits are available to derive, but not automatic:
+///
+/// - [`Debug`]
+///
+/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`.
+///
+/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist.
+///
+/// For enums, this attribute will generate a crate-public `fn new_from_u8(value: u8) -> Option<Self>`
+/// method on the main type that allows one to construct the value from a u8. If this method is desired
+/// to be more public, it should be wrapped.
+///
+/// [`ULE`]: ule::ULE
+/// [`AsULE`]: ule::AsULE
+/// [`ZeroMapKV`]: maps::ZeroMapKV
+///
+/// # Example
+///
+/// ```rust
+/// use zerovec::ZeroVec;
+///
+/// #[zerovec::make_ule(DateULE)]
+/// #[derive(
+/// Copy,
+/// Clone,
+/// PartialEq,
+/// Eq,
+/// Ord,
+/// PartialOrd,
+/// serde::Serialize,
+/// serde::Deserialize,
+/// )]
+/// struct Date {
+/// y: u64,
+/// m: u8,
+/// d: u8,
+/// }
+///
+/// #[derive(serde::Serialize, serde::Deserialize)]
+/// struct Dates<'a> {
+/// #[serde(borrow)]
+/// dates: ZeroVec<'a, Date>,
+/// }
+///
+/// let dates = Dates {
+/// dates: ZeroVec::alloc_from_slice(&[
+/// Date {
+/// y: 1985,
+/// m: 9,
+/// d: 3,
+/// },
+/// Date {
+/// y: 1970,
+/// m: 2,
+/// d: 20,
+/// },
+/// Date {
+/// y: 1990,
+/// m: 6,
+/// d: 13,
+/// },
+/// ]),
+/// };
+///
+/// let bincode_bytes =
+/// bincode::serialize(&dates).expect("Serialization should be successful");
+///
+/// // Will deserialize without allocations
+/// let deserialized: Dates = bincode::deserialize(&bincode_bytes)
+/// .expect("Deserialization should be successful");
+///
+/// assert_eq!(deserialized.dates.get(1).unwrap().y, 1970);
+/// assert_eq!(deserialized.dates.get(2).unwrap().d, 13);
+/// ```
+#[cfg(feature = "derive")]
+pub use zerovec_derive::make_ule;
+
+/// Generate a corresponding [`VarULE`] type and the relevant [`EncodeAsVarULE`]/[`zerofrom::ZeroFrom`]
+/// implementations for this type.
+///
+/// This can be attached to structs containing only [`AsULE`] types with the last fields being
+/// [`Cow<'a, str>`](alloc::borrow::Cow), [`ZeroSlice`], or [`VarZeroSlice`]. If there is more than one such field, it will be represented
+/// using [`MultiFieldsULE`](crate::ule::MultiFieldsULE) and getters will be generated. Other VarULE fields will be detected if they are
+/// tagged with `#[zerovec::varule(NameOfVarULETy)]`.
+///
+/// The type must be [`PartialEq`] and [`Eq`].
+///
+/// [`EncodeAsVarULE`] and [`zerofrom::ZeroFrom`] are useful for avoiding the need to deal with
+/// the [`VarULE`] type directly. In particular, it is recommended to use [`zerofrom::ZeroFrom`]
+/// to convert the [`VarULE`] type back to this type in a cheap, zero-copy way (see the example below
+/// for more details).
+///
+/// `#[make_varule]` will automatically derive the following traits on the [`VarULE`] type:
+///
+/// - [`Ord`] and [`PartialOrd`]
+/// - [`ZeroMapKV`]
+///
+/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`.
+/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`.
+///
+/// The following traits are available to derive, but not automatic:
+///
+/// - [`Debug`]
+/// - [`Serialize`](serde::Serialize)
+/// - [`Deserialize`](serde::Deserialize)
+///
+/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`.
+///
+/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist.
+///
+/// This implementation will also by default autogenerate [`Ord`] and [`PartialOrd`] on the [`VarULE`] type based on
+/// the implementation on `Self`. You can opt out of this with `#[zerovec::skip_derive(Ord)]`.
+///
+/// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self`
+/// for convenience. This allows for a little more flexibility encoding slices.
+///
+/// [`EncodeAsVarULE`]: ule::EncodeAsVarULE
+/// [`VarULE`]: ule::VarULE
+/// [`ULE`]: ule::ULE
+/// [`AsULE`]: ule::AsULE
+/// [`ZeroMapKV`]: maps::ZeroMapKV
+///
+/// # Example
+///
+/// ```rust
+/// use std::borrow::Cow;
+/// use zerofrom::ZeroFrom;
+/// use zerovec::ule::encode_varule_to_box;
+/// use zerovec::{VarZeroVec, ZeroMap, ZeroVec};
+///
+/// // custom fixed-size ULE type for ZeroVec
+/// #[zerovec::make_ule(DateULE)]
+/// #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+/// struct Date {
+/// y: u64,
+/// m: u8,
+/// d: u8,
+/// }
+///
+/// // custom variable sized VarULE type for VarZeroVec
+/// #[zerovec::make_varule(PersonULE)]
+/// #[zerovec::derive(Serialize, Deserialize)]
+/// #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+/// struct Person<'a> {
+/// birthday: Date,
+/// favorite_character: char,
+/// #[serde(borrow)]
+/// name: Cow<'a, str>,
+/// }
+///
+/// #[derive(serde::Serialize, serde::Deserialize)]
+/// struct Data<'a> {
+/// // note: VarZeroVec always must reference the ULE type directly
+/// #[serde(borrow)]
+/// important_people: VarZeroVec<'a, PersonULE>,
+/// }
+///
+/// let person1 = Person {
+/// birthday: Date {
+/// y: 1990,
+/// m: 9,
+/// d: 7,
+/// },
+/// favorite_character: 'π',
+/// name: Cow::from("Kate"),
+/// };
+/// let person2 = Person {
+/// birthday: Date {
+/// y: 1960,
+/// m: 5,
+/// d: 25,
+/// },
+/// favorite_character: '冇',
+/// name: Cow::from("Jesse"),
+/// };
+///
+/// let important_people = VarZeroVec::from(&[person1, person2]);
+/// let data = Data { important_people };
+///
+/// let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful");
+///
+/// // Will deserialize without allocations
+/// let deserialized: Data =
+/// bincode::deserialize(&bincode_bytes).expect("Deserialization should be successful");
+///
+/// assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse");
+/// assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate");
+///
+/// // Since VarZeroVec produces PersonULE types, it's convenient to use ZeroFrom
+/// // to recoup Person values in a zero-copy way
+/// let person_converted: Person =
+/// ZeroFrom::zero_from(deserialized.important_people.get(1).unwrap());
+/// assert_eq!(person_converted.name, "Jesse");
+/// assert_eq!(person_converted.birthday.y, 1960);
+/// ```
+#[cfg(feature = "derive")]
+pub use zerovec_derive::make_varule;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use core::mem::size_of;
+
+    /// Asserts that `size_of::<$type>()` matches the pattern `$sizes`.
+    ///
+    /// Several acceptable sizes may be listed with `|`, because the size
+    /// might differ across Rust versions or channels.
+    macro_rules! check_size_of {
+        ($sizes:pat, $type:path) => {{
+            // Measure once so the check and the failure message agree.
+            let actual = size_of::<$type>();
+            assert!(
+                matches!(actual, $sizes),
+                concat!(stringify!($type), " is of size {}"),
+                actual
+            );
+        }};
+    }
+
+    /// Pins the in-memory size of the main container types and of their
+    /// `Option` wrappers.
+    #[test]
+    fn check_sizes() {
+        // Bare containers.
+        check_size_of!(24, ZeroVec<u8>);
+        check_size_of!(24, ZeroVec<u32>);
+        check_size_of!(32 | 24, VarZeroVec<[u8]>);
+        check_size_of!(32 | 24, VarZeroVec<str>);
+        check_size_of!(48, ZeroMap<u32, u32>);
+        check_size_of!(56 | 48, ZeroMap<u32, str>);
+        check_size_of!(56 | 48, ZeroMap<str, u32>);
+        check_size_of!(64 | 48, ZeroMap<str, str>);
+        check_size_of!(120 | 96, ZeroMap2d<str, str, str>);
+        check_size_of!(32 | 24, vecs::FlexZeroVec);
+
+        // The same containers wrapped in `Option`.
+        check_size_of!(32, Option<ZeroVec<u8>>);
+        check_size_of!(32, Option<VarZeroVec<str>>);
+        check_size_of!(64 | 56, Option<ZeroMap<str, str>>);
+        check_size_of!(120 | 104, Option<ZeroMap2d<str, str, str>>);
+        check_size_of!(32, Option<vecs::FlexZeroVec>);
+    }
+}
diff --git a/third_party/rust/zerovec/src/map/borrowed.rs b/third_party/rust/zerovec/src/map/borrowed.rs
new file mode 100644
index 0000000000..98b2d2f9d1
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/borrowed.rs
@@ -0,0 +1,325 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::AsULE;
+use crate::ZeroSlice;
+
+use core::cmp::Ordering;
+use core::fmt;
+
+pub use super::kv::ZeroMapKV;
+pub use super::vecs::{MutableZeroVecLike, ZeroVecLike};
+
+/// A borrowed-only counterpart of [`ZeroMap`](super::ZeroMap).
+///
+/// This is useful for fully-zero-copy deserialization from non-human-readable
+/// serialization formats. It also has the advantage that it can return references that live for
+/// the lifetime of the backing buffer as opposed to that of the [`ZeroMapBorrowed`] instance.
+///
+/// A [`ZeroMap`](super::ZeroMap) can be viewed as a `ZeroMapBorrowed` via
+/// [`ZeroMap::as_borrowed`](super::ZeroMap::as_borrowed).
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::maps::ZeroMapBorrowed;
+///
+/// // Example byte buffer representing the map { 1: "one" }
+/// let BINCODE_BYTES: &[u8; 29] = &[
+///     4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+///     0, 0, 111, 110, 101,
+/// ];
+///
+/// // Deserializing to ZeroMapBorrowed requires no heap allocations.
+/// let zero_map: ZeroMapBorrowed<u32, str> =
+///     bincode::deserialize(BINCODE_BYTES)
+///         .expect("Should deserialize successfully");
+/// assert_eq!(zero_map.get(&1), Some("one"));
+/// ```
+pub struct ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Borrowed slice holding the keys.
+    pub(crate) keys: &'a K::Slice,
+    /// Borrowed slice holding the values; `values[i]` pairs with `keys[i]`.
+    pub(crate) values: &'a V::Slice,
+}
+
+// Implemented by hand rather than derived so that no `Copy` bound is placed
+// on `K` or `V` themselves.
+impl<'a, K, V> Copy for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+}
+// Implemented by hand rather than derived so that no `Clone` bound is placed
+// on `K` or `V` themselves.
+impl<'a, K, V> Clone for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Returns a bitwise copy of this map (the type is `Copy`).
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'a, K, V> Default for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::Slice: 'static,
+    V::Slice: 'static,
+{
+    /// Equivalent to [`ZeroMapBorrowed::new()`]: an empty map.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<'a, K, V> ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::Slice: 'static,
+    V::Slice: 'static,
+{
+    /// Builds a new, empty `ZeroMapBorrowed<K, V>`.
+    ///
+    /// Note: Since [`ZeroMapBorrowed`] is not mutable, the return value will be a stub unless
+    /// converted into a [`ZeroMap`](super::ZeroMap).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::maps::ZeroMapBorrowed;
+    ///
+    /// let zm: ZeroMapBorrowed<u16, str> = ZeroMapBorrowed::new();
+    /// assert!(zm.is_empty());
+    /// ```
+    pub fn new() -> Self {
+        // Both sides start out as the container's empty borrowed slice.
+        let keys = K::Container::zvl_new_borrowed();
+        let values = V::Container::zvl_new_borrowed();
+        Self { keys, values }
+    }
+}
+
+impl<'a, K, V> ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    // Stores the two slices as-is; nothing about them is checked here.
+    // NOTE(review): presumably the caller must guarantee that `keys` and
+    // `values` have the same length — confirm against the databake callers.
+    #[doc(hidden)] // databake internal
+    pub const unsafe fn from_parts_unchecked(
+        keys: &'a K::Slice,
+        values: &'a V::Slice,
+    ) -> Self {
+        Self { keys, values }
+    }
+
+    /// The number of elements in the [`ZeroMapBorrowed`]
+    ///
+    /// This is read from the values slice.
+    pub fn len(&self) -> usize {
+        self.values.zvl_len()
+    }
+
+    /// Whether the [`ZeroMapBorrowed`] holds no elements
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+impl<'a, K, V> ZeroMapBorrowed<'a, K, V>
+where
+ K: ZeroMapKV<'a> + Ord,
+ V: ZeroMapKV<'a>,
+ K: ?Sized,
+ V: ?Sized,
+{
+ /// Get the value associated with `key`, if it exists.
+ ///
+ /// This is able to return values that live longer than the map itself
+ /// since they borrow directly from the backing buffer. This is the
+ /// primary advantage of using [`ZeroMapBorrowed`](super::ZeroMapBorrowed) over [`ZeroMap`](super::ZeroMap).
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMapBorrowed;
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// let borrowed = map.as_borrowed();
+ /// assert_eq!(borrowed.get(&1), Some("one"));
+ /// assert_eq!(borrowed.get(&3), None);
+ /// ```
+ pub fn get(&self, key: &K) -> Option<&'a V::GetType> {
+     // A successful binary search over the keys gives the index of the
+     // matching value; a miss means the key is absent.
+     match self.keys.zvl_binary_search(key) {
+         Ok(found) => self.values.zvl_get(found),
+         Err(_) => None,
+     }
+ }
+
+ /// Binary search the map with `predicate` to find a key, returning the value.
+ ///
+ /// This is able to return values that live longer than the map itself
+ /// since they borrow directly from the backing buffer. This is the
+ /// primary advantage of using [`ZeroMapBorrowed`](super::ZeroMapBorrowed) over [`ZeroMap`](super::ZeroMap).
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMapBorrowed;
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// let borrowed = map.as_borrowed();
+ /// assert_eq!(borrowed.get_by(|probe| probe.cmp(&1)), Some("one"));
+ /// assert_eq!(borrowed.get_by(|probe| probe.cmp(&3)), None);
+ /// ```
+ pub fn get_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<&'a V::GetType> {
+     // Same lookup as `get`, except the caller's comparator drives the search.
+     match self.keys.zvl_binary_search_by(predicate) {
+         Ok(found) => self.values.zvl_get(found),
+         Err(_) => None,
+     }
+ }
+
+ /// Returns whether `key` is contained in this map
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMapBorrowed;
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// let borrowed = map.as_borrowed();
+ /// assert!(borrowed.contains_key(&1));
+ /// assert!(!borrowed.contains_key(&3));
+ /// ```
+ pub fn contains_key(&self, key: &K) -> bool {
+     // Only the keys are consulted; the values slice is never touched.
+     matches!(self.keys.zvl_binary_search(key), Ok(_))
+ }
+}
+
+impl<'a, K, V> ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Produce an ordered iterator over key-value pairs
+    ///
+    /// The yielded references carry the buffer lifetime `'a`.
+    pub fn iter<'b>(
+        &'b self,
+    ) -> impl Iterator<
+        Item = (
+            &'a <K as ZeroMapKV<'a>>::GetType,
+            &'a <V as ZeroMapKV<'a>>::GetType,
+        ),
+    > + 'b {
+        let keys = self.iter_keys();
+        let values = self.iter_values();
+        keys.zip(values)
+    }
+
+    /// Produce an ordered iterator over keys
+    pub fn iter_keys<'b>(&'b self) -> impl Iterator<Item = &'a <K as ZeroMapKV<'a>>::GetType> + 'b {
+        let len = self.keys.zvl_len();
+        #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len()
+        (0..len).map(move |idx| self.keys.zvl_get(idx).unwrap())
+    }
+
+    /// Produce an iterator over values, ordered by keys
+    pub fn iter_values<'b>(
+        &'b self,
+    ) -> impl Iterator<Item = &'a <V as ZeroMapKV<'a>>::GetType> + 'b {
+        let len = self.values.zvl_len();
+        #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() == values.zvl_len()
+        (0..len).map(move |idx| self.values.zvl_get(idx).unwrap())
+    }
+}
+
+impl<'a, K, V> ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a, Slice = ZeroSlice<V>> + AsULE + Copy + 'static,
+{
+    /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`
+    ///
+    /// Returns `None` when `key` is not found among the keys.
+    pub fn get_copied(&self, key: &K) -> Option<V> {
+        match self.keys.zvl_binary_search(key) {
+            Ok(found) => self.values.get(found),
+            Err(_) => None,
+        }
+    }
+
+    /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`
+    ///
+    /// The key is located by binary-searching with `predicate`.
+    pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<V> {
+        match self.keys.zvl_binary_search_by(predicate) {
+            Ok(found) => self.values.get(found),
+            Err(_) => None,
+        }
+    }
+
+    /// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references
+    /// to `V::ULE`, in cases when `V` is fixed-size
+    pub fn iter_copied_values<'b>(
+        &'b self,
+    ) -> impl Iterator<Item = (&'b <K as ZeroMapKV<'a>>::GetType, V)> {
+        (0..self.keys.zvl_len()).map(move |idx| {
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len()
+            let key = self.keys.zvl_get(idx).unwrap();
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len()
+            let value = self.values.get(idx).unwrap();
+            (key, value)
+        })
+    }
+}
+
+impl<'a, K, V> ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a, Slice = ZeroSlice<K>> + AsULE + Copy + Ord + 'static,
+    V: ZeroMapKV<'a, Slice = ZeroSlice<V>> + AsULE + Copy + 'static,
+{
+    /// Similar to [`Self::iter()`] except it returns direct copies of the keys and values instead of
+    /// references to `K::ULE` and `V::ULE`, in cases when `K` and `V` are fixed-size
+    #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait
+    pub fn iter_copied<'b: 'a>(&'b self) -> impl Iterator<Item = (K, V)> + 'b {
+        let len = self.keys.zvl_len();
+        let keys = &self.keys;
+        let values = &self.values;
+        (0..len).map(move |idx| {
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len()
+            let key = ZeroSlice::get(keys, idx).unwrap();
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len()
+            let value = ZeroSlice::get(values, idx).unwrap();
+            (key, value)
+        })
+    }
+}
+
+// The default (derived) PartialEq is not usable here: ZeroMap is invariant,
+// so rustc would not automatically allow comparing ZeroMaps that have
+// different lifetimes. This impl is therefore generic over both lifetimes.
+impl<'a, 'b, K, V> PartialEq<ZeroMapBorrowed<'b, K, V>> for ZeroMapBorrowed<'a, K, V>
+where
+    K: for<'c> ZeroMapKV<'c> + ?Sized,
+    V: for<'c> ZeroMapKV<'c> + ?Sized,
+    <K as ZeroMapKV<'a>>::Slice: PartialEq<<K as ZeroMapKV<'b>>::Slice>,
+    <V as ZeroMapKV<'a>>::Slice: PartialEq<<V as ZeroMapKV<'b>>::Slice>,
+{
+    /// Two maps are equal when their key slices and their value slices are equal.
+    fn eq(&self, other: &ZeroMapBorrowed<'b, K, V>) -> bool {
+        // Values are only compared once the keys are known to match.
+        if !self.keys.eq(other.keys) {
+            return false;
+        }
+        self.values.eq(other.values)
+    }
+}
+
+impl<'a, K, V> fmt::Debug for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::Slice: fmt::Debug,
+    V::Slice: fmt::Debug,
+{
+    /// Formats the map as a struct named `ZeroMapBorrowed` with `keys` and `values` fields.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        let mut out = f.debug_struct("ZeroMapBorrowed");
+        out.field("keys", &self.keys);
+        out.field("values", &self.values);
+        out.finish()
+    }
+}
diff --git a/third_party/rust/zerovec/src/map/databake.rs b/third_party/rust/zerovec/src/map/databake.rs
new file mode 100644
index 0000000000..f861e5c29c
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/databake.rs
@@ -0,0 +1,82 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{maps::ZeroMapBorrowed, maps::ZeroMapKV, ZeroMap};
+use databake::*;
+
+impl<'a, K, V> Bake for ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::Container: Bake,
+    V::Container: Bake,
+{
+    /// Emits a `zerovec::ZeroMap::from_parts_unchecked(..)` call built from the
+    /// baked key and value containers, and registers `zerovec` with `env`.
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        let baked_keys = self.keys.bake(env);
+        let baked_values = self.values.bake(env);
+        quote! {
+            unsafe {
+                #[allow(unused_unsafe)]
+                zerovec::ZeroMap::from_parts_unchecked(#baked_keys, #baked_values)
+            }
+        }
+    }
+}
+
+impl<'a, K, V> Bake for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    &'a K::Slice: Bake,
+    &'a V::Slice: Bake,
+{
+    /// Emits a `zerovec::maps::ZeroMapBorrowed::from_parts_unchecked(..)` call built
+    /// from the baked key and value slices, and registers `zerovec` with `env`.
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        let baked_keys = self.keys.bake(env);
+        let baked_values = self.values.bake(env);
+        quote! {
+            unsafe {
+                #[allow(unused_unsafe)]
+                zerovec::maps::ZeroMapBorrowed::from_parts_unchecked(#baked_keys, #baked_values)
+            }
+        }
+    }
+}
+
+#[test] // NOTE(review): presumably `test_bake!` checks that baking a `ZeroMap<str, str>` yields the `const:` expression below — confirm against databake
+fn test_baked_map() {
+ test_bake!(
+ ZeroMap<str, str>,
+ const: unsafe {
+ #[allow(unused_unsafe)]
+ crate::ZeroMap::from_parts_unchecked(
+ unsafe {
+ crate::VarZeroVec::from_bytes_unchecked(
+ b"\x02\0\0\0\0\0\0\0\x02\0\0\0adbc"
+ )
+ },
+ unsafe {
+ crate::VarZeroVec::from_bytes_unchecked(
+ b"\x02\0\0\0\0\0\0\0\x04\0\0\0ERA1ERA0"
+ )
+ },
+ )
+ },
+ zerovec
+ );
+}
+
+#[test] // NOTE(review): presumably `test_bake!` checks that baking a `ZeroMapBorrowed<str, str>` yields the `const:` expression below — confirm against databake
+fn test_baked_borrowed_map() {
+ test_bake!(
+ ZeroMapBorrowed<str, str>,
+ const: unsafe {
+ #[allow(unused_unsafe)]
+ crate::maps::ZeroMapBorrowed::from_parts_unchecked(
+ unsafe {
+ crate::VarZeroSlice::from_bytes_unchecked(
+ b"\x02\0\0\0\0\0\0\0\x02\0\0\0adbc"
+ )
+ },
+ unsafe {
+ crate::VarZeroSlice::from_bytes_unchecked(
+ b"\x02\0\0\0\0\0\0\0\x04\0\0\0ERA1ERA0"
+ )
+ },
+ )
+ },
+ zerovec
+ );
+}
diff --git a/third_party/rust/zerovec/src/map/kv.rs b/third_party/rust/zerovec/src/map/kv.rs
new file mode 100644
index 0000000000..1923ed9911
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/kv.rs
@@ -0,0 +1,131 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::vecs::{MutableZeroVecLike, ZeroVecLike};
+use crate::ule::*;
+use crate::vecs::{FlexZeroSlice, FlexZeroVec};
+use crate::vecs::{VarZeroSlice, VarZeroVec};
+use crate::zerovec::{ZeroSlice, ZeroVec};
+use alloc::boxed::Box;
+
+/// Marker trait for types that may be used as keys or values in a [`ZeroMap`](super::ZeroMap).
+///
+/// Users should not be calling methods of this trait directly, however if you are
+/// implementing your own [`AsULE`] or [`VarULE`] type you may wish to implement
+/// this trait.
+// this lifetime should be a GAT on Container once that is possible
+#[allow(clippy::upper_case_acronyms)] // KV is not an acronym
+pub trait ZeroMapKV<'a> {
+    /// The owned, mutable container used to store this type: [`ZeroVec`] or [`VarZeroVec`]
+    /// (the `usize` impl in this file uses [`FlexZeroVec`]).
+    ///
+    /// Its slice, get and owned types must agree with the associated types below.
+    type Container: MutableZeroVecLike<
+            'a,
+            Self,
+            SliceVariant = Self::Slice,
+            GetType = Self::GetType,
+            OwnedType = Self::OwnedType,
+        > + Sized;
+    /// The borrowed slice form of `Self::Container`; it may be unsized.
+    type Slice: ZeroVecLike<Self, GetType = Self::GetType> + ?Sized;
+    /// The type produced by `Container::get()`
+    ///
+    /// This type will be predetermined by the choice of `Self::Container`:
+    /// For sized types this must be `T::ULE`, and for unsized types this must be `T`
+    type GetType: ?Sized + 'static;
+    /// The type produced by `Container::replace()` and `Container::remove()`,
+    /// also used during deserialization. If `Self` is human readable serialized,
+    /// deserializing to `Self::OwnedType` should produce the same value once
+    /// passed through `Self::owned_as_self()`
+    ///
+    /// This type will be predetermined by the choice of `Self::Container`:
+    /// For sized types this must be `T` and for unsized types this must be `Box<T>`
+    type OwnedType: 'static;
+}
+
+// Implements `ZeroMapKV` for each listed fixed-size type: the type is stored
+// in a `ZeroVec`, read back as its `AsULE::ULE`, and owned as itself.
+macro_rules! impl_sized_kv {
+    ($($ty:ident),+ $(,)?) => {
+        $(
+            impl<'a> ZeroMapKV<'a> for $ty {
+                type Container = ZeroVec<'a, $ty>;
+                type Slice = ZeroSlice<$ty>;
+                type GetType = <$ty as AsULE>::ULE;
+                type OwnedType = $ty;
+            }
+        )+
+    };
+}
+
+impl_sized_kv!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, char, f32, f64);
+
+// `usize` is the one primitive here that is stored in a `FlexZeroVec`
+// instead of a `ZeroVec`; elements are read back as `[u8]`.
+impl<'a> ZeroMapKV<'a> for usize {
+    type Container = FlexZeroVec<'a>;
+    type Slice = FlexZeroSlice;
+    type GetType = [u8];
+    type OwnedType = Self;
+}
+
+// Any `Option<T>` that is itself `AsULE` is stored as a fixed-size `ZeroVec` element.
+impl<'a, T> ZeroMapKV<'a> for Option<T>
+where
+    Option<T>: AsULE + 'static,
+{
+    type Container = ZeroVec<'a, Self>;
+    type Slice = ZeroSlice<Self>;
+    type GetType = <Self as AsULE>::ULE;
+    type OwnedType = Self;
+}
+
+// `OptionVarULE<T>` goes in a `VarZeroVec`; it is read back as itself and owned through a `Box`.
+impl<'a, T> ZeroMapKV<'a> for OptionVarULE<T>
+where
+    T: VarULE + ?Sized,
+{
+    type Container = VarZeroVec<'a, Self>;
+    type Slice = VarZeroSlice<Self>;
+    type GetType = Self;
+    type OwnedType = Box<Self>;
+}
+
+// `str` goes in a `VarZeroVec`; it is read back as `str` and owned as `Box<str>`.
+impl<'a> ZeroMapKV<'a> for str {
+    type Container = VarZeroVec<'a, Self>;
+    type Slice = VarZeroSlice<Self>;
+    type GetType = Self;
+    type OwnedType = Box<Self>;
+}
+
+// Slices of types that are their own ULE go in a `VarZeroVec`; they are
+// read back as `[T]` and owned as `Box<[T]>`.
+impl<'a, T> ZeroMapKV<'a> for [T]
+where
+    T: ULE + AsULE<ULE = T>,
+{
+    type Container = VarZeroVec<'a, Self>;
+    type Slice = VarZeroSlice<Self>;
+    type GetType = Self;
+    type OwnedType = Box<Self>;
+}
+
+// Fixed-length arrays go in a `ZeroVec` and are read back as an array of the element's ULE type.
+impl<'a, T, const N: usize> ZeroMapKV<'a> for [T; N]
+where
+    T: AsULE + 'static,
+{
+    type Container = ZeroVec<'a, Self>;
+    type Slice = ZeroSlice<Self>;
+    type GetType = [T::ULE; N];
+    type OwnedType = Self;
+}
+
+// A `ZeroSlice<T>` is itself variable-sized, so it goes in a `VarZeroVec`;
+// it is read back as itself and owned through a `Box`.
+impl<'a, T> ZeroMapKV<'a> for ZeroSlice<T>
+where
+    T: AsULE + 'static,
+{
+    type Container = VarZeroVec<'a, Self>;
+    type Slice = VarZeroSlice<Self>;
+    type GetType = Self;
+    type OwnedType = Box<Self>;
+}
diff --git a/third_party/rust/zerovec/src/map/map.rs b/third_party/rust/zerovec/src/map/map.rs
new file mode 100644
index 0000000000..6801869c96
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/map.rs
@@ -0,0 +1,653 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use crate::ule::{AsULE, EncodeAsVarULE, VarULE};
+use crate::{VarZeroVec, ZeroSlice, ZeroVec, ZeroVecError};
+use alloc::borrow::Borrow;
+use alloc::boxed::Box;
+use core::cmp::Ordering;
+use core::fmt;
+use core::iter::FromIterator;
+
+/// A zero-copy map datastructure, built on sorted binary-searchable [`ZeroVec`]
+/// and [`VarZeroVec`].
+///
+/// This type, like [`ZeroVec`] and [`VarZeroVec`], is able to zero-copy
+/// deserialize from appropriately formatted byte buffers. It is internally copy-on-write, so it can be mutated
+/// afterwards as necessary.
+///
+/// Internally, a `ZeroMap` is a zero-copy vector for keys paired with a zero-copy vector for
+/// values, sorted by the keys. Therefore, all types used in `ZeroMap` need to work with either
+/// [`ZeroVec`] or [`VarZeroVec`].
+///
+/// This does mean that for fixed-size data, one must use the regular type (`u32`, `u8`, `char`, etc),
+/// whereas for variable-size data, `ZeroMap` will use the dynamically sized version (`str` not `String`,
+/// `ZeroSlice` not `ZeroVec`, `FooULE` not `Foo` for custom types)
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::ZeroMap;
+///
+/// #[derive(serde::Serialize, serde::Deserialize)]
+/// struct Data<'a> {
+///     #[serde(borrow)]
+///     map: ZeroMap<'a, u32, str>,
+/// }
+///
+/// let mut map = ZeroMap::new();
+/// map.insert(&1, "one");
+/// map.insert(&2, "two");
+/// map.insert(&4, "four");
+///
+/// let data = Data { map };
+///
+/// let bincode_bytes =
+///     bincode::serialize(&data).expect("Serialization should be successful");
+///
+/// // Will deserialize without any allocations
+/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
+///     .expect("Deserialization should be successful");
+///
+/// // Check the round-tripped map, not the original one
+/// assert_eq!(deserialized.map.get(&1), Some("one"));
+/// assert_eq!(deserialized.map.get(&2), Some("two"));
+/// ```
+///
+/// [`VarZeroVec`]: crate::VarZeroVec
+// ZeroMap has only one invariant: keys.len() == values.len()
+// It is also expected that the keys are sorted, but this is not an invariant. See #1433
+pub struct ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Container holding the keys.
+    pub(crate) keys: K::Container,
+    /// Container holding the values; `values[i]` pairs with `keys[i]`.
+    pub(crate) values: V::Container,
+}
+
+impl<'a, K, V> Default for ZeroMap<'a, K, V>
+where
+    K: ?Sized + ZeroMapKV<'a>,
+    V: ?Sized + ZeroMapKV<'a>,
+{
+    /// Equivalent to [`ZeroMap::new()`]: an empty map.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ?Sized + ZeroMapKV<'a>,
+    V: ?Sized + ZeroMapKV<'a>,
+{
+    /// Builds a new, empty `ZeroMap<K, V>` (both containers get capacity zero).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use zerovec::ZeroMap;
+    ///
+    /// let zm: ZeroMap<u16, str> = ZeroMap::new();
+    /// assert!(zm.is_empty());
+    /// ```
+    pub fn new() -> Self {
+        Self::with_capacity(0)
+    }
+
+    // Stores the two containers as-is; nothing about them is validated here.
+    #[doc(hidden)] // databake internal
+    pub const unsafe fn from_parts_unchecked(keys: K::Container, values: V::Container) -> Self {
+        Self { keys, values }
+    }
+
+    /// Construct a new [`ZeroMap`] whose key and value containers are both
+    /// created with the given capacity
+    pub fn with_capacity(capacity: usize) -> Self {
+        let keys = K::Container::zvl_with_capacity(capacity);
+        let values = V::Container::zvl_with_capacity(capacity);
+        Self { keys, values }
+    }
+
+    /// Obtain a borrowed version of this map
+    pub fn as_borrowed(&'a self) -> ZeroMapBorrowed<'a, K, V> {
+        let keys = self.keys.zvl_as_borrowed();
+        let values = self.values.zvl_as_borrowed();
+        ZeroMapBorrowed { keys, values }
+    }
+
+    /// The number of elements in the [`ZeroMap`]
+    ///
+    /// This is read from the values container.
+    pub fn len(&self) -> usize {
+        self.values.zvl_len()
+    }
+
+    /// Whether the [`ZeroMap`] holds no elements
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Remove all elements from the [`ZeroMap`]
+    pub fn clear(&mut self) {
+        // Both containers are emptied so that their lengths keep matching.
+        self.keys.zvl_clear();
+        self.values.zvl_clear();
+    }
+
+    /// Reserve capacity for `additional` more elements to be inserted into
+    /// the [`ZeroMap`] to avoid frequent reallocations.
+    ///
+    /// See [`Vec::reserve()`](alloc::vec::Vec::reserve) for more information.
+    pub fn reserve(&mut self, additional: usize) {
+        self.keys.zvl_reserve(additional);
+        self.values.zvl_reserve(additional);
+    }
+}
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized + Ord,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ /// Get the value associated with `key`, if it exists.
+ ///
+ /// For fixed-size ([`AsULE`]) `V` types, this _will_ return
+ /// their corresponding [`AsULE::ULE`] type. If you wish to work with the `V`
+ /// type directly, [`Self::get_copied()`] exists for convenience.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// assert_eq!(map.get(&1), Some("one"));
+ /// assert_eq!(map.get(&3), None);
+ /// ```
+ pub fn get(&self, key: &K) -> Option<&V::GetType> {
+     // A successful binary search over the keys gives the index of the
+     // matching value; a miss means the key is absent.
+     match self.keys.zvl_binary_search(key) {
+         Ok(found) => self.values.zvl_get(found),
+         Err(_) => None,
+     }
+ }
+
+ /// Binary search the map with `predicate` to find a key, returning the value.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// assert_eq!(map.get_by(|probe| probe.cmp(&1)), Some("one"));
+ /// assert_eq!(map.get_by(|probe| probe.cmp(&3)), None);
+ /// ```
+ pub fn get_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<&V::GetType> {
+     // Same lookup as `get`, except the caller's comparator drives the search.
+     match self.keys.zvl_binary_search_by(predicate) {
+         Ok(found) => self.values.zvl_get(found),
+         Err(_) => None,
+     }
+ }
+
+ /// Returns whether `key` is contained in this map
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// assert!(map.contains_key(&1));
+ /// assert!(!map.contains_key(&3));
+ /// ```
+ pub fn contains_key(&self, key: &K) -> bool {
+     // Only the keys are consulted; the values container is never touched.
+     matches!(self.keys.zvl_binary_search(key), Ok(_))
+ }
+
+ /// Insert `value` with `key`, returning the existing value if it exists.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// assert_eq!(map.get(&1), Some("one"));
+ /// assert_eq!(map.get(&3), None);
+ /// ```
+ pub fn insert(&mut self, key: &K, value: &V) -> Option<V::OwnedType> {
+     // Key already present: swap the value in place and hand back the old one.
+     let slot = match self.keys.zvl_binary_search(key) {
+         Ok(existing) => return Some(self.values.zvl_replace(existing, value)),
+         Err(insertion_point) => insertion_point,
+     };
+     // New key: insert key and value at the same index so the pairs stay aligned.
+     self.keys.zvl_insert(slot, key);
+     self.values.zvl_insert(slot, value);
+     None
+ }
+
+ /// Remove the value at `key`, returning it if it exists.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// map.insert(&1, "one");
+ /// map.insert(&2, "two");
+ /// assert_eq!(map.remove(&1), Some("one".to_owned().into_boxed_str()));
+ /// assert_eq!(map.get(&1), None);
+ /// ```
+ pub fn remove(&mut self, key: &K) -> Option<V::OwnedType> {
+     match self.keys.zvl_binary_search(key) {
+         Ok(found) => {
+             // Drop the key first, then return the value removed from the same index.
+             self.keys.zvl_remove(found);
+             Some(self.values.zvl_remove(found))
+         }
+         Err(_) => None,
+     }
+ }
+
+ /// Appends `value` with `key` to the end of the underlying vector, returning
+ /// `key` and `value` _if it failed_. Useful for extending with an existing
+ /// sorted list.
+ /// ```rust
+ /// use zerovec::ZeroMap;
+ ///
+ /// let mut map = ZeroMap::new();
+ /// assert!(map.try_append(&1, "uno").is_none());
+ /// assert!(map.try_append(&3, "tres").is_none());
+ ///
+ /// let unsuccessful = map.try_append(&3, "tres-updated");
+ /// assert!(unsuccessful.is_some(), "append duplicate of last key");
+ ///
+ /// let unsuccessful = map.try_append(&2, "dos");
+ /// assert!(unsuccessful.is_some(), "append out of order");
+ ///
+ /// assert_eq!(map.get(&1), Some("uno"));
+ ///
+ /// // contains the original value for the key: 3
+ /// assert_eq!(map.get(&3), Some("tres"));
+ ///
+ /// // not appended since it wasn't in order
+ /// assert_eq!(map.get(&2), None);
+ /// ```
+ #[must_use]
+ pub fn try_append<'b>(&mut self, key: &'b K, value: &'b V) -> Option<(&'b K, &'b V)> {
+     let len = self.keys.zvl_len();
+     // The append is refused unless `key` compares strictly greater than the
+     // current last key; an empty map accepts any key.
+     let rejected = len != 0
+         && self.keys.zvl_get(len - 1).map_or(false, |last| {
+             K::Container::t_cmp_get(key, last) != Ordering::Greater
+         });
+     if rejected {
+         return Some((key, value));
+     }
+
+     self.keys.zvl_push(key);
+     self.values.zvl_push(value);
+     None
+ }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ?Sized + ZeroMapKV<'a>,
+    V: ?Sized + ZeroMapKV<'a>,
+{
+    /// Produce an ordered iterator over key-value pairs
+    pub fn iter<'b>(
+        &'b self,
+    ) -> impl ExactSizeIterator<
+        Item = (
+            &'b <K as ZeroMapKV<'a>>::GetType,
+            &'b <V as ZeroMapKV<'a>>::GetType,
+        ),
+    > {
+        let len = self.keys.zvl_len();
+        (0..len).map(move |idx| {
+            #[allow(clippy::unwrap_used)] // idx is in-range
+            let key = self.keys.zvl_get(idx).unwrap();
+            #[allow(clippy::unwrap_used)] // idx is in-range
+            let value = self.values.zvl_get(idx).unwrap();
+            (key, value)
+        })
+    }
+
+    /// Produce an ordered iterator over keys
+    pub fn iter_keys<'b>(
+        &'b self,
+    ) -> impl ExactSizeIterator<Item = &'b <K as ZeroMapKV<'a>>::GetType> {
+        let len = self.keys.zvl_len();
+        #[allow(clippy::unwrap_used)] // idx is in-range
+        (0..len).map(move |idx| self.keys.zvl_get(idx).unwrap())
+    }
+
+    /// Produce an iterator over values, ordered by keys
+    pub fn iter_values<'b>(
+        &'b self,
+    ) -> impl ExactSizeIterator<Item = &'b <V as ZeroMapKV<'a>>::GetType> {
+        let len = self.values.zvl_len();
+        #[allow(clippy::unwrap_used)] // idx is in-range
+        (0..len).map(move |idx| self.values.zvl_get(idx).unwrap())
+    }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a, Container = ZeroVec<'a, K>> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K: AsULE,
+{
+    /// Reinterpret the keys of a `ZeroMap<K, V>` as `P`, yielding a `ZeroMap<P, V>`.
+    /// `K` and `P` must be [`AsULE`] types sharing the same representation.
+    ///
+    /// # Unchecked Invariants
+    ///
+    /// If `K` and `P` have different ordering semantics, unexpected behavior may occur.
+    pub fn cast_zv_k_unchecked<P>(self) -> ZeroMap<'a, P, V>
+    where
+        P: AsULE<ULE = K::ULE> + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized,
+    {
+        let Self { keys, values } = self;
+        ZeroMap {
+            keys: keys.cast(),
+            values,
+        }
+    }
+
+    /// Convert the keys of a `ZeroMap<K, V>` to `P`, yielding a `ZeroMap<P, V>`.
+    /// `K` and `P` must be [`AsULE`] types of the same size.
+    ///
+    /// # Unchecked Invariants
+    ///
+    /// If `K` and `P` have different ordering semantics, unexpected behavior may occur.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `K::ULE` and `P::ULE` are not the same size.
+    pub fn try_convert_zv_k_unchecked<P>(self) -> Result<ZeroMap<'a, P, V>, ZeroVecError>
+    where
+        P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized,
+    {
+        let Self { keys, values } = self;
+        let keys = keys.try_into_converted()?;
+        Ok(ZeroMap { keys, values })
+    }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized,
+    V: AsULE,
+{
+    /// Reinterpret the values of a `ZeroMap<K, V>` as `P`, yielding a `ZeroMap<K, P>`.
+    /// `V` and `P` must be [`AsULE`] types sharing the same representation.
+    ///
+    /// # Unchecked Invariants
+    ///
+    /// If `V` and `P` have different ordering semantics, unexpected behavior may occur.
+    pub fn cast_zv_v_unchecked<P>(self) -> ZeroMap<'a, K, P>
+    where
+        P: AsULE<ULE = V::ULE> + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized,
+    {
+        let Self { keys, values } = self;
+        ZeroMap {
+            keys,
+            values: values.cast(),
+        }
+    }
+
+    /// Convert the values of a `ZeroMap<K, V>` to `P`, yielding a `ZeroMap<K, P>`.
+    /// `V` and `P` must be [`AsULE`] types of the same size.
+    ///
+    /// # Unchecked Invariants
+    ///
+    /// If `V` and `P` have different ordering semantics, unexpected behavior may occur.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `V::ULE` and `P::ULE` are not the same size.
+    pub fn try_convert_zv_v_unchecked<P>(self) -> Result<ZeroMap<'a, K, P>, ZeroVecError>
+    where
+        P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>> + ?Sized,
+    {
+        let Self { keys, values } = self;
+        let values = values.try_into_converted()?;
+        Ok(ZeroMap { keys, values })
+    }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized + Ord,
+    V: ZeroMapKV<'a, Container = VarZeroVec<'a, V>> + ?Sized,
+    V: VarULE,
+{
+    /// Variant of `insert()` whose value may be any [EncodeAsVarULE](crate::ule::EncodeAsVarULE)
+    /// type, which avoids an extra allocation when dealing with custom ULE types.
+    ///
+    /// Returns the boxed previous value if `key` was already present, `None` otherwise.
+    ///
+    /// ```rust
+    /// use std::borrow::Cow;
+    /// use zerovec::ZeroMap;
+    ///
+    /// #[zerovec::make_varule(PersonULE)]
+    /// #[derive(Clone, Eq, PartialEq, Ord, PartialOrd)]
+    /// struct Person<'a> {
+    ///     age: u8,
+    ///     name: Cow<'a, str>,
+    /// }
+    ///
+    /// let mut map: ZeroMap<u32, PersonULE> = ZeroMap::new();
+    /// map.insert_var_v(
+    ///     &1,
+    ///     &Person {
+    ///         age: 20,
+    ///         name: "Joseph".into(),
+    ///     },
+    /// );
+    /// map.insert_var_v(
+    ///     &1,
+    ///     &Person {
+    ///         age: 35,
+    ///         name: "Carla".into(),
+    ///     },
+    /// );
+    /// assert_eq!(&map.get(&1).unwrap().name, "Carla");
+    /// assert!(map.get(&3).is_none());
+    /// ```
+    pub fn insert_var_v<VE: EncodeAsVarULE<V>>(&mut self, key: &K, value: &VE) -> Option<Box<V>> {
+        let index = match self.keys.zvl_binary_search(key) {
+            Ok(found) => found,
+            Err(insert_at) => {
+                // Key is new: insert key and value at the position that keeps the keys sorted.
+                self.keys.zvl_insert(insert_at, key);
+                self.values.make_mut().insert(insert_at, value);
+                return None;
+            }
+        };
+
+        // Key already exists: box the old value, then overwrite it in place.
+        #[allow(clippy::unwrap_used)] // binary search
+        let previous = self.values.get(index).unwrap().to_boxed();
+        self.values.make_mut().replace(index, value);
+        Some(previous)
+    }
+
+    // insert_var_k, insert_var_kv are not possible since one cannot perform the binary search with EncodeAsVarULE
+    // though we might be able to do it in the future if we add a trait for cross-Ord requirements
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized + Ord,
+    V: ZeroMapKV<'a> + ?Sized,
+    V: Copy,
+{
+    /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`.
+    ///
+    /// Returns `None` if `key` is not present in the map.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use zerovec::ZeroMap;
+    ///
+    /// let mut map = ZeroMap::new();
+    /// map.insert(&1, &'a');
+    /// map.insert(&2, &'b');
+    /// assert_eq!(map.get_copied(&1), Some('a'));
+    /// assert_eq!(map.get_copied(&3), None);
+    /// ```
+    #[inline]
+    pub fn get_copied(&self, key: &K) -> Option<V> {
+        let index = self.keys.zvl_binary_search(key).ok()?;
+        self.get_copied_at(index)
+    }
+
+    /// Binary search the map with `predicate` to find a key, returning the value.
+    ///
+    /// For cases when `V` is fixed-size, use this method to obtain a direct copy of `V`
+    /// instead of `V::ULE`.
+    ///
+    /// Returns `None` if the search does not land on a key.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use zerovec::ZeroMap;
+    ///
+    /// let mut map = ZeroMap::new();
+    /// map.insert(&1, &'a');
+    /// map.insert(&2, &'b');
+    /// assert_eq!(map.get_copied_by(|probe| probe.cmp(&1)), Some('a'));
+    /// assert_eq!(map.get_copied_by(|probe| probe.cmp(&3)), None);
+    /// ```
+    #[inline]
+    pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<V> {
+        let index = self.keys.zvl_binary_search_by(predicate).ok()?;
+        self.get_copied_at(index)
+    }
+
+    /// Copies out the value stored at `index`, or returns `None` if `index` is
+    /// out of range for the values container.
+    fn get_copied_at(&self, index: usize) -> Option<V> {
+        let ule = self.values.zvl_get(index)?;
+        // `zvl_get_as_t` hands back whatever the callback returns, so the copy can be
+        // produced directly without an intermediate `Option` and `unwrap()`.
+        Some(V::Container::zvl_get_as_t(ule, |v| *v))
+    }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized,
+    V: AsULE + Copy,
+{
+    /// Like [`Self::iter()`], but each value is yielded as a direct copy of `V` rather than
+    /// a reference to `V::ULE`; for use when `V` is fixed-size.
+    pub fn iter_copied_values<'b>(
+        &'b self,
+    ) -> impl Iterator<Item = (&'b <K as ZeroMapKV<'a>>::GetType, V)> {
+        let len = self.keys.zvl_len();
+        (0..len).map(move |idx| {
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len()
+            let key = self.keys.zvl_get(idx).unwrap();
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len()
+            let value = ZeroSlice::get(&*self.values, idx).unwrap();
+            (key, value)
+        })
+    }
+}
+
+impl<'a, K, V> ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a, Container = ZeroVec<'a, K>> + ?Sized,
+    V: ZeroMapKV<'a, Container = ZeroVec<'a, V>> + ?Sized,
+    K: AsULE + Copy,
+    V: AsULE + Copy,
+{
+    /// Like [`Self::iter()`], but both keys and values are yielded as direct copies rather
+    /// than references to `K::ULE` and `V::ULE`; for use when `K` and `V` are fixed-size.
+    #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait
+    pub fn iter_copied<'b>(&'b self) -> impl Iterator<Item = (K, V)> + 'b {
+        let (keys, values) = (&self.keys, &self.values);
+        (0..keys.len()).map(move |idx| {
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len()
+            let key = ZeroSlice::get(&**keys, idx).unwrap();
+            #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len()
+            let value = ZeroSlice::get(&**values, idx).unwrap();
+            (key, value)
+        })
+    }
+}
+
+impl<'a, K, V> From<ZeroMapBorrowed<'a, K, V>> for ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a>,
+    V: ZeroMapKV<'a>,
+    K: ?Sized,
+    V: ?Sized,
+{
+    /// Builds a `ZeroMap` from the key and value containers of a borrowed map.
+    fn from(other: ZeroMapBorrowed<'a, K, V>) -> Self {
+        let keys = K::Container::zvl_from_borrowed(other.keys);
+        let values = V::Container::zvl_from_borrowed(other.values);
+        Self { keys, values }
+    }
+}
+
+// ZeroMap is invariant over its lifetime, so a derived PartialEq would not let
+// rustc compare two ZeroMaps that have different lifetimes. This hand-written
+// impl is generic over both lifetimes instead.
+impl<'a, 'b, K, V> PartialEq<ZeroMap<'b, K, V>> for ZeroMap<'a, K, V>
+where
+    K: for<'c> ZeroMapKV<'c> + ?Sized,
+    V: for<'c> ZeroMapKV<'c> + ?Sized,
+    <K as ZeroMapKV<'a>>::Container: PartialEq<<K as ZeroMapKV<'b>>::Container>,
+    <V as ZeroMapKV<'a>>::Container: PartialEq<<V as ZeroMapKV<'b>>::Container>,
+{
+    /// Two maps are equal when their key containers and their value containers are equal.
+    fn eq(&self, other: &ZeroMap<'b, K, V>) -> bool {
+        if !self.keys.eq(&other.keys) {
+            return false;
+        }
+        self.values.eq(&other.values)
+    }
+}
+
+impl<'a, K, V> fmt::Debug for ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    <K as ZeroMapKV<'a>>::Container: fmt::Debug,
+    <V as ZeroMapKV<'a>>::Container: fmt::Debug,
+{
+    /// Formats the map as a `ZeroMap` struct with `keys` and `values` fields.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        let mut out = f.debug_struct("ZeroMap");
+        out.field("keys", &self.keys);
+        out.field("values", &self.values);
+        out.finish()
+    }
+}
+
+impl<'a, K, V> Clone for ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    <K as ZeroMapKV<'a>>::Container: Clone,
+    <V as ZeroMapKV<'a>>::Container: Clone,
+{
+    /// Clones the key container and the value container.
+    fn clone(&self) -> Self {
+        let keys = self.keys.clone();
+        let values = self.values.clone();
+        Self { keys, values }
+    }
+}
+
+impl<'a, A, B, K, V> FromIterator<(A, B)> for ZeroMap<'a, K, V>
+where
+    A: Borrow<K>,
+    B: Borrow<V>,
+    K: ZeroMapKV<'a> + ?Sized + Ord,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Collects `(key, value)` pairs into a map.
+    ///
+    /// Each pair is first offered to `try_append`; a pair it rejects is passed
+    /// to `insert` instead.
+    fn from_iter<T>(iter: T) -> Self
+    where
+        T: IntoIterator<Item = (A, B)>,
+    {
+        let iter = iter.into_iter();
+        // Size the map from the upper bound of the hint when one is known,
+        // otherwise from the lower bound.
+        let (lower, upper) = iter.size_hint();
+        let mut map = Self::with_capacity(upper.unwrap_or(lower));
+
+        for (key, value) in iter {
+            let rejected = map.try_append(key.borrow(), value.borrow());
+            if let Some((key, value)) = rejected {
+                map.insert(key, value);
+            }
+        }
+        map
+    }
+}
diff --git a/third_party/rust/zerovec/src/map/mod.rs b/third_party/rust/zerovec/src/map/mod.rs
new file mode 100644
index 0000000000..fcad0cff71
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/mod.rs
@@ -0,0 +1,23 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! See [`ZeroMap`](crate::ZeroMap) for details.
+
+mod borrowed;
+mod kv;
+#[allow(clippy::module_inception)] // module is purely internal
+pub(crate) mod map;
+mod vecs;
+
+#[cfg(feature = "databake")]
+mod databake;
+#[cfg(feature = "serde")]
+mod serde;
+#[cfg(feature = "serde")]
+mod serde_helpers;
+
+pub use crate::ZeroMap;
+pub use borrowed::ZeroMapBorrowed;
+pub use kv::ZeroMapKV;
+pub use vecs::{MutableZeroVecLike, ZeroVecLike};
diff --git a/third_party/rust/zerovec/src/map/serde.rs b/third_party/rust/zerovec/src/map/serde.rs
new file mode 100644
index 0000000000..e82886d2ad
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/serde.rs
@@ -0,0 +1,313 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{MutableZeroVecLike, ZeroMap, ZeroMapBorrowed, ZeroMapKV, ZeroVecLike};
+use core::fmt;
+use core::marker::PhantomData;
+use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor};
+#[cfg(feature = "serde")]
+use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer};
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'a, K, V> Serialize for ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    V: ZeroMapKV<'a> + Serialize + ?Sized,
+    K::Container: Serialize,
+    V::Container: Serialize,
+{
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        // Non-human-readable formats get the two containers as a tuple.
+        if !serializer.is_human_readable() {
+            return (&self.keys, &self.values).serialize(serializer);
+        }
+
+        // Many human-readable formats don't support values other
+        // than numbers and strings as map keys. Inspect the first key (if any)
+        // to decide whether a vec of tuples has to be emitted instead of a map.
+        let needs_seq = match self.iter_keys().next() {
+            Some(first) => {
+                !K::Container::zvl_get_as_t(first, super::serde_helpers::is_num_or_string)
+            }
+            None => false,
+        };
+
+        if needs_seq {
+            let mut seq = serializer.serialize_seq(Some(self.len()))?;
+            for (k, v) in self.iter() {
+                K::Container::zvl_get_as_t(k, |k| {
+                    V::Container::zvl_get_as_t(v, |v| seq.serialize_element(&(k, v)))
+                })?;
+            }
+            seq.end()
+        } else {
+            let mut map = serializer.serialize_map(Some(self.len()))?;
+            for (k, v) in self.iter() {
+                K::Container::zvl_get_as_t(k, |k| map.serialize_key(k))?;
+                V::Container::zvl_get_as_t(v, |v| map.serialize_value(v))?;
+            }
+            map.end()
+        }
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'a, K, V> Serialize for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    V: ZeroMapKV<'a> + Serialize + ?Sized,
+    K::Container: Serialize,
+    V::Container: Serialize,
+{
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        // Convert to a `ZeroMap` and reuse its `Serialize` implementation.
+        let as_map: ZeroMap<'a, K, V> = ZeroMap::from(*self);
+        as_map.serialize(serializer)
+    }
+}
+
+/// Modified example from https://serde.rs/deserialize-map.html
+///
+/// Serde visitor whose `Visitor` impl produces a `ZeroMap<'a, K, V>` from either
+/// a sequence of `(key, value)` tuples or a map.
+struct ZeroMapMapVisitor<'a, K, V>
+where
+ K: ZeroMapKV<'a> + ?Sized + Ord,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ // Zero-sized marker that ties the visitor to `'a`, `K` and `V`; the visitor
+ // itself stores no keys or values.
+ #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter
+ marker: PhantomData<fn() -> (&'a K::OwnedType, &'a V::OwnedType)>,
+}
+
+impl<'a, K, V> ZeroMapMapVisitor<'a, K, V>
+where
+    K: ZeroMapKV<'a> + ?Sized + Ord,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Creates the (stateless) visitor.
+    fn new() -> Self {
+        let marker = PhantomData;
+        Self { marker }
+    }
+}
+
+impl<'a, 'de, K, V> Visitor<'de> for ZeroMapMapVisitor<'a, K, V>
+where
+    K: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::OwnedType: Deserialize<'de>,
+    V::OwnedType: Deserialize<'de>,
+{
+    type Value = ZeroMap<'a, K, V>;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a map produced by ZeroMap")
+    }
+
+    /// Builds the map from a sequence of `(key, value)` tuples.
+    fn visit_seq<S>(self, mut access: S) -> Result<Self::Value, S::Error>
+    where
+        S: SeqAccess<'de>,
+    {
+        let capacity = access.size_hint().unwrap_or(0);
+        let mut map = ZeroMap::with_capacity(capacity);
+
+        // Consume every remaining element of the input.
+        while let Some((key, value)) = access.next_element::<(K::OwnedType, V::OwnedType)>()? {
+            // Only appending at the end is attempted. Keys that do not arrive in
+            // strictly ascending order make `try_append` hand the entry back,
+            // which is reported as an error.
+            let rejected = map.try_append(
+                K::Container::owned_as_t(&key),
+                V::Container::owned_as_t(&value),
+            );
+            if rejected.is_some() {
+                return Err(de::Error::custom(
+                    "ZeroMap's keys must be sorted while deserializing",
+                ));
+            }
+        }
+
+        Ok(map)
+    }
+
+    /// Builds the map from a serialized map of entries.
+    fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
+    where
+        M: MapAccess<'de>,
+    {
+        let capacity = access.size_hint().unwrap_or(0);
+        let mut map = ZeroMap::with_capacity(capacity);
+
+        // Consume every remaining entry of the input.
+        while let Some((key, value)) = access.next_entry::<K::OwnedType, V::OwnedType>()? {
+            // Only appending at the end is attempted. Keys that do not arrive in
+            // strictly ascending order make `try_append` hand the entry back,
+            // which is reported as an error.
+            let rejected = map.try_append(
+                K::Container::owned_as_t(&key),
+                V::Container::owned_as_t(&value),
+            );
+            if rejected.is_some() {
+                return Err(de::Error::custom(
+                    "ZeroMap's keys must be sorted while deserializing",
+                ));
+            }
+        }
+
+        Ok(map)
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, 'a, K, V> Deserialize<'de> for ZeroMap<'a, K, V>
+where
+    K: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::Container: Deserialize<'de>,
+    V::Container: Deserialize<'de>,
+    K::OwnedType: Deserialize<'de>,
+    V::OwnedType: Deserialize<'de>,
+    'de: 'a,
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if !deserializer.is_human_readable() {
+            // Non-human-readable formats carry the two containers as a tuple.
+            let (keys, values): (K::Container, V::Container) =
+                Deserialize::deserialize(deserializer)?;
+            if keys.zvl_len() != values.zvl_len() {
+                return Err(de::Error::custom(
+                    "Mismatched key and value sizes in ZeroMap",
+                ));
+            }
+            // #1433: If keys are out of order, treat it as GIGO.
+            debug_assert!(keys.zvl_is_ascending());
+            return Ok(Self { keys, values });
+        }
+
+        // Human-readable formats go through the visitor, which accepts a map or a sequence.
+        deserializer.deserialize_any(ZeroMapMapVisitor::<'a, K, V>::new())
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, 'a, K, V> Deserialize<'de> for ZeroMapBorrowed<'a, K, V>
+where
+    K: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K::Container: Deserialize<'de>,
+    V::Container: Deserialize<'de>,
+    K::OwnedType: Deserialize<'de>,
+    V::OwnedType: Deserialize<'de>,
+    'de: 'a,
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            return Err(de::Error::custom(
+                "ZeroMapBorrowed cannot be deserialized from human-readable formats",
+            ));
+        }
+
+        // Deserialize as a `ZeroMap`, then require that both containers expose a
+        // borrowed inner slice.
+        let deserialized: ZeroMap<'a, K, V> = ZeroMap::deserialize(deserializer)?;
+        let keys = deserialized.keys.zvl_as_borrowed_inner().ok_or_else(|| {
+            <D::Error as de::Error>::custom(
+                "ZeroMapBorrowed can only deserialize in zero-copy ways",
+            )
+        })?;
+        let values = deserialized.values.zvl_as_borrowed_inner().ok_or_else(|| {
+            <D::Error as de::Error>::custom(
+                "ZeroMapBorrowed can only deserialize in zero-copy ways",
+            )
+        })?;
+        Ok(Self { keys, values })
+    }
+}
+
+// Round-trip tests for the serde impls of `ZeroMap` and `ZeroMapBorrowed`.
+#[cfg(test)]
+#[allow(non_camel_case_types)]
+mod test {
+ use crate::{map::ZeroMapBorrowed, ZeroMap};
+
+ // The two `DeriveTest_*` structs are not used by any test below; they derive
+ // the serde traits on a struct holding a `#[serde(borrow)]` map field.
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_ZeroMap<'data> {
+ #[serde(borrow)]
+ _data: ZeroMap<'data, str, [u8]>,
+ }
+
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_ZeroMapBorrowed<'data> {
+ #[serde(borrow)]
+ _data: ZeroMapBorrowed<'data, str, [u8]>,
+ }
+
+ // Expected JSON serialization of `make_map()`.
+ const JSON_STR: &str = "{\"1\":\"uno\",\"2\":\"dos\",\"3\":\"tres\"}";
+ // Expected bincode serialization of `make_map()`.
+ const BINCODE_BYTES: &[u8] = &[
+ 12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+ 0, 0, 0, 0, 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115,
+ ];
+
+ // Fixture shared by the JSON and bincode tests: {1: "uno", 2: "dos", 3: "tres"}.
+ fn make_map() -> ZeroMap<'static, u32, str> {
+ let mut map = ZeroMap::new();
+ map.insert(&1, "uno");
+ map.insert(&2, "dos");
+ map.insert(&3, "tres");
+ map
+ }
+
+ // Integer keys serialize to a JSON object and deserialize back to an equal map.
+ #[test]
+ fn test_serde_json() {
+ let map = make_map();
+ let json_str = serde_json::to_string(&map).expect("serialize");
+ assert_eq!(JSON_STR, json_str);
+ let new_map: ZeroMap<u32, str> = serde_json::from_str(&json_str).expect("deserialize");
+ assert_eq!(
+ new_map.iter().collect::<Vec<_>>(),
+ map.iter().collect::<Vec<_>>()
+ );
+ }
+
+ // Tuple keys serialize to a JSON array of [key, value] pairs and round-trip.
+ #[test]
+ fn test_serde_json_complex_key() {
+ let mut map = ZeroMap::new();
+ map.insert(&(1, 1), "uno");
+ map.insert(&(2, 2), "dos");
+ map.insert(&(3, 3), "tres");
+ let json_str = serde_json::to_string(&map).expect("serialize");
+ assert_eq!(
+ json_str,
+ "[[[1,1],\"uno\"],[[2,2],\"dos\"],[[3,3],\"tres\"]]"
+ );
+ let new_map: ZeroMap<(u32, u32), str> =
+ serde_json::from_str(&json_str).expect("deserialize");
+ assert_eq!(
+ new_map.iter().collect::<Vec<_>>(),
+ map.iter().collect::<Vec<_>>()
+ );
+ }
+
+ // The bincode output matches `BINCODE_BYTES` and deserializes into both
+ // `ZeroMap` and `ZeroMapBorrowed` with the same entries.
+ #[test]
+ fn test_bincode() {
+ let map = make_map();
+ let bincode_bytes = bincode::serialize(&map).expect("serialize");
+ assert_eq!(BINCODE_BYTES, bincode_bytes);
+ let new_map: ZeroMap<u32, str> = bincode::deserialize(&bincode_bytes).expect("deserialize");
+ assert_eq!(
+ new_map.iter().collect::<Vec<_>>(),
+ map.iter().collect::<Vec<_>>()
+ );
+
+ let new_map: ZeroMapBorrowed<u32, str> =
+ bincode::deserialize(&bincode_bytes).expect("deserialize");
+ assert_eq!(
+ new_map.iter().collect::<Vec<_>>(),
+ map.iter().collect::<Vec<_>>()
+ );
+ }
+}
diff --git a/third_party/rust/zerovec/src/map/serde_helpers.rs b/third_party/rust/zerovec/src/map/serde_helpers.rs
new file mode 100644
index 0000000000..b1ead938a0
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/serde_helpers.rs
@@ -0,0 +1,168 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// @@@@@@@@@@@@@@@@
+// THIS FILE IS SHARED BETWEEN LITEMAP AND ZEROVEC. PLEASE KEEP IT IN SYNC FOR ALL EDITS
+// @@@@@@@@@@@@@@@@
+
+use serde::ser::{Impossible, Serialize, Serializer};
+
+/// Reports whether `k` serializes as one of the primitive forms accepted by the
+/// dry-run serializer below.
+///
+/// `k` is serialized with `MapKeySerializerDryRun`, which writes nothing: it returns
+/// `Ok(())` for strings, chars, integers, unit variants and `collect_str`, recurses
+/// into newtype structs, and returns an error for everything else (bools, floats,
+/// bytes, unit, options, sequences, tuples, maps, structs and non-unit variants).
+/// The result is `true` exactly when that dry run succeeds.
+pub fn is_num_or_string<T: Serialize + ?Sized>(k: &T) -> bool {
+ // Serializer that errors in the same cases as serde_json::ser::MapKeySerializer
+ struct MapKeySerializerDryRun;
+ impl Serializer for MapKeySerializerDryRun {
+ type Ok = ();
+ // Singleton error type that implements serde::ser::Error
+ type Error = core::fmt::Error;
+
+ // Compound values are always rejected, so no compound serializer is ever built.
+ type SerializeSeq = Impossible<(), Self::Error>;
+ type SerializeTuple = Impossible<(), Self::Error>;
+ type SerializeTupleStruct = Impossible<(), Self::Error>;
+ type SerializeTupleVariant = Impossible<(), Self::Error>;
+ type SerializeMap = Impossible<(), Self::Error>;
+ type SerializeStruct = Impossible<(), Self::Error>;
+ type SerializeStructVariant = Impossible<(), Self::Error>;
+
+ fn serialize_str(self, _value: &str) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_unit_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ ) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_newtype_struct<T: Serialize + ?Sized>(
+ self,
+ _name: &'static str,
+ value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ // Recurse
+ value.serialize(self)
+ }
+ fn serialize_bool(self, _value: bool) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_i8(self, _value: i8) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_i16(self, _value: i16) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_i32(self, _value: i32) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_i64(self, _value: i64) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ serde::serde_if_integer128! {
+ fn serialize_i128(self, _value: i128) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ }
+ fn serialize_u8(self, _value: u8) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_u16(self, _value: u16) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_u32(self, _value: u32) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_u64(self, _value: u64) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ serde::serde_if_integer128! {
+ fn serialize_u128(self, _value: u128) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ }
+ fn serialize_f32(self, _value: f32) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_f64(self, _value: f64) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_char(self, _value: char) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ fn serialize_bytes(self, _value: &[u8]) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_newtype_variant<T: Serialize + ?Sized>(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_some<T: Serialize + ?Sized>(
+ self,
+ _value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_tuple_struct(
+ self,
+ _name: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeTupleStruct, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_tuple_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeTupleVariant, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_struct(
+ self,
+ _name: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeStruct, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn serialize_struct_variant(
+ self,
+ _name: &'static str,
+ _variant_index: u32,
+ _variant: &'static str,
+ _len: usize,
+ ) -> Result<Self::SerializeStructVariant, Self::Error> {
+ Err(core::fmt::Error)
+ }
+ fn collect_str<T: core::fmt::Display + ?Sized>(
+ self,
+ _value: &T,
+ ) -> Result<Self::Ok, Self::Error> {
+ Ok(())
+ }
+ }
+ // The key counts as a number or string when the dry run reports no error.
+ k.serialize(MapKeySerializerDryRun).is_ok()
+}
diff --git a/third_party/rust/zerovec/src/map/vecs.rs b/third_party/rust/zerovec/src/map/vecs.rs
new file mode 100644
index 0000000000..5ee93d3fea
--- /dev/null
+++ b/third_party/rust/zerovec/src/map/vecs.rs
@@ -0,0 +1,724 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+use crate::varzerovec::owned::VarZeroVecOwned;
+use crate::vecs::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned, VarZeroVecFormat};
+use crate::{VarZeroSlice, VarZeroVec};
+use crate::{ZeroSlice, ZeroVec};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::mem;
+use core::ops::Range;
+
+ /// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). **You
+ /// should not be implementing or calling this trait directly.**
+ ///
+ /// The `T` type is the type received by [`Self::zvl_binary_search()`], as well as the one used
+ /// for human-readable serialization.
+ ///
+ /// Methods are prefixed with `zvl_*` to avoid clashes with methods on the types themselves.
+ pub trait ZeroVecLike<T: ?Sized> {
+ /// The type returned by [`Self::zvl_get()`]
+ type GetType: ?Sized + 'static;
+ /// A fully borrowed version of this container, sharing the same `GetType`
+ type SliceVariant: ZeroVecLike<T, GetType = Self::GetType> + ?Sized;
+
+ /// Create a new, empty borrowed variant
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant;
+
+ /// Search for a key in a sorted vector, returns `Ok(index)` if found,
+ /// returns `Err(insert_index)` if not found, where `insert_index` is the
+ /// index where it should be inserted to maintain sort order.
+ fn zvl_binary_search(&self, k: &T) -> Result<usize, usize>
+ where
+ T: Ord;
+ /// Search for a key within a certain range in a sorted vector.
+ /// Returns `None` if the range is out of bounds, and
+ /// `Ok` or `Err` in the same way as `zvl_binary_search`.
+ /// Indices are returned relative to the start of the range.
+ fn zvl_binary_search_in_range(
+ &self,
+ k: &T,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>>
+ where
+ T: Ord;
+
+ /// Search for a key in a sorted vector by a predicate, returns `Ok(index)` if found,
+ /// returns `Err(insert_index)` if not found, where `insert_index` is the
+ /// index where it should be inserted to maintain sort order.
+ fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize>;
+ /// Search for a key within a certain range in a sorted vector by a predicate.
+ /// Returns `None` if the range is out of bounds, and
+ /// `Ok` or `Err` in the same way as `zvl_binary_search_by`.
+ /// Indices are returned relative to the start of the range.
+ fn zvl_binary_search_in_range_by(
+ &self,
+ predicate: impl FnMut(&T) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>>;
+
+ /// Get element at `index`
+ fn zvl_get(&self, index: usize) -> Option<&Self::GetType>;
+ /// The length of this vector
+ fn zvl_len(&self) -> usize;
+ /// Check if this vector is in ascending order according to `T`s `Ord` impl.
+ ///
+ /// The order must be strict: two adjacent elements that compare equal make
+ /// this return `false`. A vector with no element at index 0 is considered ascending.
+ fn zvl_is_ascending(&self) -> bool
+ where
+ T: Ord,
+ {
+ if let Some(first) = self.zvl_get(0) {
+ let mut prev = first;
+ for i in 1..self.zvl_len() {
+ #[allow(clippy::unwrap_used)] // looping over the valid indices
+ let curr = self.zvl_get(i).unwrap();
+ // Anything other than `prev < curr` (including equality) breaks the order
+ if Self::get_cmp_get(prev, curr) != Ordering::Less {
+ return false;
+ }
+ prev = curr;
+ }
+ }
+ true
+ }
+ /// Check if this vector is empty
+ fn zvl_is_empty(&self) -> bool {
+ self.zvl_len() == 0
+ }
+
+ /// Construct a borrowed variant by borrowing from `&self`.
+ ///
+ /// This function behaves like `&'b self -> Self::SliceVariant<'b>`,
+ /// where `'b` is the lifetime of the reference to this object.
+ ///
+ /// Note: We rely on the compiler recognizing `'a` and `'b` as covariant and
+ /// casting `&'b Self<'a>` to `&'b Self<'b>` when this gets called, which works
+ /// out for `ZeroVec` and `VarZeroVec` containers just fine.
+ fn zvl_as_borrowed(&self) -> &Self::SliceVariant;
+
+ /// Compare a `T` with a `Self::GetType`. This must produce the same result as
+ /// if `g` were converted to `T`
+ #[inline]
+ fn t_cmp_get(t: &T, g: &Self::GetType) -> Ordering
+ where
+ T: Ord,
+ {
+ Self::zvl_get_as_t(g, |g| t.cmp(g))
+ }
+
+ /// Compare two values of `Self::GetType`. This must produce the same result as
+ /// if both `a` and `b` were converted to `T`
+ #[inline]
+ fn get_cmp_get(a: &Self::GetType, b: &Self::GetType) -> Ordering
+ where
+ T: Ord,
+ {
+ // Nested callbacks: both values are viewed as `&T` before comparing
+ Self::zvl_get_as_t(a, |a| Self::zvl_get_as_t(b, |b| a.cmp(b)))
+ }
+
+ /// Obtain a reference to T, passed to a closure
+ ///
+ /// This uses a callback because it's not possible to return owned-or-borrowed
+ /// types without GATs
+ ///
+ /// Impls should guarantee that the callback function is called exactly once.
+ fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R;
+ }
+
+ /// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). **You
+ /// should not be implementing or calling this trait directly.**
+ ///
+ /// This trait augments [`ZeroVecLike`] with methods allowing for mutation of the underlying
+ /// vector for owned vector types.
+ ///
+ /// Methods are prefixed with `zvl_*` to avoid clashes with methods on the types themselves.
+ pub trait MutableZeroVecLike<'a, T: ?Sized>: ZeroVecLike<T> {
+ /// The type returned by [`Self::zvl_remove()`] and [`Self::zvl_replace()`]
+ type OwnedType;
+
+ /// Insert an element at `index`
+ fn zvl_insert(&mut self, index: usize, value: &T);
+ /// Remove the element at `index` (panicking if nonexistent)
+ fn zvl_remove(&mut self, index: usize) -> Self::OwnedType;
+ /// Replace the element at `index` with another one, returning the old element
+ fn zvl_replace(&mut self, index: usize, value: &T) -> Self::OwnedType;
+ /// Push an element to the end of this vector
+ fn zvl_push(&mut self, value: &T);
+ /// Create a new, empty vector, with given capacity
+ fn zvl_with_capacity(cap: usize) -> Self;
+ /// Remove all elements from the vector
+ fn zvl_clear(&mut self);
+ /// Reserve space for `addl` additional elements
+ fn zvl_reserve(&mut self, addl: usize);
+ /// Applies the permutation such that `before.zvl_get(permutation[i]) == after.zvl_get(i)`.
+ ///
+ /// The `permutation` slice is taken mutably; implementations may use it as
+ /// scratch space, so callers should not rely on its contents afterwards.
+ ///
+ /// # Panics
+ /// If `permutation` is not a valid permutation of length `zvl_len()`.
+ fn zvl_permute(&mut self, permutation: &mut [usize]);
+
+ /// Convert an owned value to a borrowed T
+ fn owned_as_t(o: &Self::OwnedType) -> &T;
+
+ /// Construct from the borrowed version of the type
+ ///
+ /// These are useful to ensure serialization parity between borrowed and owned versions
+ fn zvl_from_borrowed(b: &'a Self::SliceVariant) -> Self;
+ /// Extract the inner borrowed variant if possible. Returns `None` if the data is owned.
+ ///
+ /// This function behaves like `&'_ self -> Self::SliceVariant<'a>`,
+ /// where `'a` is the lifetime of this object's borrowed data.
+ ///
+ /// This function is similar to matching the `Borrowed` variant of `ZeroVec`
+ /// or `VarZeroVec`, returning the inner borrowed type.
+ fn zvl_as_borrowed_inner(&self) -> Option<&'a Self::SliceVariant>;
+ }
+
+ // `ZeroVecLike` for the possibly-owned `ZeroVec`. Elements are handed out as
+ // `T::ULE` references and converted back to `T` on demand.
+ impl<'a, T> ZeroVecLike<T> for ZeroVec<'a, T>
+ where
+ T: 'a + AsULE + Copy,
+ {
+ type GetType = T::ULE;
+ type SliceVariant = ZeroSlice<T>;
+
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant {
+ ZeroSlice::<T>::new_empty()
+ }
+ fn zvl_binary_search(&self, k: &T) -> Result<usize, usize>
+ where
+ T: Ord,
+ {
+ ZeroSlice::binary_search(self, k)
+ }
+ fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>>
+ where
+ T: Ord,
+ {
+ // View `self` as a `ZeroSlice` and reuse the slice's trait implementation
+ let zs: &ZeroSlice<T> = self;
+ zs.zvl_binary_search_in_range(k, range)
+ }
+ fn zvl_binary_search_by(
+ &self,
+ mut predicate: impl FnMut(&T) -> Ordering,
+ ) -> Result<usize, usize> {
+ // The inherent search passes each probe to the closure; the trait's predicate wants `&T`
+ ZeroSlice::binary_search_by(self, |probe| predicate(&probe))
+ }
+ fn zvl_binary_search_in_range_by(
+ &self,
+ predicate: impl FnMut(&T) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ // View `self` as a `ZeroSlice` and reuse the slice's trait implementation
+ let zs: &ZeroSlice<T> = self;
+ zs.zvl_binary_search_in_range_by(predicate, range)
+ }
+ fn zvl_get(&self, index: usize) -> Option<&T::ULE> {
+ self.get_ule_ref(index)
+ }
+ fn zvl_len(&self) -> usize {
+ ZeroSlice::len(self)
+ }
+ fn zvl_as_borrowed(&self) -> &ZeroSlice<T> {
+ self
+ }
+ #[inline]
+ fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R {
+ // Copy the ULE out, convert it to `T`, and lend the temporary to the callback
+ f(&T::from_unaligned(*g))
+ }
+ }
+
+ // `ZeroVecLike` for the borrowed `ZeroSlice`. It is its own `SliceVariant`.
+ impl<T> ZeroVecLike<T> for ZeroSlice<T>
+ where
+ T: AsULE + Copy,
+ {
+ type GetType = T::ULE;
+ type SliceVariant = ZeroSlice<T>;
+
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant {
+ ZeroSlice::<T>::new_empty()
+ }
+ fn zvl_binary_search(&self, k: &T) -> Result<usize, usize>
+ where
+ T: Ord,
+ {
+ ZeroSlice::binary_search(self, k)
+ }
+ fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>>
+ where
+ T: Ord,
+ {
+ // `?` propagates `None` when no subslice exists for `range`; the search then
+ // runs on the subslice, so indices are relative to the start of `range`
+ let subslice = self.get_subslice(range)?;
+ Some(ZeroSlice::binary_search(subslice, k))
+ }
+ fn zvl_binary_search_by(
+ &self,
+ mut predicate: impl FnMut(&T) -> Ordering,
+ ) -> Result<usize, usize> {
+ // The inherent search passes each probe to the closure; the trait's predicate wants `&T`
+ ZeroSlice::binary_search_by(self, |probe| predicate(&probe))
+ }
+ fn zvl_binary_search_in_range_by(
+ &self,
+ mut predicate: impl FnMut(&T) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ // Same approach as `zvl_binary_search_in_range`, with a caller-supplied ordering
+ let subslice = self.get_subslice(range)?;
+ Some(ZeroSlice::binary_search_by(subslice, |probe| {
+ predicate(&probe)
+ }))
+ }
+ fn zvl_get(&self, index: usize) -> Option<&T::ULE> {
+ self.get_ule_ref(index)
+ }
+ fn zvl_len(&self) -> usize {
+ ZeroSlice::len(self)
+ }
+ fn zvl_as_borrowed(&self) -> &ZeroSlice<T> {
+ self
+ }
+
+ #[inline]
+ fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R {
+ // Copy the ULE out, convert it to `T`, and lend the temporary to the callback
+ f(&T::from_unaligned(*g))
+ }
+ }
+
+ // Mutation support for `ZeroVec`. Values cross the API as `T` and are stored
+ // in their unaligned (`T::ULE`) form.
+ impl<'a, T> MutableZeroVecLike<'a, T> for ZeroVec<'a, T>
+ where
+ T: AsULE + Copy + 'static,
+ {
+ type OwnedType = T;
+ fn zvl_insert(&mut self, index: usize, value: &T) {
+ self.with_mut(|v| v.insert(index, value.to_unaligned()))
+ }
+ fn zvl_remove(&mut self, index: usize) -> T {
+ T::from_unaligned(self.with_mut(|v| v.remove(index)))
+ }
+ fn zvl_replace(&mut self, index: usize, value: &T) -> T {
+ #[allow(clippy::indexing_slicing)]
+ let unaligned = self.with_mut(|vec| {
+ debug_assert!(index < vec.len());
+ // Swap the new value in and hand back the previous one in a single step
+ mem::replace(&mut vec[index], value.to_unaligned())
+ });
+ T::from_unaligned(unaligned)
+ }
+ fn zvl_push(&mut self, value: &T) {
+ self.with_mut(|v| v.push(value.to_unaligned()))
+ }
+ fn zvl_with_capacity(cap: usize) -> Self {
+ // A zero capacity request does not need an owned buffer
+ if cap == 0 {
+ ZeroVec::new()
+ } else {
+ ZeroVec::new_owned(Vec::with_capacity(cap))
+ }
+ }
+ fn zvl_clear(&mut self) {
+ self.with_mut(|v| v.clear())
+ }
+ fn zvl_reserve(&mut self, addl: usize) {
+ self.with_mut(|v| v.reserve(addl))
+ }
+
+ fn owned_as_t(o: &Self::OwnedType) -> &T {
+ o
+ }
+
+ fn zvl_from_borrowed(b: &'a ZeroSlice<T>) -> Self {
+ b.as_zerovec()
+ }
+ fn zvl_as_borrowed_inner(&self) -> Option<&'a ZeroSlice<T>> {
+ self.as_maybe_borrowed()
+ }
+
+ /// Applies `permutation` in place by walking each of its cycles and swapping
+ /// elements along the way. `permutation` is used as scratch space: every
+ /// visited index is rewritten to point at itself.
+ ///
+ /// # Panics
+ /// If `permutation` is not a valid permutation of length `zvl_len()`: on a
+ /// length mismatch, an out-of-range index, or an index that occurs twice.
+ #[allow(clippy::indexing_slicing)] // documented panic
+ fn zvl_permute(&mut self, permutation: &mut [usize]) {
+ assert_eq!(permutation.len(), self.zvl_len());
+
+ let vec = self.to_mut_slice();
+
+ for cycle_start in 0..permutation.len() {
+ let mut curr = cycle_start;
+ let mut next = permutation[curr];
+
+ while next != cycle_start {
+ vec.swap(curr, next);
+ // Make curr a self-cycle so we don't use it as a cycle_start later
+ permutation[curr] = curr;
+ curr = next;
+ next = permutation[next];
+ // A valid cycle only closes back at `cycle_start`. Landing on an index
+ // that maps to itself means it was fixed or already visited, i.e. the
+ // input holds a duplicate. Without this check the walk never terminates.
+ assert_ne!(next, curr, "zvl_permute: not a valid permutation");
+ }
+ permutation[curr] = curr;
+ }
+ }
+ }
+
+ // `ZeroVecLike` for the possibly-owned `VarZeroVec`. Elements are already `T`
+ // (`GetType = T`), so every method forwards to the container's own API.
+ impl<'a, T, F> ZeroVecLike<T> for VarZeroVec<'a, T, F>
+ where
+ T: VarULE,
+ T: ?Sized,
+ F: VarZeroVecFormat,
+ {
+ type GetType = T;
+ type SliceVariant = VarZeroSlice<T, F>;
+
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant {
+ VarZeroSlice::<T, F>::new_empty()
+ }
+ fn zvl_binary_search(&self, k: &T) -> Result<usize, usize>
+ where
+ T: Ord,
+ {
+ self.binary_search(k)
+ }
+ fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>>
+ where
+ T: Ord,
+ {
+ self.binary_search_in_range(k, range)
+ }
+ fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
+ self.binary_search_by(predicate)
+ }
+ fn zvl_binary_search_in_range_by(
+ &self,
+ predicate: impl FnMut(&T) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ self.binary_search_in_range_by(predicate, range)
+ }
+ fn zvl_get(&self, index: usize) -> Option<&T> {
+ self.get(index)
+ }
+ fn zvl_len(&self) -> usize {
+ self.len()
+ }
+
+ fn zvl_as_borrowed(&self) -> &VarZeroSlice<T, F> {
+ self.as_slice()
+ }
+
+ #[inline]
+ fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R {
+ // `GetType` is `T` itself, so no conversion is needed
+ f(g)
+ }
+ }
+
+ // `ZeroVecLike` for the borrowed `VarZeroSlice`. It is its own `SliceVariant`,
+ // and every method forwards to the slice's own API.
+ impl<T, F> ZeroVecLike<T> for VarZeroSlice<T, F>
+ where
+ T: VarULE,
+ T: ?Sized,
+ F: VarZeroVecFormat,
+ {
+ type GetType = T;
+ type SliceVariant = VarZeroSlice<T, F>;
+
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant {
+ VarZeroSlice::<T, F>::new_empty()
+ }
+ fn zvl_binary_search(&self, k: &T) -> Result<usize, usize>
+ where
+ T: Ord,
+ {
+ self.binary_search(k)
+ }
+ fn zvl_binary_search_in_range(&self, k: &T, range: Range<usize>) -> Option<Result<usize, usize>>
+ where
+ T: Ord,
+ {
+ self.binary_search_in_range(k, range)
+ }
+ fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
+ self.binary_search_by(predicate)
+ }
+ fn zvl_binary_search_in_range_by(
+ &self,
+ predicate: impl FnMut(&T) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ self.binary_search_in_range_by(predicate, range)
+ }
+ fn zvl_get(&self, index: usize) -> Option<&T> {
+ self.get(index)
+ }
+ fn zvl_len(&self) -> usize {
+ self.len()
+ }
+
+ fn zvl_as_borrowed(&self) -> &VarZeroSlice<T, F> {
+ self
+ }
+
+ #[inline]
+ fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R {
+ // `GetType` is `T` itself, so no conversion is needed
+ f(g)
+ }
+ }
+
+ // Mutation support for `VarZeroVec`. Every mutating call goes through
+ // `make_mut()`, and removed or replaced elements are returned as `Box<T>`.
+ impl<'a, T, F> MutableZeroVecLike<'a, T> for VarZeroVec<'a, T, F>
+ where
+ T: VarULE,
+ T: ?Sized,
+ F: VarZeroVecFormat,
+ {
+ type OwnedType = Box<T>;
+ fn zvl_insert(&mut self, index: usize, value: &T) {
+ let owned = self.make_mut();
+ owned.insert(index, value)
+ }
+ fn zvl_remove(&mut self, index: usize) -> Box<T> {
+ let owned = self.make_mut();
+ debug_assert!(index < owned.len());
+ // Box a copy of the element first, then drop it from the vector
+ #[allow(clippy::unwrap_used)]
+ let removed = owned.get(index).unwrap().to_boxed();
+ owned.remove(index);
+ removed
+ }
+ fn zvl_replace(&mut self, index: usize, value: &T) -> Box<T> {
+ let owned = self.make_mut();
+ debug_assert!(index < owned.len());
+ // Box a copy of the current element before it is overwritten
+ #[allow(clippy::unwrap_used)]
+ let previous = owned.get(index).unwrap().to_boxed();
+ owned.replace(index, value);
+ previous
+ }
+ fn zvl_push(&mut self, value: &T) {
+ // Pushing is inserting at the current end
+ let end = self.len();
+ self.make_mut().insert(end, value)
+ }
+ fn zvl_with_capacity(cap: usize) -> Self {
+ match cap {
+ 0 => VarZeroVec::new(),
+ _ => VarZeroVec::Owned(VarZeroVecOwned::with_capacity(cap)),
+ }
+ }
+ fn zvl_clear(&mut self) {
+ let owned = self.make_mut();
+ owned.clear()
+ }
+ fn zvl_reserve(&mut self, addl: usize) {
+ let owned = self.make_mut();
+ owned.reserve(addl)
+ }
+
+ fn owned_as_t(o: &Self::OwnedType) -> &T {
+ o
+ }
+
+ fn zvl_from_borrowed(b: &'a VarZeroSlice<T, F>) -> Self {
+ b.as_varzerovec()
+ }
+ fn zvl_as_borrowed_inner(&self) -> Option<&'a VarZeroSlice<T, F>> {
+ // Only the `Borrowed` variant can hand out a reference that lives for `'a`
+ match *self {
+ VarZeroVec::Borrowed(slice) => Some(slice),
+ _ => None,
+ }
+ }
+
+ #[allow(clippy::unwrap_used)] // documented panic
+ fn zvl_permute(&mut self, permutation: &mut [usize]) {
+ assert_eq!(permutation.len(), self.zvl_len());
+
+ // Build the permuted vector from scratch, then replace `self` with it
+ let mut permuted = VarZeroVecOwned::new();
+ for src in permutation.iter().copied() {
+ permuted.push(self.get(src).unwrap());
+ }
+ *self = VarZeroVec::Owned(permuted);
+ }
+ }
+
+ // `ZeroVecLike<usize>` for the possibly-owned `FlexZeroVec`. Elements are
+ // exposed as byte chunks (`[u8]`) and decoded to `usize` on demand.
+ impl<'a> ZeroVecLike<usize> for FlexZeroVec<'a> {
+ type GetType = [u8];
+ type SliceVariant = FlexZeroSlice;
+
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant {
+ FlexZeroSlice::new_empty()
+ }
+ fn zvl_binary_search(&self, k: &usize) -> Result<usize, usize> {
+ FlexZeroSlice::binary_search(self, *k)
+ }
+ fn zvl_binary_search_in_range(
+ &self,
+ k: &usize,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ FlexZeroSlice::binary_search_in_range(self, *k, range)
+ }
+ fn zvl_binary_search_by(
+ &self,
+ mut predicate: impl FnMut(&usize) -> Ordering,
+ ) -> Result<usize, usize> {
+ // The inherent search passes each probe to the closure; the trait's predicate wants `&usize`
+ FlexZeroSlice::binary_search_by(self, |probe| predicate(&probe))
+ }
+ fn zvl_binary_search_in_range_by(
+ &self,
+ mut predicate: impl FnMut(&usize) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ FlexZeroSlice::binary_search_in_range_by(self, |probe| predicate(&probe), range)
+ }
+ fn zvl_get(&self, index: usize) -> Option<&[u8]> {
+ self.get_chunk(index)
+ }
+ fn zvl_len(&self) -> usize {
+ FlexZeroSlice::len(self)
+ }
+
+ fn zvl_as_borrowed(&self) -> &FlexZeroSlice {
+ self
+ }
+
+ #[inline]
+ fn zvl_get_as_t<R>(g: &[u8], f: impl FnOnce(&usize) -> R) -> R {
+ // Decode the chunk to a `usize`, passing the chunk's own length as the second argument
+ f(&crate::chunk_to_usize(g, g.len()))
+ }
+ }
+
+ // `ZeroVecLike<usize>` for the borrowed `FlexZeroSlice`. It is its own
+ // `SliceVariant`; elements are byte chunks decoded to `usize` on demand.
+ impl ZeroVecLike<usize> for FlexZeroSlice {
+ type GetType = [u8];
+ type SliceVariant = FlexZeroSlice;
+
+ fn zvl_new_borrowed() -> &'static Self::SliceVariant {
+ FlexZeroSlice::new_empty()
+ }
+ fn zvl_binary_search(&self, k: &usize) -> Result<usize, usize> {
+ FlexZeroSlice::binary_search(self, *k)
+ }
+ fn zvl_binary_search_in_range(
+ &self,
+ k: &usize,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ FlexZeroSlice::binary_search_in_range(self, *k, range)
+ }
+ fn zvl_binary_search_by(
+ &self,
+ mut predicate: impl FnMut(&usize) -> Ordering,
+ ) -> Result<usize, usize> {
+ // The inherent search passes each probe to the closure; the trait's predicate wants `&usize`
+ FlexZeroSlice::binary_search_by(self, |probe| predicate(&probe))
+ }
+ fn zvl_binary_search_in_range_by(
+ &self,
+ mut predicate: impl FnMut(&usize) -> Ordering,
+ range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+ FlexZeroSlice::binary_search_in_range_by(self, |probe| predicate(&probe), range)
+ }
+ fn zvl_get(&self, index: usize) -> Option<&[u8]> {
+ self.get_chunk(index)
+ }
+ fn zvl_len(&self) -> usize {
+ FlexZeroSlice::len(self)
+ }
+
+ fn zvl_as_borrowed(&self) -> &FlexZeroSlice {
+ self
+ }
+
+ #[inline]
+ fn zvl_get_as_t<R>(g: &Self::GetType, f: impl FnOnce(&usize) -> R) -> R {
+ // Decode the chunk to a `usize`, passing the chunk's own length as the second argument
+ f(&crate::chunk_to_usize(g, g.len()))
+ }
+ }
+
+ // Mutation support for `FlexZeroVec`. Every mutating call goes through
+ // `to_mut()`; capacity hints are ignored.
+ impl<'a> MutableZeroVecLike<'a, usize> for FlexZeroVec<'a> {
+ type OwnedType = usize;
+ fn zvl_insert(&mut self, index: usize, value: &usize) {
+ let owned = self.to_mut();
+ owned.insert(index, *value)
+ }
+ fn zvl_remove(&mut self, index: usize) -> usize {
+ let owned = self.to_mut();
+ owned.remove(index)
+ }
+ fn zvl_replace(&mut self, index: usize, value: &usize) -> usize {
+ // TODO(#2028): Make this a single operation instead of two operations.
+ let owned = self.to_mut();
+ let previous = owned.remove(index);
+ owned.insert(index, *value);
+ previous
+ }
+ fn zvl_push(&mut self, value: &usize) {
+ let owned = self.to_mut();
+ owned.push(*value)
+ }
+ fn zvl_with_capacity(_cap: usize) -> Self {
+ // There is no `FlexZeroVec::with_capacity()` because it is variable-width
+ let empty = FlexZeroVecOwned::new_empty();
+ FlexZeroVec::Owned(empty)
+ }
+ fn zvl_clear(&mut self) {
+ let owned = self.to_mut();
+ owned.clear()
+ }
+ fn zvl_reserve(&mut self, _addl: usize) {
+ // There is no `FlexZeroVec::reserve()` because it is variable-width
+ }
+
+ fn owned_as_t(o: &Self::OwnedType) -> &usize {
+ o
+ }
+
+ fn zvl_from_borrowed(b: &'a FlexZeroSlice) -> Self {
+ b.as_flexzerovec()
+ }
+ fn zvl_as_borrowed_inner(&self) -> Option<&'a FlexZeroSlice> {
+ // Only the `Borrowed` variant can hand out a reference that lives for `'a`
+ match *self {
+ FlexZeroVec::Borrowed(slice) => Some(slice),
+ _ => None,
+ }
+ }
+
+ #[allow(clippy::unwrap_used)] // documented panic
+ fn zvl_permute(&mut self, permutation: &mut [usize]) {
+ assert_eq!(permutation.len(), self.zvl_len());
+ // Collect the elements in permuted order into a fresh vector, then replace `self`
+ let permuted: Self = permutation
+ .iter()
+ .map(|&src| self.get(src).unwrap())
+ .collect();
+ *self = permuted;
+ }
+ }
+
+ // Unit tests for the `ZeroVecLike` / `MutableZeroVecLike` implementations above.
+ #[cfg(test)]
+ mod test {
+ use super::*;
+
+ // Range-restricted binary search on a `ZeroVec`: results are relative to the
+ // start of the range, and ranges that exceed the vector yield `None`.
+ #[test]
+ fn test_zerovec_binary_search_in_range() {
+ let zv: ZeroVec<u16> = ZeroVec::from_slice_or_alloc(&[11, 22, 33, 44, 55, 66, 77]);
+
+ // Full range search
+ assert_eq!(zv.zvl_binary_search_in_range(&11, 0..7), Some(Ok(0)));
+ assert_eq!(zv.zvl_binary_search_in_range(&12, 0..7), Some(Err(1)));
+ assert_eq!(zv.zvl_binary_search_in_range(&44, 0..7), Some(Ok(3)));
+ assert_eq!(zv.zvl_binary_search_in_range(&45, 0..7), Some(Err(4)));
+ assert_eq!(zv.zvl_binary_search_in_range(&77, 0..7), Some(Ok(6)));
+ assert_eq!(zv.zvl_binary_search_in_range(&78, 0..7), Some(Err(7)));
+
+ // Out-of-range search: the key exists in the vector but outside the searched range
+ assert_eq!(zv.zvl_binary_search_in_range(&44, 0..2), Some(Err(2)));
+ assert_eq!(zv.zvl_binary_search_in_range(&44, 5..7), Some(Err(0)));
+
+ // Offset search: indices are relative to the start of the range
+ assert_eq!(zv.zvl_binary_search_in_range(&44, 2..5), Some(Ok(1)));
+ assert_eq!(zv.zvl_binary_search_in_range(&45, 2..5), Some(Err(2)));
+
+ // Out-of-bounds: the range itself does not fit in the vector
+ assert_eq!(zv.zvl_binary_search_in_range(&44, 0..100), None);
+ assert_eq!(zv.zvl_binary_search_in_range(&44, 100..200), None);
+ }
+
+ // The same permutation applied to all three container kinds must give the
+ // same reordering: `after[i] == before[permutation[i]]`.
+ #[test]
+ fn test_permute() {
+ let mut zv: ZeroVec<u16> = ZeroVec::from_slice_or_alloc(&[11, 22, 33, 44, 55, 66, 77]);
+ let mut permutation = vec![3, 2, 1, 0, 6, 5, 4];
+ zv.zvl_permute(&mut permutation);
+ assert_eq!(&zv, &[44, 33, 22, 11, 77, 66, 55]);
+
+ let mut vzv: VarZeroVec<str> = VarZeroVec::Owned(
+ VarZeroVecOwned::try_from_elements(&["11", "22", "33", "44", "55", "66", "77"])
+ .unwrap(),
+ );
+ // A fresh permutation is built for each container
+ let mut permutation = vec![3, 2, 1, 0, 6, 5, 4];
+ vzv.zvl_permute(&mut permutation);
+ assert_eq!(&vzv, &["44", "33", "22", "11", "77", "66", "55"]);
+
+ let mut fzv: FlexZeroVec = [11, 22, 33, 44, 55, 66, 77].into_iter().collect();
+ let mut permutation = vec![3, 2, 1, 0, 6, 5, 4];
+ fzv.zvl_permute(&mut permutation);
+ assert_eq!(
+ fzv.iter().collect::<Vec<_>>(),
+ [44, 33, 22, 11, 77, 66, 55].into_iter().collect::<Vec<_>>()
+ );
+ }
+ }
diff --git a/third_party/rust/zerovec/src/map2d/borrowed.rs b/third_party/rust/zerovec/src/map2d/borrowed.rs
new file mode 100644
index 0000000000..166f1be743
--- /dev/null
+++ b/third_party/rust/zerovec/src/map2d/borrowed.rs
@@ -0,0 +1,339 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ZeroSlice;
+
+use core::cmp::Ordering;
+use core::fmt;
+
+use crate::map::ZeroMapKV;
+use crate::map::ZeroVecLike;
+use crate::map2d::ZeroMap2dCursor;
+
+ /// A borrowed-only version of [`ZeroMap2d`](super::ZeroMap2d)
+ ///
+ /// This is useful for fully-zero-copy deserialization from non-human-readable
+ /// serialization formats. It also has the advantage that it can return references that live for
+ /// the lifetime of the backing buffer as opposed to that of the [`ZeroMap2dBorrowed`] instance.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ ///
+ /// // Example byte buffer representing the map { 1: {2: "three" } }
+ /// let BINCODE_BYTES: &[u8; 51] = &[
+ /// 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0,
+ /// 0, 0, 0, 0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116,
+ /// 104, 114, 101, 101,
+ /// ];
+ ///
+ /// // Deserializing to ZeroMap2dBorrowed requires no heap allocations.
+ /// let zero_map: ZeroMap2dBorrowed<u16, u16, str> =
+ /// bincode::deserialize(BINCODE_BYTES)
+ /// .expect("Should deserialize successfully");
+ /// assert_eq!(zero_map.get_2d(&1, &2), Some("three"));
+ /// ```
+ ///
+ /// This can be obtained from a [`ZeroMap2d`](super::ZeroMap2d) via [`ZeroMap2d::as_borrowed`](super::ZeroMap2d::as_borrowed)
+ pub struct ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ // All four fields borrow from the backing buffer for `'a`.
+ // First-level keys; searched with `zvl_binary_search`, so presumably kept sorted.
+ pub(crate) keys0: &'a K0::Slice,
+ // NOTE(review): appears to hold one entry per `keys0` element linking it to its
+ // run in `keys1`/`values`; confirm against the `ZeroMap2d` invariants.
+ pub(crate) joiner: &'a ZeroSlice<u32>,
+ // Second-level keys.
+ pub(crate) keys1: &'a K1::Slice,
+ // Values; the length of this container is what `len()` reports.
+ pub(crate) values: &'a V::Slice,
+ }
+
+ // Implemented by hand rather than derived: the struct holds only shared
+ // references, so it is copyable without `Copy` bounds on `K0`, `K1` or `V`
+ // (which may be unsized).
+ impl<'a, K0, K1, V> Copy for ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ }
+
+ // Implemented by hand so that no `Clone` bounds are imposed on `K0`, `K1` or `V`.
+ impl<'a, K0, K1, V> Clone for ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ fn clone(&self) -> Self {
+ // The type is `Copy`, so cloning is a plain copy of the four references
+ *self
+ }
+ }
+
+ // The default value is the empty map. The `'static` bounds on the slice types
+ // mirror those required by `new()`.
+ impl<'a, K0, K1, V> Default for ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0::Slice: 'static,
+ K1::Slice: 'static,
+ V::Slice: 'static,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ fn default() -> Self {
+ Self::new()
+ }
+ }
+
+ // Construction of an empty map. The slice types must be `'static` because the
+ // empty containers are obtained as `&'static` references.
+ impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0::Slice: 'static,
+ K1::Slice: 'static,
+ V::Slice: 'static,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ /// Creates a new, empty `ZeroMap2dBorrowed<K0, K1, V>`.
+ ///
+ /// Note: Since [`ZeroMap2dBorrowed`] is not mutable, the return value will be a stub unless
+ /// converted into a [`ZeroMap2d`](super::ZeroMap2d).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ ///
+ /// let zm: ZeroMap2dBorrowed<u16, u16, str> = ZeroMap2dBorrowed::new();
+ /// assert!(zm.is_empty());
+ /// ```
+ pub fn new() -> Self {
+ Self {
+ // Each container contributes its own empty borrowed variant
+ keys0: K0::Container::zvl_new_borrowed(),
+ joiner: Default::default(),
+ keys1: K1::Container::zvl_new_borrowed(),
+ values: V::Container::zvl_new_borrowed(),
+ }
+ }
+ }
+
+ // Raw construction and size queries; these need no ordering bounds on the keys.
+ impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ /// Assembles a map directly from its four parts without any validation.
+ ///
+ /// # Safety
+ ///
+ /// NOTE(review): the exact requirements are not visible here. Presumably the
+ /// parts must uphold the same invariants as the fields of
+ /// [`ZeroMap2d`](super::ZeroMap2d); confirm before relying on this.
+ #[doc(hidden)] // databake internal
+ pub const unsafe fn from_parts_unchecked(
+ keys0: &'a K0::Slice,
+ joiner: &'a ZeroSlice<u32>,
+ keys1: &'a K1::Slice,
+ values: &'a V::Slice,
+ ) -> Self {
+ Self {
+ keys0,
+ joiner,
+ keys1,
+ values,
+ }
+ }
+
+ /// The number of elements in the [`ZeroMap2dBorrowed`]
+ pub fn len(&self) -> usize {
+ // Counted on `values`, i.e. one per stored value
+ self.values.zvl_len()
+ }
+
+ /// Whether the [`ZeroMap2dBorrowed`] is empty
+ pub fn is_empty(&self) -> bool {
+ self.values.zvl_len() == 0
+ }
+ }
+
+ // Two-level lookup; both key types must be `Ord`.
+ impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a> + Ord,
+ K1: ZeroMapKV<'a> + Ord,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ /// Get the value associated with `key0` and `key1`, if it exists.
+ ///
+ /// This is able to return values that live longer than the map itself
+ /// since they borrow directly from the backing buffer. This is the
+ /// primary advantage of using [`ZeroMap2dBorrowed`](super::ZeroMap2dBorrowed) over [`ZeroMap2d`](super::ZeroMap2d).
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "one", "bar");
+ /// map.insert(&2, "two", "baz");
+ ///
+ /// let borrowed = map.as_borrowed();
+ /// assert_eq!(borrowed.get_2d(&1, "one"), Some("foo"));
+ /// assert_eq!(borrowed.get_2d(&1, "two"), None);
+ /// assert_eq!(borrowed.get_2d(&2, "one"), Some("bar"));
+ /// assert_eq!(borrowed.get_2d(&2, "two"), Some("baz"));
+ /// assert_eq!(borrowed.get_2d(&3, "three"), None);
+ /// ```
+ pub fn get_2d(&self, key0: &K0, key1: &K1) -> Option<&'a V::GetType> {
+ // Resolve `key0` to a cursor first (`None` if absent), then look up `key1` through it
+ self.get0(key0)?.get1(key1)
+ }
+ }
+
+ // First-level lookups; only `K0` needs to be `Ord` here.
+ impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a> + Ord,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ /// Gets a cursor for `key0`. If `None`, then `key0` is not in the map. If `Some`,
+ /// then `key0` is in the map, and `key1` can be queried.
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "two", "bar");
+ /// let borrowed = map.as_borrowed();
+ /// assert!(matches!(borrowed.get0(&1), Some(_)));
+ /// assert!(matches!(borrowed.get0(&3), None));
+ /// ```
+ #[inline]
+ pub fn get0<'l>(&'l self, key0: &K0) -> Option<ZeroMap2dCursor<'a, 'a, K0, K1, V>> {
+ // A failed search (`Err(insert_index)`) becomes `None`; a hit becomes a cursor
+ self.keys0
+ .zvl_binary_search(key0)
+ .ok()
+ .map(|key0_index| ZeroMap2dCursor::from_borrowed(self, key0_index))
+ }
+
+ /// Binary search the map for `key0`, returning a cursor.
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "two", "bar");
+ /// let borrowed = map.as_borrowed();
+ /// assert!(matches!(borrowed.get0_by(|probe| probe.cmp(&1)), Some(_)));
+ /// assert!(matches!(borrowed.get0_by(|probe| probe.cmp(&3)), None));
+ /// ```
+ pub fn get0_by<'l>(
+ &'l self,
+ predicate: impl FnMut(&K0) -> Ordering,
+ ) -> Option<ZeroMap2dCursor<'a, 'a, K0, K1, V>> {
+ // Same as `get0`, but the ordering is supplied by the caller
+ self.keys0
+ .zvl_binary_search_by(predicate)
+ .ok()
+ .map(|key0_index| ZeroMap2dCursor::from_borrowed(self, key0_index))
+ }
+
+ /// Returns whether `key0` is contained in this map
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "two", "bar");
+ /// let borrowed = map.as_borrowed();
+ /// assert!(borrowed.contains_key0(&1));
+ /// assert!(!borrowed.contains_key0(&3));
+ /// ```
+ pub fn contains_key0(&self, key0: &K0) -> bool {
+ matches!(self.keys0.zvl_binary_search(key0), Ok(_))
+ }
+ }
+
+ // Iteration over first-level keys; no ordering bounds are needed.
+ impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+ {
+ /// Produce an ordered iterator over keys0
+ ///
+ /// Yields one cursor per index of `keys0`, in index order.
+ pub fn iter0<'l>(&'l self) -> impl Iterator<Item = ZeroMap2dCursor<'a, 'a, K0, K1, V>> + '_ {
+ (0..self.keys0.zvl_len()).map(move |idx| ZeroMap2dCursor::from_borrowed(self, idx))
+ }
+ }
+
+ // By-value lookup, available only when `V` is `Copy` (and therefore sized).
+ impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a> + Ord,
+ K1: ZeroMapKV<'a> + Ord,
+ V: ZeroMapKV<'a>,
+ V: Copy,
+ K0: ?Sized,
+ K1: ?Sized,
+ {
+ /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`
+ ///
+ /// Returns `None` if `key0` is not in the map; otherwise the result of the
+ /// cursor's `get1_copied(key1)`.
+ pub fn get_copied_2d(&self, key0: &K0, key1: &K1) -> Option<V> {
+ self.get0(key0)?.get1_copied(key1)
+ }
+ }
+
+ // We can't use the default PartialEq because ZeroMap2d is invariant
+ // so otherwise rustc will not automatically allow you to compare ZeroMaps
+ // with different lifetimes.
+ //
+ // The impl is therefore generic over two lifetimes `'a` and `'b`, and requires
+ // each slice type at `'a` to be comparable with its counterpart at `'b`.
+ impl<'a, 'b, K0, K1, V> PartialEq<ZeroMap2dBorrowed<'b, K0, K1, V>>
+ for ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: for<'c> ZeroMapKV<'c> + ?Sized,
+ K1: for<'c> ZeroMapKV<'c> + ?Sized,
+ V: for<'c> ZeroMapKV<'c> + ?Sized,
+ <K0 as ZeroMapKV<'a>>::Slice: PartialEq<<K0 as ZeroMapKV<'b>>::Slice>,
+ <K1 as ZeroMapKV<'a>>::Slice: PartialEq<<K1 as ZeroMapKV<'b>>::Slice>,
+ <V as ZeroMapKV<'a>>::Slice: PartialEq<<V as ZeroMapKV<'b>>::Slice>,
+ {
+ fn eq(&self, other: &ZeroMap2dBorrowed<'b, K0, K1, V>) -> bool {
+ // Field-wise comparison of all four parts; `&&` stops at the first mismatch
+ self.keys0.eq(other.keys0)
+ && self.joiner.eq(other.joiner)
+ && self.keys1.eq(other.keys1)
+ && self.values.eq(other.values)
+ }
+ }
+
+ // Debug output lists the four parts under the struct name; it requires the
+ // slice types (not `K0`/`K1`/`V` themselves) to be `Debug`.
+ impl<'a, K0, K1, V> fmt::Debug for ZeroMap2dBorrowed<'a, K0, K1, V>
+ where
+ K0: ZeroMapKV<'a> + ?Sized,
+ K1: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+ K0::Slice: fmt::Debug,
+ K1::Slice: fmt::Debug,
+ V::Slice: fmt::Debug,
+ {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+ let mut builder = f.debug_struct("ZeroMap2dBorrowed");
+ builder.field("keys0", &self.keys0);
+ builder.field("joiner", &self.joiner);
+ builder.field("keys1", &self.keys1);
+ builder.field("values", &self.values);
+ builder.finish()
+ }
+ }
diff --git a/third_party/rust/zerovec/src/map2d/cursor.rs b/third_party/rust/zerovec/src/map2d/cursor.rs
new file mode 100644
index 0000000000..4802187bec
--- /dev/null
+++ b/third_party/rust/zerovec/src/map2d/cursor.rs
@@ -0,0 +1,358 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{ZeroMap2d, ZeroSlice};
+
+use core::cmp::Ordering;
+use core::fmt;
+use core::ops::Range;
+
+use crate::map::ZeroMapKV;
+use crate::map::ZeroVecLike;
+
+use super::ZeroMap2dBorrowed;
+
+/// An intermediate state of queries over [`ZeroMap2d`] and [`ZeroMap2dBorrowed`].
+///
+/// A cursor borrows the four backing slices of a map for `'l` and remembers the
+/// position of one `key0`, so that the `key1`/value entries belonging to that
+/// `key0` can be looked up or iterated.
+pub struct ZeroMap2dCursor<'l, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ // Invariant: these fields have the same invariants as they do in ZeroMap2d
+ keys0: &'l K0::Slice, // the key0 entries; `key0_index` indexes into this slice
+ joiner: &'l ZeroSlice<u32>, // entry `i` is the exclusive end of key0 `i`'s range in keys1/values
+ keys1: &'l K1::Slice, // the key1 entries, addressed by the ranges derived from `joiner`
+ values: &'l V::Slice, // the values, addressed by the same indices as `keys1`
+ // Invariant: key0_index is in range
+ key0_index: usize, // position of this cursor's key0 in `keys0` and `joiner`
+}
+
+impl<'a, K0, K1, V> ZeroMap2dCursor<'a, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// Builds a cursor positioned at `key0_index` that shares the four slices of `borrowed`.
+ ///
+ /// `key0_index` must be in range; this is only verified by a `debug_assert!`.
+ pub(crate) fn from_borrowed(
+ borrowed: &ZeroMap2dBorrowed<'a, K0, K1, V>,
+ key0_index: usize,
+ ) -> Self {
+ let joiner = borrowed.joiner;
+ debug_assert!(key0_index < joiner.len());
+ Self {
+ keys0: borrowed.keys0,
+ joiner,
+ keys1: borrowed.keys1,
+ values: borrowed.values,
+ key0_index,
+ }
+ }
+}
+
+impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// Builds a cursor at `key0_index` that borrows the contents of `cow` for `'l`.
+ ///
+ /// `key0_index` must be in range; this is only verified by a `debug_assert!`.
+ pub(crate) fn from_cow(cow: &'l ZeroMap2d<'a, K0, K1, V>, key0_index: usize) -> Self {
+ debug_assert!(key0_index < cow.joiner.len());
+ let keys0 = cow.keys0.zvl_as_borrowed();
+ let keys1 = cow.keys1.zvl_as_borrowed();
+ let values = cow.values.zvl_as_borrowed();
+ ZeroMap2dCursor {
+ keys0,
+ joiner: &cow.joiner,
+ keys1,
+ values,
+ key0_index,
+ }
+ }
+
+ /// The key0 this cursor is positioned on.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert("one", &1u32, "foo");
+ /// assert_eq!(map.get0("one").unwrap().key0(), "one");
+ /// ```
+ pub fn key0(&self) -> &'l K0::GetType {
+ let position = self.key0_index;
+ #[allow(clippy::unwrap_used)] // `key0_index` is in range by the struct invariant
+ self.keys0.zvl_get(position).unwrap()
+ }
+
+ /// Iterates, in order, over the `(key1, value)` pairs stored under this cursor's key0,
+ /// borrowing the cursor.
+ ///
+ /// To get the values as copy types, see [`Self::iter1_copied`].
+ ///
+ /// For an example, see [`ZeroMap2d::iter0()`].
+ pub fn iter1(
+ &self,
+ ) -> impl Iterator<
+ Item = (
+ &'l <K1 as ZeroMapKV<'a>>::GetType,
+ &'l <V as ZeroMapKV<'a>>::GetType,
+ ),
+ > + '_ {
+ let indices = self.get_range();
+ #[allow(clippy::unwrap_used)] // every index produced by `get_range()` is valid
+ indices.map(move |idx| {
+ let key1 = self.keys1.zvl_get(idx).unwrap();
+ let value = self.values.zvl_get(idx).unwrap();
+ (key1, value)
+ })
+ }
+
+ /// Consumes the cursor and iterates, in order, over the `(key1, value)` pairs stored
+ /// under its key0.
+ pub fn into_iter1(
+ self,
+ ) -> impl Iterator<
+ Item = (
+ &'l <K1 as ZeroMapKV<'a>>::GetType,
+ &'l <V as ZeroMapKV<'a>>::GetType,
+ ),
+ > {
+ // The range is computed before `self` is moved into the closure below.
+ let indices = self.get_range();
+ #[allow(clippy::unwrap_used)] // every index produced by `get_range()` is valid
+ indices.map(move |idx| {
+ let key1 = self.keys1.zvl_get(idx).unwrap();
+ let value = self.values.zvl_get(idx).unwrap();
+ (key1, value)
+ })
+ }
+
+ /// Returns the index range in keys1/values that belongs to `key0_index`.
+ ///
+ /// The range starts at the previous joiner entry (or 0 for the first key0) and ends
+ /// at the joiner entry of `key0_index`.
+ pub(super) fn get_range(&self) -> Range<usize> {
+ debug_assert!(self.key0_index < self.joiner.len());
+ #[allow(clippy::unwrap_used)] // protected by the debug_assert above
+ let start = match self.key0_index.checked_sub(1) {
+ None => 0,
+ Some(previous) => self.joiner.get(previous).unwrap(),
+ };
+ #[allow(clippy::unwrap_used)] // protected by the debug_assert above
+ let limit = self.joiner.get(self.key0_index).unwrap();
+ // Both assertions follow from the invariants of ZeroMap2d
+ debug_assert!(start < limit);
+ debug_assert!((limit as usize) <= self.values.zvl_len());
+ Range {
+ start: start as usize,
+ end: limit as usize,
+ }
+ }
+}
+
+impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: Copy,
+{
+ /// Iterates, in order, over the keys1 stored under this cursor's key0, pairing each
+ /// with a copy of its value.
+ ///
+ /// The values are returned as copy types.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let zm2d: ZeroMap2d<str, u8, usize> = [
+ /// ("a", 0u8, 1usize),
+ /// ("b", 1u8, 1000usize),
+ /// ("b", 2u8, 2000usize),
+ /// ]
+ /// .into_iter()
+ /// .collect();
+ ///
+ /// let mut total_value = 0;
+ ///
+ /// for cursor in zm2d.iter0() {
+ /// for (_, value) in cursor.iter1_copied() {
+ /// total_value += value;
+ /// }
+ /// }
+ ///
+ /// assert_eq!(total_value, 3001);
+ /// ```
+ pub fn iter1_copied(
+ &self,
+ ) -> impl Iterator<Item = (&'l <K1 as ZeroMapKV<'a>>::GetType, V)> + '_ {
+ let indices = self.get_range();
+ #[allow(clippy::unwrap_used)] // every index produced by `get_range()` is valid
+ indices.map(move |idx| {
+ let key1 = self.keys1.zvl_get(idx).unwrap();
+ let value = self.get1_copied_at(idx).unwrap();
+ (key1, value)
+ })
+ }
+
+ /// Copies out the value stored at `index`, or returns `None` when `index` has no value.
+ fn get1_copied_at(&self, index: usize) -> Option<V> {
+ let ule = self.values.zvl_get(index)?;
+ let mut copied: Option<V> = None;
+ V::Container::zvl_get_as_t(ule, |v| copied = Some(*v));
+ #[allow(clippy::unwrap_used)] // `zvl_get_as_t` guarantees that the callback is invoked
+ Some(copied.unwrap())
+ }
+}
+
+impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a> + Ord,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// Looks up `key1` under this cursor's key0 and returns its value, or `None` if
+ /// `key1` is not present.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert("one", &1u32, "foo");
+ /// assert_eq!(map.get0("one").unwrap().get1(&1), Some("foo"));
+ /// assert_eq!(map.get0("one").unwrap().get1(&2), None);
+ /// ```
+ pub fn get1(&self, key1: &K1) -> Option<&'l V::GetType> {
+ let value_index = self.get_key1_index(key1)?;
+ #[allow(clippy::unwrap_used)] // an index returned by `get_key1_index` is valid
+ let value = self.values.zvl_get(value_index).unwrap();
+ Some(value)
+ }
+
+ /// Like [`Self::get1`], but locates the key1 with a comparison predicate instead of
+ /// a key; returns `None` if the search finds no match.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert("one", &1u32, "foo");
+ /// assert_eq!(map.get0("one").unwrap().get1_by(|v| v.cmp(&1)), Some("foo"));
+ /// assert_eq!(map.get0("one").unwrap().get1_by(|v| v.cmp(&2)), None);
+ /// ```
+ pub fn get1_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<&'l V::GetType> {
+ let value_index = self.get_key1_index_by(predicate)?;
+ #[allow(clippy::unwrap_used)] // an index returned by `get_key1_index_by` is valid
+ let value = self.values.zvl_get(value_index).unwrap();
+ Some(value)
+ }
+
+ /// Binary-searches this cursor's keys1 range with `predicate` and returns the matching
+ /// index into the values array, if any.
+ fn get_key1_index_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<usize> {
+ let range = self.get_range();
+ debug_assert!(range.start < range.end); // '<' because every key0 should have a key1
+ debug_assert!(range.end <= self.keys1.zvl_len());
+ // The search result is relative to the range, so remember where the range begins.
+ let offset = range.start;
+ #[allow(clippy::expect_used)] // protected by the debug_assert above
+ let outcome = self
+ .keys1
+ .zvl_binary_search_in_range_by(predicate, range)
+ .expect("in-bounds range");
+ match outcome {
+ Ok(relative) => Some(relative + offset),
+ Err(_) => None,
+ }
+ }
+
+ /// Binary-searches this cursor's keys1 range for `key1` and returns the matching index
+ /// into the values array, if any.
+ fn get_key1_index(&self, key1: &K1) -> Option<usize> {
+ let range = self.get_range();
+ debug_assert!(range.start < range.end); // '<' because every key0 should have a key1
+ debug_assert!(range.end <= self.keys1.zvl_len());
+ // The search result is relative to the range, so remember where the range begins.
+ let offset = range.start;
+ #[allow(clippy::expect_used)] // protected by the debug_assert above
+ let outcome = self
+ .keys1
+ .zvl_binary_search_in_range(key1, range)
+ .expect("in-bounds range");
+ match outcome {
+ Ok(relative) => Some(relative + offset),
+ Err(_) => None,
+ }
+ }
+}
+
+impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a> + Ord,
+ V: ZeroMapKV<'a>,
+ V: Copy,
+ K0: ?Sized,
+ K1: ?Sized,
+{
+ /// Looks up `key1` under this cursor's key0 and returns a direct copy of the value
+ /// as `V` (rather than `V::ULE`), or `None` if `key1` is not present.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map: ZeroMap2d<u16, u16, u16> = ZeroMap2d::new();
+ /// map.insert(&1, &2, &3);
+ /// map.insert(&1, &4, &5);
+ /// map.insert(&6, &7, &8);
+ ///
+ /// assert_eq!(map.get0(&6).unwrap().get1_copied(&7), Some(8));
+ /// ```
+ #[inline]
+ pub fn get1_copied(&self, key1: &K1) -> Option<V> {
+ self.get_key1_index(key1)
+ .and_then(|value_index| self.get1_copied_at(value_index))
+ }
+
+ /// Like [`Self::get1_copied`], but locates the key1 with a comparison predicate
+ /// instead of a key.
+ #[inline]
+ pub fn get1_copied_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<V> {
+ self.get_key1_index_by(predicate)
+ .and_then(|value_index| self.get1_copied_at(value_index))
+ }
+}
+
+// `PartialEq` is written by hand rather than derived: ZeroMap2d is invariant,
+// so a derived impl would not let rustc compare cursors whose lifetimes differ.
+impl<'m, 'n, 'a, 'b, K0, K1, V> PartialEq<ZeroMap2dCursor<'n, 'b, K0, K1, V>>
+ for ZeroMap2dCursor<'m, 'a, K0, K1, V>
+where
+ K0: for<'c> ZeroMapKV<'c> + ?Sized,
+ K1: for<'c> ZeroMapKV<'c> + ?Sized,
+ V: for<'c> ZeroMapKV<'c> + ?Sized,
+ <K0 as ZeroMapKV<'a>>::Slice: PartialEq<<K0 as ZeroMapKV<'b>>::Slice>,
+ <K1 as ZeroMapKV<'a>>::Slice: PartialEq<<K1 as ZeroMapKV<'b>>::Slice>,
+ <V as ZeroMapKV<'a>>::Slice: PartialEq<<V as ZeroMapKV<'b>>::Slice>,
+{
+ /// Compares the four backing slices and then the cursor position, stopping at the
+ /// first field that differs.
+ fn eq(&self, other: &ZeroMap2dCursor<'n, 'b, K0, K1, V>) -> bool {
+ if !self.keys0.eq(other.keys0) {
+ return false;
+ }
+ if !self.joiner.eq(other.joiner) {
+ return false;
+ }
+ if !self.keys1.eq(other.keys1) {
+ return false;
+ }
+ if !self.values.eq(other.values) {
+ return false;
+ }
+ self.key0_index == other.key0_index
+ }
+}
+
+impl<'l, 'a, K0, K1, V> fmt::Debug for ZeroMap2dCursor<'l, 'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + ?Sized,
+ K1: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+ K0::Slice: fmt::Debug,
+ K1::Slice: fmt::Debug,
+ V::Slice: fmt::Debug,
+{
+ /// Formats the cursor as a struct named `ZeroMap2dCursor`, listing the four borrowed
+ /// slices followed by the cursor position.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+ // Use the cursor's own type name: labelling the output "ZeroMap2d" made a cursor
+ // look like a whole map that had a stray `key0_index` field.
+ f.debug_struct("ZeroMap2dCursor")
+ .field("keys0", &self.keys0)
+ .field("joiner", &self.joiner)
+ .field("keys1", &self.keys1)
+ .field("values", &self.values)
+ .field("key0_index", &self.key0_index)
+ .finish()
+ }
+}
diff --git a/third_party/rust/zerovec/src/map2d/databake.rs b/third_party/rust/zerovec/src/map2d/databake.rs
new file mode 100644
index 0000000000..c5b9aca546
--- /dev/null
+++ b/third_party/rust/zerovec/src/map2d/databake.rs
@@ -0,0 +1,110 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{maps::ZeroMap2dBorrowed, maps::ZeroMapKV, ZeroMap2d};
+use databake::*;
+
+impl<'a, K0, K1, V> Bake for ZeroMap2d<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + ?Sized,
+ K1: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+ K0::Container: Bake,
+ K1::Container: Bake,
+ V::Container: Bake,
+{
+ /// Emits tokens that rebuild this map through `zerovec::ZeroMap2d::from_parts_unchecked`,
+ /// passing the baked form of each of its four fields.
+ fn bake(&self, env: &CrateEnv) -> TokenStream {
+ // NOTE(review): presumably records that the emitted code refers to the `zerovec`
+ // crate; confirm against databake's `CrateEnv`.
+ env.insert("zerovec");
+ let keys0 = self.keys0.bake(env);
+ let joiner = self.joiner.bake(env);
+ let keys1 = self.keys1.bake(env);
+ let values = self.values.bake(env);
+ // The emitted call is wrapped in `unsafe` because `from_parts_unchecked` is an `unsafe fn`.
+ quote! { unsafe { #[allow(unused_unsafe)] zerovec::ZeroMap2d::from_parts_unchecked(#keys0, #joiner, #keys1, #values) } }
+ }
+}
+
+impl<'a, K0, K1, V> Bake for ZeroMap2dBorrowed<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + ?Sized,
+ K1: ZeroMapKV<'a> + ?Sized,
+ V: ZeroMapKV<'a> + ?Sized,
+ &'a K0::Slice: Bake,
+ &'a K1::Slice: Bake,
+ &'a V::Slice: Bake,
+{
+ /// Emits tokens that rebuild this borrowed map through
+ /// `zerovec::maps::ZeroMap2dBorrowed::from_parts_unchecked`, passing the baked form of
+ /// each of its four slice references.
+ fn bake(&self, env: &CrateEnv) -> TokenStream {
+ // NOTE(review): presumably records that the emitted code refers to the `zerovec`
+ // crate; confirm against databake's `CrateEnv`.
+ env.insert("zerovec");
+ let keys0 = self.keys0.bake(env);
+ let joiner = self.joiner.bake(env);
+ let keys1 = self.keys1.bake(env);
+ let values = self.values.bake(env);
+ quote! { unsafe { #[allow(unused_unsafe)] zerovec::maps::ZeroMap2dBorrowed::from_parts_unchecked(#keys0, #joiner, #keys1, #values) } }
+ }
+}
+
+// Bake test for an owned `ZeroMap2d<str, str, str>`. The `const:` expression builds the
+// map from four raw byte buffers, given in `from_parts_unchecked` argument order:
+// keys0, joiner, keys1, values.
+// NOTE(review): `test_bake!` presumably checks that baking this constant reproduces the
+// expression; confirm against databake.
+#[test]
+fn test_baked_map() {
+ test_bake!(
+ ZeroMap2d<str, str, str>,
+ const: unsafe {
+ #[allow(unused_unsafe)]
+ crate::ZeroMap2d::from_parts_unchecked(
+ unsafe {
+ crate::VarZeroVec::from_bytes_unchecked(
+ b"\x0E\0\0\0\0\0\x05\0\x07\0\t\0\x0B\0\x10\0\x12\0\x14\0\x1C\0\x1E\0#\0%\0'\0,\0arcazcuenffgrckkkukylifmanmnpapalsdtgugunruzyuezh"
+ )
+ },
+ unsafe {
+ crate::ZeroVec::from_bytes_unchecked(
+ b"\x02\0\0\0\x03\0\0\0\x04\0\0\0\x05\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0\x15\0\0\0\x16\0\0\0\x17\0\0\0\x18\0\0\0\x19\0\0\0\x1C\0\0\0"
+ )
+ },
+ unsafe {
+ crate::VarZeroVec::from_bytes_unchecked(
+ b"\x1C\0\0\0\0\0\x04\0\x08\0\x0C\0\x10\0\x14\0\x18\0\x1C\0 \0$\0(\0,\x000\x004\08\0<\0@\0D\0H\0L\0P\0T\0X\0\\\0`\0d\0h\0l\0NbatPalmArabGlagShawAdlmLinbArabArabYeziArabLatnLimbNkooMongArabPhlpDevaKhojSindArabCyrlDevaArabHansBopoHanbHant"
+ )
+ },
+ unsafe {
+ crate::VarZeroVec::from_bytes_unchecked(
+ b"\x1C\0\0\0\0\0\x02\0\x04\0\x06\0\x08\0\n\0\x0C\0\x0E\0\x10\0\x12\0\x14\0\x16\0\x18\0\x1A\0\x1C\0\x1E\0 \0\"\0$\0&\0(\0*\0,\0.\x000\x002\x004\x006\0JOSYIRBGGBGNGRCNIQGECNTRINGNCNPKCNINININPKKZNPAFCNTWTWTW"
+ )
+ },
+ )
+ },
+ zerovec
+ );
+}
+
+// Bake test for a `ZeroMap2dBorrowed<str, str, str>`. The `const:` expression builds the
+// borrowed map from four raw byte buffers, given in `from_parts_unchecked` argument order:
+// keys0, joiner, keys1, values.
+// NOTE(review): `test_bake!` presumably checks that baking this constant reproduces the
+// expression; confirm against databake.
+#[test]
+fn test_baked_borrowed_map() {
+ test_bake!(
+ ZeroMap2dBorrowed<str, str, str>,
+ const: unsafe {
+ #[allow(unused_unsafe)]
+ crate::maps::ZeroMap2dBorrowed::from_parts_unchecked(
+ unsafe {
+ crate::VarZeroSlice::from_bytes_unchecked(
+ b"\x0E\0\0\0\0\0\x05\0\x07\0\t\0\x0B\0\x10\0\x12\0\x14\0\x1C\0\x1E\0#\0%\0'\0,\0arcazcuenffgrckkkukylifmanmnpapalsdtgugunruzyuezh"
+ )
+ },
+ unsafe {
+ crate::ZeroSlice::from_bytes_unchecked(
+ b"\x02\0\0\0\x03\0\0\0\x04\0\0\0\x05\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0\x15\0\0\0\x16\0\0\0\x17\0\0\0\x18\0\0\0\x19\0\0\0\x1C\0\0\0"
+ )
+ },
+ unsafe {
+ crate::VarZeroSlice::from_bytes_unchecked(
+ b"\x1C\0\0\0\0\0\x04\0\x08\0\x0C\0\x10\0\x14\0\x18\0\x1C\0 \0$\0(\0,\x000\x004\08\0<\0@\0D\0H\0L\0P\0T\0X\0\\\0`\0d\0h\0l\0NbatPalmArabGlagShawAdlmLinbArabArabYeziArabLatnLimbNkooMongArabPhlpDevaKhojSindArabCyrlDevaArabHansBopoHanbHant"
+ )
+ },
+ unsafe {
+ crate::VarZeroSlice::from_bytes_unchecked(
+ b"\x1C\0\0\0\0\0\x02\0\x04\0\x06\0\x08\0\n\0\x0C\0\x0E\0\x10\0\x12\0\x14\0\x16\0\x18\0\x1A\0\x1C\0\x1E\0 \0\"\0$\0&\0(\0*\0,\0.\x000\x002\x004\x006\0JOSYIRBGGBGNGRCNIQGECNTRINGNCNPKCNINININPKKZNPAFCNTWTWTW"
+ )
+ },
+ )
+ },
+ zerovec
+ );
+}
diff --git a/third_party/rust/zerovec/src/map2d/map.rs b/third_party/rust/zerovec/src/map2d/map.rs
new file mode 100644
index 0000000000..1975387a43
--- /dev/null
+++ b/third_party/rust/zerovec/src/map2d/map.rs
@@ -0,0 +1,875 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::AsULE;
+use crate::ZeroVec;
+use alloc::borrow::Borrow;
+use core::cmp::Ordering;
+use core::convert::TryFrom;
+use core::fmt;
+use core::iter::FromIterator;
+use core::ops::Range;
+
+use super::*;
+use crate::map::ZeroMapKV;
+use crate::map::{MutableZeroVecLike, ZeroVecLike};
+
+/// A zero-copy, two-dimensional map data structure.
+///
+/// This is an extension of [`ZeroMap`] that supports two layers of keys. For example,
+/// to map a pair of an integer and a string to a buffer, you can write:
+///
+/// ```no_run
+/// # use zerovec::ZeroMap2d;
+/// let _: ZeroMap2d<u32, str, [u8]> = unimplemented!();
+/// ```
+///
+/// Internally, `ZeroMap2d` stores four zero-copy vectors, one for each type argument plus
+/// one more to match between the two vectors of keys.
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::ZeroMap2d;
+///
+/// // Example byte buffer representing the map { 1: {2: "three" } }
+/// let BINCODE_BYTES: &[u8; 51] = &[
+/// 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0,
+/// 0, 0, 0, 0, 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116,
+/// 104, 114, 101, 101,
+/// ];
+///
+/// // Deserializing to ZeroMap requires no heap allocations.
+/// let zero_map: ZeroMap2d<u16, u16, str> =
+/// bincode::deserialize(BINCODE_BYTES)
+/// .expect("Should deserialize successfully");
+/// assert_eq!(zero_map.get_2d(&1, &2), Some("three"));
+/// ```
+///
+/// [`VarZeroVec`]: crate::VarZeroVec
+/// [`ZeroMap`]: crate::ZeroMap
+// ZeroMap2d contains 4 fields:
+//
+// - keys0 = sorted list of all K0 in the map
+// - joiner = helper vec that maps from a K0 to a range of keys1
+// - keys1 = list of all K1 in the map, sorted in ranges for each K0
+// - values = list of all values in the map, sorted by (K0, K1)
+//
+// For a particular K0 at index i, the range of keys1 corresponding to K0 is
+// (joiner[i-1]..joiner[i]), where the first range starts at 0.
+//
+// Required Invariants:
+//
+// 1. len(keys0) == len(joiner)
+// 2. len(keys1) == len(values)
+// 3. joiner is sorted
+// 4. the last element of joiner is the length of keys1
+//
+// Optional Invariants:
+//
+// 5. keys0 is sorted (for binary_search)
+// 6. ranges within keys1 are sorted (for binary_search)
+// 7. every K0 is associated with at least one K1 (no empty ranges)
+//
+// During deserialization, the three optional invariants (5-7) are not checked, because
+// violating them still leaves the ZeroMap2d in a deterministic state, even though lookups
+// may then behave unexpectedly.
+pub struct ZeroMap2d<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ pub(crate) keys0: K0::Container, // sorted list of all K0 in the map
+ pub(crate) joiner: ZeroVec<'a, u32>, // joiner[i] is the exclusive end of the keys1 range of keys0[i]
+ pub(crate) keys1: K1::Container, // all K1 in the map, sorted within the range of each K0
+ pub(crate) values: V::Container, // all values, stored at the same index as their key1
+}
+
+impl<'a, K0, K1, V> Default for ZeroMap2d<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// The default map is the empty map produced by `ZeroMap2d::new()`.
+ fn default() -> Self {
+ ZeroMap2d::new()
+ }
+}
+
+impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a>,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// Returns a `ZeroMap2d` that holds no entries.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let zm: ZeroMap2d<u16, str, str> = ZeroMap2d::new();
+ /// assert!(zm.is_empty());
+ /// ```
+ pub fn new() -> Self {
+ ZeroMap2d {
+ keys0: K0::Container::zvl_with_capacity(0),
+ joiner: ZeroVec::new(),
+ keys1: K1::Container::zvl_with_capacity(0),
+ values: V::Container::zvl_with_capacity(0),
+ }
+ }
+
+ /// Assembles a map directly from its four parts without validating them.
+ #[doc(hidden)] // databake internal
+ pub const unsafe fn from_parts_unchecked(
+ keys0: K0::Container,
+ joiner: ZeroVec<'a, u32>,
+ keys1: K1::Container,
+ values: V::Container,
+ ) -> Self {
+ ZeroMap2d {
+ keys0,
+ joiner,
+ keys1,
+ values,
+ }
+ }
+
+ /// Returns an empty [`ZeroMap2d`] whose four vectors are each created with `capacity`.
+ pub fn with_capacity(capacity: usize) -> Self {
+ let keys0 = K0::Container::zvl_with_capacity(capacity);
+ let joiner = ZeroVec::with_capacity(capacity);
+ let keys1 = K1::Container::zvl_with_capacity(capacity);
+ let values = V::Container::zvl_with_capacity(capacity);
+ ZeroMap2d {
+ keys0,
+ joiner,
+ keys1,
+ values,
+ }
+ }
+
+ /// Returns a [`ZeroMap2dBorrowed`] view that borrows the four vectors of this map.
+ pub fn as_borrowed(&'a self) -> ZeroMap2dBorrowed<'a, K0, K1, V> {
+ let keys0 = self.keys0.zvl_as_borrowed();
+ let keys1 = self.keys1.zvl_as_borrowed();
+ let values = self.values.zvl_as_borrowed();
+ ZeroMap2dBorrowed {
+ keys0,
+ joiner: &self.joiner,
+ keys1,
+ values,
+ }
+ }
+
+ /// How many values the [`ZeroMap2d`] holds
+ pub fn len(&self) -> usize {
+ self.values.zvl_len()
+ }
+
+ /// `true` when the [`ZeroMap2d`] holds no values
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Empties all four vectors of the [`ZeroMap2d`]
+ pub fn clear(&mut self) {
+ self.keys0.zvl_clear();
+ self.joiner.clear();
+ self.keys1.zvl_clear();
+ self.values.zvl_clear();
+ }
+
+ /// Reserves room for `additional` more elements in each of the four vectors of
+ /// the [`ZeroMap2d`], to avoid frequent reallocations.
+ ///
+ /// See [`Vec::reserve()`](alloc::vec::Vec::reserve) for more information.
+ pub fn reserve(&mut self, additional: usize) {
+ self.keys0.zvl_reserve(additional);
+ self.joiner.zvl_reserve(additional);
+ self.keys1.zvl_reserve(additional);
+ self.values.zvl_reserve(additional);
+ }
+
+ /// Yields one cursor per key0, in order; each cursor can then be used to iterate
+ /// over the keys1 stored under that key0.
+ ///
+ /// # Example
+ ///
+ /// Loop over all elements of a ZeroMap2d:
+ ///
+ /// ```
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map: ZeroMap2d<u16, u16, str> = ZeroMap2d::new();
+ /// map.insert(&1, &1, "foo");
+ /// map.insert(&2, &3, "bar");
+ /// map.insert(&2, &4, "baz");
+ ///
+ /// let mut total_value = 0;
+ ///
+ /// for cursor in map.iter0() {
+ /// for (key1, value) in cursor.iter1() {
+ /// // This code runs for every (key0, key1) pair
+ /// total_value += cursor.key0().as_unsigned_int() as usize;
+ /// total_value += key1.as_unsigned_int() as usize;
+ /// total_value += value.len();
+ /// }
+ /// }
+ ///
+ /// assert_eq!(total_value, 22);
+ /// ```
+ pub fn iter0<'l>(&'l self) -> impl Iterator<Item = ZeroMap2dCursor<'l, 'a, K0, K1, V>> + 'l {
+ let key0_count = self.keys0.zvl_len();
+ (0..key0_count).map(move |key0_index| ZeroMap2dCursor::from_cow(self, key0_index))
+ }
+
+ // INTERNAL ROUTINES FOLLOW //
+
+ /// Returns the range of keys1 indices that belongs to the key0 at `key0_index`
+ fn get_range_for_key0_index(&self, key0_index: usize) -> Range<usize> {
+ let cursor = ZeroMap2dCursor::from_cow(self, key0_index);
+ cursor.get_range()
+ }
+
+ /// Drops the entry at `key0_index` from both the keys0 array and the joiner array
+ fn remove_key0_index(&mut self, key0_index: usize) {
+ self.keys0.zvl_remove(key0_index);
+ self.joiner.with_mut(|joiner| joiner.remove(key0_index));
+ }
+
+ /// Adds one to every joiner entry from `key0_index` onward
+ fn joiner_expand(&mut self, key0_index: usize) {
+ #[allow(clippy::expect_used)] // slice overflow
+ for slot in self.joiner.to_mut_slice().iter_mut().skip(key0_index) {
+ // TODO(#1410): Make this fallible
+ let bumped = slot
+ .as_unsigned_int()
+ .checked_add(1)
+ .expect("Attempted to add more than 2^32 elements to a ZeroMap2d");
+ *slot = bumped.to_unaligned();
+ }
+ }
+
+ /// Subtracts one from every joiner entry from `key0_index` onward
+ fn joiner_shrink(&mut self, key0_index: usize) {
+ for slot in self.joiner.to_mut_slice().iter_mut().skip(key0_index) {
+ *slot = (slot.as_unsigned_int() - 1).to_unaligned();
+ }
+ }
+}
+
+impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + Ord,
+ K1: ZeroMapKV<'a> + Ord,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// Get the value associated with `key0` and `key1`, if it exists.
+ ///
+ /// For more fine-grained error handling, use [`ZeroMap2d::get0`].
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "one", "bar");
+ /// map.insert(&2, "two", "baz");
+ /// assert_eq!(map.get_2d(&1, "one"), Some("foo"));
+ /// assert_eq!(map.get_2d(&1, "two"), None);
+ /// assert_eq!(map.get_2d(&2, "one"), Some("bar"));
+ /// assert_eq!(map.get_2d(&2, "two"), Some("baz"));
+ /// assert_eq!(map.get_2d(&3, "three"), None);
+ /// ```
+ pub fn get_2d(&self, key0: &K0, key1: &K1) -> Option<&V::GetType> {
+ self.get0(key0)?.get1(key1)
+ }
+
+ /// Insert `value` under (`key0`, `key1`), returning the existing value if there was one.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// assert_eq!(map.insert(&0, "zero", "foo"), None,);
+ /// assert_eq!(map.insert(&1, "one", "bar"), None,);
+ /// assert_eq!(map.insert(&1, "one", "baz").as_deref(), Some("bar"),);
+ /// assert_eq!(map.get_2d(&1, "one").as_deref(), Some("baz"));
+ /// assert_eq!(map.len(), 2);
+ /// ```
+ pub fn insert(&mut self, key0: &K0, key1: &K1, value: &V) -> Option<V::OwnedType> {
+ // After this call `key0` has an entry in keys0/joiner; `range` is empty if it was just created.
+ let (key0_index, range) = self.get_or_insert_range_for_key0(key0);
+ debug_assert!(range.start <= range.end); // '<=' because we may have inserted a new key0
+ debug_assert!(range.end <= self.keys1.zvl_len());
+ let range_start = range.start;
+ #[allow(clippy::unwrap_used)] // by debug_assert! invariants
+ let index = range_start
+ + match self.keys1.zvl_binary_search_in_range(key1, range).unwrap() {
+ // (key0, key1) already present: overwrite the value in place, joiner stays as it is
+ Ok(index) => return Some(self.values.zvl_replace(range_start + index, value)),
+ Err(index) => index,
+ };
+ self.keys1.zvl_insert(index, key1);
+ self.values.zvl_insert(index, value);
+ // One more key1 now precedes the end of this range and of every later range
+ self.joiner_expand(key0_index);
+ #[cfg(debug_assertions)]
+ self.check_invariants();
+ None
+ }
+
+ /// Remove the value at (`key0`, `key1`), returning it if it exists.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "two", "bar");
+ /// assert_eq!(
+ /// map.remove(&1, "one"),
+ /// Some("foo".to_owned().into_boxed_str())
+ /// );
+ /// assert_eq!(map.get_2d(&1, "one"), None);
+ /// assert_eq!(map.remove(&1, "one"), None);
+ /// ```
+ pub fn remove(&mut self, key0: &K0, key1: &K1) -> Option<V::OwnedType> {
+ let key0_index = self.keys0.zvl_binary_search(key0).ok()?;
+ let range = self.get_range_for_key0_index(key0_index);
+ debug_assert!(range.start < range.end); // '<' because every key0 should have a key1
+ debug_assert!(range.end <= self.keys1.zvl_len());
+ // Computed now because `range` is moved into the search below
+ let is_singleton_range = range.start + 1 == range.end;
+ #[allow(clippy::unwrap_used)] // by debug_assert invariants
+ let index = range.start
+ + self
+ .keys1
+ .zvl_binary_search_in_range(key1, range)
+ .unwrap()
+ .ok()?;
+ self.keys1.zvl_remove(index);
+ let removed = self.values.zvl_remove(index);
+ self.joiner_shrink(key0_index);
+ // That was the only key1 under key0: drop key0 as well so that no empty range remains
+ if is_singleton_range {
+ self.remove_key0_index(key0_index);
+ }
+ #[cfg(debug_assertions)]
+ self.check_invariants();
+ Some(removed)
+ }
+
+ /// Appends `value` with `key` to the end of the underlying vector, returning
+ /// `key` and `value` _if it failed_. Useful for extending with an existing
+ /// sorted list.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// assert!(map.try_append(&1, "one", "uno").is_none());
+ /// assert!(map.try_append(&3, "three", "tres").is_none());
+ ///
+ /// let unsuccessful = map.try_append(&3, "three", "tres-updated");
+ /// assert!(unsuccessful.is_some(), "append duplicate of last key");
+ ///
+ /// let unsuccessful = map.try_append(&2, "two", "dos");
+ /// assert!(unsuccessful.is_some(), "append out of order");
+ ///
+ /// assert_eq!(map.get_2d(&1, "one"), Some("uno"));
+ ///
+ /// // contains the original value for the key: 3
+ /// assert_eq!(map.get_2d(&3, "three"), Some("tres"));
+ ///
+ /// // not appended since it wasn't in order
+ /// assert_eq!(map.get_2d(&2, "two"), None);
+ /// ```
+ #[must_use]
+ pub fn try_append<'b>(
+ &mut self,
+ key0: &'b K0,
+ key1: &'b K1,
+ value: &'b V,
+ ) -> Option<(&'b K0, &'b K1, &'b V)> {
+ // First entry of the map: nothing to compare against, and the single range ends at 1
+ if self.is_empty() {
+ self.keys0.zvl_push(key0);
+ self.joiner.with_mut(|v| v.push(1u32.to_unaligned()));
+ self.keys1.zvl_push(key1);
+ self.values.zvl_push(value);
+ return None;
+ }
+
+ // The unwraps are protected by the fact that we are not empty
+ #[allow(clippy::unwrap_used)]
+ let last_key0 = self.keys0.zvl_get(self.keys0.zvl_len() - 1).unwrap();
+ let key0_cmp = K0::Container::t_cmp_get(key0, last_key0);
+ #[allow(clippy::unwrap_used)]
+ let last_key1 = self.keys1.zvl_get(self.keys1.zvl_len() - 1).unwrap();
+ let key1_cmp = K1::Container::t_cmp_get(key1, last_key1);
+
+ // Check for error case (out of order)
+ match key0_cmp {
+ Ordering::Less => {
+ // Error case
+ return Some((key0, key1, value));
+ }
+ Ordering::Equal => {
+ match key1_cmp {
+ Ordering::Less | Ordering::Equal => {
+ // Error case
+ return Some((key0, key1, value));
+ }
+ _ => {}
+ }
+ }
+ _ => {}
+ }
+
+ // Exclusive end of the last range once `key1` has been pushed below
+ #[allow(clippy::expect_used)] // slice overflow
+ let joiner_value = u32::try_from(self.keys1.zvl_len() + 1)
+ .expect("Attempted to add more than 2^32 elements to a ZeroMap2d");
+
+ // All OK to append
+ #[allow(clippy::unwrap_used)]
+ if key0_cmp == Ordering::Greater {
+ // New key0: open a new range ending at `joiner_value`
+ self.keys0.zvl_push(key0);
+ self.joiner
+ .with_mut(|v| v.push(joiner_value.to_unaligned()));
+ } else {
+ // Same key0 as the last one: extend its range
+ // This unwrap is protected because we are not empty
+ *self.joiner.to_mut_slice().last_mut().unwrap() = joiner_value.to_unaligned();
+ }
+ self.keys1.zvl_push(key1);
+ self.values.zvl_push(value);
+
+ #[cfg(debug_assertions)]
+ self.check_invariants();
+
+ None
+ }
+
+ // INTERNAL ROUTINES FOLLOW //
+
+ // Debug-only check: keys0/joiner and keys1/values have matching lengths, keys0 and
+ // joiner are ascending, the last joiner entry equals len(keys1), no key0 has an empty
+ // range, and the keys1 inside each range are strictly increasing.
+ #[cfg(debug_assertions)]
+ #[allow(clippy::unwrap_used)] // this is an assertion function
+ pub(crate) fn check_invariants(&self) {
+ debug_assert_eq!(self.keys0.zvl_len(), self.joiner.len());
+ debug_assert_eq!(self.keys1.zvl_len(), self.values.zvl_len());
+ debug_assert!(self.keys0.zvl_is_ascending());
+ debug_assert!(self.joiner.zvl_is_ascending());
+ if let Some(last_joiner) = self.joiner.last() {
+ debug_assert_eq!(last_joiner as usize, self.keys1.zvl_len());
+ }
+ for i in 0..self.joiner.len() {
+ // j0..j1 is the keys1 range of the i-th key0
+ let j0 = if i == 0 {
+ 0
+ } else {
+ self.joiner.get(i - 1).unwrap() as usize
+ };
+ let j1 = self.joiner.get(i).unwrap() as usize;
+ debug_assert_ne!(j0, j1);
+ // Compare each key1 in the range with its predecessor
+ for j in (j0 + 1)..j1 {
+ let m0 = self.keys1.zvl_get(j - 1).unwrap();
+ let m1 = self.keys1.zvl_get(j).unwrap();
+ debug_assert_eq!(Ordering::Less, K1::Container::get_cmp_get(m0, m1));
+ }
+ }
+ }
+}
+
+impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + Ord,
+ K1: ZeroMapKV<'a>,
+ V: ZeroMapKV<'a>,
+ K0: ?Sized,
+ K1: ?Sized,
+ V: ?Sized,
+{
+ /// Returns a cursor positioned on `key0`, or `None` when `key0` is not in the map.
+ /// A returned cursor can then be queried for `key1`.
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1u32, "one", "foo");
+ /// map.insert(&2, "one", "bar");
+ /// map.insert(&2, "two", "baz");
+ /// assert_eq!(map.get0(&1).unwrap().get1("one").unwrap(), "foo");
+ /// assert_eq!(map.get0(&1).unwrap().get1("two"), None);
+ /// assert_eq!(map.get0(&2).unwrap().get1("one").unwrap(), "bar");
+ /// assert_eq!(map.get0(&2).unwrap().get1("two").unwrap(), "baz");
+ /// assert_eq!(map.get0(&3), None);
+ /// ```
+ #[inline]
+ pub fn get0<'l>(&'l self, key0: &K0) -> Option<ZeroMap2dCursor<'l, 'a, K0, K1, V>> {
+ self.keys0
+ .zvl_binary_search(key0)
+ .ok()
+ .map(|key0_index| ZeroMap2dCursor::from_cow(self, key0_index))
+ }
+
+ /// Binary-searches keys0 with `predicate` and returns a cursor on the match, if any.
+ ///
+ /// ```rust
+ /// use zerovec::maps::ZeroMap2dBorrowed;
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "two", "bar");
+ /// assert!(matches!(map.get0_by(|probe| probe.cmp(&1)), Some(_)));
+ /// assert!(matches!(map.get0_by(|probe| probe.cmp(&3)), None));
+ /// ```
+ pub fn get0_by<'l>(
+ &'l self,
+ predicate: impl FnMut(&K0) -> Ordering,
+ ) -> Option<ZeroMap2dCursor<'l, 'a, K0, K1, V>> {
+ self.keys0
+ .zvl_binary_search_by(predicate)
+ .ok()
+ .map(|key0_index| ZeroMap2dCursor::from_cow(self, key0_index))
+ }
+
+ /// Reports whether the map has an entry for `key0`
+ ///
+ /// ```rust
+ /// use zerovec::ZeroMap2d;
+ ///
+ /// let mut map = ZeroMap2d::new();
+ /// map.insert(&1, "one", "foo");
+ /// map.insert(&2, "two", "bar");
+ /// assert!(map.contains_key0(&1));
+ /// assert!(!map.contains_key0(&3));
+ /// ```
+ pub fn contains_key0(&self, key0: &K0) -> bool {
+ matches!(self.keys0.zvl_binary_search(key0), Ok(_))
+ }
+
+ // INTERNAL ROUTINES FOLLOW //
+
+ /// Returns the index of `key0` together with its keys1 range, as
+ /// `get_range_for_key0_index` would, but first creates `key0` with an empty range if
+ /// it is not in the map yet
+ fn get_or_insert_range_for_key0(&mut self, key0: &K0) -> (usize, Range<usize>) {
+ let key0_index = match self.keys0.zvl_binary_search(key0) {
+ Ok(found) => return (found, self.get_range_for_key0_index(found)),
+ Err(insertion_point) => insertion_point,
+ };
+ // `key0` is new: its (empty) range begins where the range of the preceding key0
+ // ends, or at 0 when there is no preceding key0
+ let joiner_value = match key0_index.checked_sub(1) {
+ None => 0,
+ Some(previous) => {
+ debug_assert!(key0_index <= self.joiner.len());
+ // The unwrap is protected by the debug_assert above and key0_index != 0
+ #[allow(clippy::unwrap_used)]
+ self.joiner.get(previous).unwrap()
+ }
+ };
+ // Add an entry to self.keys0 and self.joiner
+ self.keys0.zvl_insert(key0_index, key0);
+ self.joiner
+ .with_mut(|joiner| joiner.insert(key0_index, joiner_value.to_unaligned()));
+ let empty_at = joiner_value as usize;
+ (key0_index, empty_at..empty_at)
+ }
+}
+
+impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Ord + ?Sized,
+    K1: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + Copy,
+{
+    /// Looks up `(key0, key1)` and returns the value by copy.
+    ///
+    /// Intended for fixed-size `V`, where a direct copy of `V` is more
+    /// convenient than a reference to `V::ULE`. Returns `None` when either
+    /// key is absent.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use zerovec::ZeroMap2d;
+    /// let mut map: ZeroMap2d<u16, u16, u16> = ZeroMap2d::new();
+    /// map.insert(&1, &2, &3);
+    /// map.insert(&1, &4, &5);
+    /// map.insert(&6, &7, &8);
+    ///
+    /// assert_eq!(map.get_copied_2d(&6, &7), Some(8));
+    /// ```
+    #[inline]
+    pub fn get_copied_2d(&self, key0: &K0, key1: &K1) -> Option<V> {
+        self.get0(key0)
+            .and_then(|cursor| cursor.get1_copied(key1))
+    }
+}
+
+impl<'a, K0, K1, V> From<ZeroMap2dBorrowed<'a, K0, K1, V>> for ZeroMap2d<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + ?Sized,
+    K1: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Builds a `ZeroMap2d` from the four borrowed components of `other`,
+    /// converting each one into the container type the owned map stores.
+    fn from(other: ZeroMap2dBorrowed<'a, K0, K1, V>) -> Self {
+        let keys0 = K0::Container::zvl_from_borrowed(other.keys0);
+        let joiner = other.joiner.as_zerovec();
+        let keys1 = K1::Container::zvl_from_borrowed(other.keys1);
+        let values = V::Container::zvl_from_borrowed(other.values);
+        Self {
+            keys0,
+            joiner,
+            keys1,
+            values,
+        }
+    }
+}
+
+// A derived PartialEq would only compare maps sharing a single lifetime:
+// ZeroMap2d is invariant, so rustc will not adjust the lifetimes for us.
+// This hand-written impl allows comparing ZeroMaps with different lifetimes.
+impl<'a, 'b, K0, K1, V> PartialEq<ZeroMap2d<'b, K0, K1, V>> for ZeroMap2d<'a, K0, K1, V>
+where
+    K0: for<'c> ZeroMapKV<'c> + ?Sized,
+    K1: for<'c> ZeroMapKV<'c> + ?Sized,
+    V: for<'c> ZeroMapKV<'c> + ?Sized,
+    <K0 as ZeroMapKV<'a>>::Container: PartialEq<<K0 as ZeroMapKV<'b>>::Container>,
+    <K1 as ZeroMapKV<'a>>::Container: PartialEq<<K1 as ZeroMapKV<'b>>::Container>,
+    <V as ZeroMapKV<'a>>::Container: PartialEq<<V as ZeroMapKV<'b>>::Container>,
+{
+    /// Two maps are equal when all four backing containers are equal,
+    /// compared in the order keys0, joiner, keys1, values.
+    fn eq(&self, other: &ZeroMap2d<'b, K0, K1, V>) -> bool {
+        self.keys0 == other.keys0
+            && self.joiner == other.joiner
+            && self.keys1 == other.keys1
+            && self.values == other.values
+    }
+}
+
+impl<'a, K0, K1, V> fmt::Debug for ZeroMap2d<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + ?Sized,
+    K1: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    <K0 as ZeroMapKV<'a>>::Container: fmt::Debug,
+    <K1 as ZeroMapKV<'a>>::Container: fmt::Debug,
+    <V as ZeroMapKV<'a>>::Container: fmt::Debug,
+{
+    /// Renders the map as a struct named `ZeroMap2d` exposing its four
+    /// backing containers.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut out = f.debug_struct("ZeroMap2d");
+        out.field("keys0", &self.keys0);
+        out.field("joiner", &self.joiner);
+        out.field("keys1", &self.keys1);
+        out.field("values", &self.values);
+        out.finish()
+    }
+}
+
+impl<'a, K0, K1, V> Clone for ZeroMap2d<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + ?Sized,
+    K1: ZeroMapKV<'a> + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    <K0 as ZeroMapKV<'a>>::Container: Clone,
+    <K1 as ZeroMapKV<'a>>::Container: Clone,
+    <V as ZeroMapKV<'a>>::Container: Clone,
+{
+    /// Clones each of the four backing containers into a new map.
+    fn clone(&self) -> Self {
+        let keys0 = self.keys0.clone();
+        let joiner = self.joiner.clone();
+        let keys1 = self.keys1.clone();
+        let values = self.values.clone();
+        Self {
+            keys0,
+            joiner,
+            keys1,
+            values,
+        }
+    }
+}
+
+impl<'a, A, B, C, K0, K1, V> FromIterator<(A, B, C)> for ZeroMap2d<'a, K0, K1, V>
+where
+    A: Borrow<K0>,
+    B: Borrow<K1>,
+    C: Borrow<V>,
+    K0: ZeroMapKV<'a> + ?Sized + Ord,
+    K1: ZeroMapKV<'a> + ?Sized + Ord,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Collects `(key0, key1, value)` triples into a map.
+    ///
+    /// Each triple is first offered to `try_append`; when that hands the
+    /// triple back, it is stored with `insert` instead.
+    fn from_iter<T>(iter: T) -> Self
+    where
+        T: IntoIterator<Item = (A, B, C)>,
+    {
+        let entries = iter.into_iter();
+        // Size the map from the upper bound of the hint when one is
+        // reported, otherwise from the lower bound
+        let (lower, upper) = entries.size_hint();
+        let mut map = Self::with_capacity(upper.unwrap_or(lower));
+
+        for (key0, key1, value) in entries {
+            let rejected = map.try_append(key0.borrow(), key1.borrow(), value.borrow());
+            if let Some((key0, key1, value)) = rejected {
+                map.insert(key0, key1, value);
+            }
+        }
+        #[cfg(debug_assertions)]
+        map.check_invariants();
+        map
+    }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use alloc::collections::BTreeMap;
+
+ // Drives try_append, insert and remove in sequence, checking the Debug
+ // rendering of the four backing containers and a set of lookups after
+ // each phase
+ #[test]
+ fn stress_test() {
+ let mut zm2d = ZeroMap2d::<u16, str, str>::new();
+
+ assert_eq!(
+ format!("{zm2d:?}"),
+ "ZeroMap2d { keys0: ZeroVec([]), joiner: ZeroVec([]), keys1: [], values: [] }"
+ );
+ assert_eq!(zm2d.get0(&0), None);
+
+ let result = zm2d.try_append(&3, "ccc", "CCC");
+ assert!(result.is_none());
+
+ assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3]), joiner: ZeroVec([1]), keys1: [\"ccc\"], values: [\"CCC\"] }");
+ assert_eq!(zm2d.get0(&0), None);
+ assert_eq!(zm2d.get0(&3).unwrap().get1(""), None);
+ assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC"));
+ assert_eq!(zm2d.get0(&99), None);
+
+ let result = zm2d.try_append(&3, "eee", "EEE");
+ assert!(result.is_none());
+
+ assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3]), joiner: ZeroVec([2]), keys1: [\"ccc\", \"eee\"], values: [\"CCC\", \"EEE\"] }");
+ assert_eq!(zm2d.get0(&0), None);
+ assert_eq!(zm2d.get0(&3).unwrap().get1(""), None);
+ assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC"));
+ assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE"));
+ assert_eq!(zm2d.get0(&3).unwrap().get1("five"), None);
+ assert_eq!(zm2d.get0(&99), None);
+
+ // Out of order
+ // The entry is handed back (Some) rather than stored; "DD0" never shows
+ // up in the Debug output checked below
+ let result = zm2d.try_append(&3, "ddd", "DD0");
+ assert!(result.is_some());
+
+ // Append a few more elements
+ let result = zm2d.try_append(&5, "ddd", "DD1");
+ assert!(result.is_none());
+ let result = zm2d.try_append(&7, "ddd", "DD2");
+ assert!(result.is_none());
+ let result = zm2d.try_append(&7, "eee", "EEE");
+ assert!(result.is_none());
+ let result = zm2d.try_append(&7, "www", "WWW");
+ assert!(result.is_none());
+ let result = zm2d.try_append(&9, "yyy", "YYY");
+ assert!(result.is_none());
+
+ assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 5, 7, 9]), joiner: ZeroVec([2, 3, 6, 7]), keys1: [\"ccc\", \"eee\", \"ddd\", \"ddd\", \"eee\", \"www\", \"yyy\"], values: [\"CCC\", \"EEE\", \"DD1\", \"DD2\", \"EEE\", \"WWW\", \"YYY\"] }");
+ assert_eq!(zm2d.get0(&0), None);
+ assert_eq!(zm2d.get0(&3).unwrap().get1(""), None);
+ assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC"));
+ assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE"));
+ assert_eq!(zm2d.get0(&3).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&4), None);
+ assert_eq!(zm2d.get0(&5).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get_2d(&5, "ddd"), Some("DD1"));
+ assert_eq!(zm2d.get0(&5).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&6), None);
+ assert_eq!(zm2d.get0(&7).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get_2d(&7, "ddd"), Some("DD2"));
+ assert_eq!(zm2d.get_2d(&7, "eee"), Some("EEE"));
+ assert_eq!(zm2d.get_2d(&7, "www"), Some("WWW"));
+ assert_eq!(zm2d.get0(&7).unwrap().get1("yyy"), None);
+ assert_eq!(zm2d.get0(&7).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&8), None);
+ assert_eq!(zm2d.get0(&9).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get0(&9).unwrap().get1("www"), None);
+ assert_eq!(zm2d.get_2d(&9, "yyy"), Some("YYY"));
+ assert_eq!(zm2d.get0(&9).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&10), None);
+ assert_eq!(zm2d.get0(&99), None);
+
+ // Insert some elements
+ // (one into the existing key0 3, three under the new key0 6)
+ zm2d.insert(&3, "mmm", "MM0");
+ zm2d.insert(&6, "ddd", "DD3");
+ zm2d.insert(&6, "mmm", "MM1");
+ zm2d.insert(&6, "nnn", "NNN");
+
+ assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 5, 6, 7, 9]), joiner: ZeroVec([3, 4, 7, 10, 11]), keys1: [\"ccc\", \"eee\", \"mmm\", \"ddd\", \"ddd\", \"mmm\", \"nnn\", \"ddd\", \"eee\", \"www\", \"yyy\"], values: [\"CCC\", \"EEE\", \"MM0\", \"DD1\", \"DD3\", \"MM1\", \"NNN\", \"DD2\", \"EEE\", \"WWW\", \"YYY\"] }");
+ assert_eq!(zm2d.get0(&0), None);
+ assert_eq!(zm2d.get0(&3).unwrap().get1(""), None);
+ assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC"));
+ assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE"));
+ assert_eq!(zm2d.get_2d(&3, "mmm"), Some("MM0"));
+ assert_eq!(zm2d.get0(&3).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&4), None);
+ assert_eq!(zm2d.get0(&5).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get_2d(&5, "ddd"), Some("DD1"));
+ assert_eq!(zm2d.get0(&5).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&6).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get_2d(&6, "ddd"), Some("DD3"));
+ assert_eq!(zm2d.get_2d(&6, "mmm"), Some("MM1"));
+ assert_eq!(zm2d.get_2d(&6, "nnn"), Some("NNN"));
+ assert_eq!(zm2d.get0(&6).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&7).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get_2d(&7, "ddd"), Some("DD2"));
+ assert_eq!(zm2d.get_2d(&7, "eee"), Some("EEE"));
+ assert_eq!(zm2d.get_2d(&7, "www"), Some("WWW"));
+ assert_eq!(zm2d.get0(&7).unwrap().get1("yyy"), None);
+ assert_eq!(zm2d.get0(&7).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&8), None);
+ assert_eq!(zm2d.get0(&9).unwrap().get1("aaa"), None);
+ assert_eq!(zm2d.get0(&9).unwrap().get1("www"), None);
+ assert_eq!(zm2d.get_2d(&9, "yyy"), Some("YYY"));
+ assert_eq!(zm2d.get0(&9).unwrap().get1("zzz"), None);
+ assert_eq!(zm2d.get0(&10), None);
+ assert_eq!(zm2d.get0(&99), None);
+
+ // Remove some elements
+ let result = zm2d.remove(&3, "ccc"); // first element
+ assert_eq!(result.as_deref(), Some("CCC"));
+ let result = zm2d.remove(&3, "mmm"); // middle element
+ assert_eq!(result.as_deref(), Some("MM0"));
+ let result = zm2d.remove(&5, "ddd"); // singleton K0
+ assert_eq!(result.as_deref(), Some("DD1"));
+ let result = zm2d.remove(&9, "yyy"); // last element
+ assert_eq!(result.as_deref(), Some("YYY"));
+
+ // key0 5 and key0 9 each lost their only entry and no longer appear in keys0
+ assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 6, 7]), joiner: ZeroVec([1, 4, 7]), keys1: [\"eee\", \"ddd\", \"mmm\", \"nnn\", \"ddd\", \"eee\", \"www\"], values: [\"EEE\", \"DD3\", \"MM1\", \"NNN\", \"DD2\", \"EEE\", \"WWW\"] }");
+ }
+
+ // Collects the same triples into a BTreeMap keyed by (key0, key1) and into
+ // a ZeroMap2d, then checks that iterating the ZeroMap2d visits the same
+ // entries in the same order as the BTreeMap
+ #[test]
+ fn zeromap2d_metazone() {
+ // The last two rows repeat earlier (key0, key1) pairs
+ let source_data = [
+ (*b"aedxb", 0, Some(*b"gulf")),
+ (*b"afkbl", 0, Some(*b"afgh")),
+ (*b"ushnl", 0, None),
+ (*b"ushnl", 7272660, Some(*b"haal")),
+ (*b"ushnl", 0, None),
+ (*b"ushnl", 7272660, Some(*b"haal")),
+ ];
+
+ let btreemap: BTreeMap<([u8; 5], i32), Option<[u8; 4]>> = source_data
+ .iter()
+ .copied()
+ .map(|(a, b, c)| ((a, b), c))
+ .collect();
+
+ let zeromap2d: ZeroMap2d<[u8; 5], i32, Option<[u8; 4]>> =
+ source_data.iter().copied().collect();
+
+ let mut btreemap_iter = btreemap.iter();
+
+ for cursor in zeromap2d.iter0() {
+ for (key1, value) in cursor.iter1() {
+ // This code runs for every (key0, key1) pair in order
+ let expected = btreemap_iter.next().unwrap();
+ assert_eq!(
+ (expected.0 .0, expected.0 .1, expected.1),
+ (*cursor.key0(), key1.as_unsigned_int() as i32, &value.get())
+ );
+ }
+ }
+ // Both collections must be exhausted at the same time
+ assert!(btreemap_iter.next().is_none());
+ }
+}
diff --git a/third_party/rust/zerovec/src/map2d/mod.rs b/third_party/rust/zerovec/src/map2d/mod.rs
new file mode 100644
index 0000000000..f5465fcf24
--- /dev/null
+++ b/third_party/rust/zerovec/src/map2d/mod.rs
@@ -0,0 +1,18 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! See [`ZeroMap2d`](crate::ZeroMap2d) for details.
+
+mod borrowed;
+mod cursor;
+pub(crate) mod map;
+
+#[cfg(feature = "databake")]
+mod databake;
+#[cfg(feature = "serde")]
+mod serde;
+
+pub use crate::ZeroMap2d;
+pub use borrowed::ZeroMap2dBorrowed;
+pub use cursor::ZeroMap2dCursor;
diff --git a/third_party/rust/zerovec/src/map2d/serde.rs b/third_party/rust/zerovec/src/map2d/serde.rs
new file mode 100644
index 0000000000..53e3284b31
--- /dev/null
+++ b/third_party/rust/zerovec/src/map2d/serde.rs
@@ -0,0 +1,430 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{ZeroMap2d, ZeroMap2dBorrowed, ZeroMap2dCursor};
+use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike};
+use crate::ZeroVec;
+use alloc::vec::Vec;
+use core::fmt;
+use core::marker::PhantomData;
+use serde::de::{self, Deserialize, Deserializer, MapAccess, Visitor};
+#[cfg(feature = "serde")]
+use serde::ser::{Serialize, SerializeMap, Serializer};
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+///
+/// Human-readable serializers receive a map from each key0 to an inner map
+/// of key1 to value; other serializers receive the four backing containers
+/// as a tuple.
+#[cfg(feature = "serde")]
+impl<'a, K0, K1, V> Serialize for ZeroMap2d<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    V: ZeroMapKV<'a> + Serialize + ?Sized,
+    K0::Container: Serialize,
+    K1::Container: Serialize,
+    V::Container: Serialize,
+{
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        // Non-human-readable path: emit the raw containers as a 4-tuple
+        if !serializer.is_human_readable() {
+            let parts = (&self.keys0, &self.joiner, &self.keys1, &self.values);
+            return parts.serialize(serializer);
+        }
+
+        // Human-readable path: one outer entry per key0, whose value is the
+        // inner key1 -> value map
+        let mut outer = serializer.serialize_map(None)?;
+        for cursor in self.iter0() {
+            K0::Container::zvl_get_as_t(cursor.key0(), |k0| outer.serialize_key(k0))?;
+            outer.serialize_value(&ZeroMap2dInnerMapSerialize { cursor })?;
+        }
+        outer.end()
+    }
+}
+
+/// Helper struct for human-serializing the inner map of a ZeroMap2d
+///
+/// Wraps the cursor for a single key0 so that the entries under that key0
+/// can be passed to a serializer as one value.
+#[cfg(feature = "serde")]
+struct ZeroMap2dInnerMapSerialize<'a, 'l, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + ?Sized + Ord,
+ K1: ZeroMapKV<'a> + ?Sized + Ord,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ /// Cursor whose (key1, value) entries get serialized
+ pub cursor: ZeroMap2dCursor<'l, 'a, K0, K1, V>,
+}
+
+#[cfg(feature = "serde")]
+impl<'a, 'l, K0, K1, V> Serialize for ZeroMap2dInnerMapSerialize<'a, 'l, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    V: ZeroMapKV<'a> + Serialize + ?Sized,
+    K0::Container: Serialize,
+    K1::Container: Serialize,
+    V::Container: Serialize,
+{
+    /// Writes every (key1, value) entry of the wrapped cursor as one map.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut inner = serializer.serialize_map(None)?;
+        for (raw_key1, raw_value) in self.cursor.iter1() {
+            // Key first, then its value, as SerializeMap expects
+            K1::Container::zvl_get_as_t(raw_key1, |key1| inner.serialize_key(key1))?;
+            V::Container::zvl_get_as_t(raw_value, |value| inner.serialize_value(value))?;
+        }
+        inner.end()
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+///
+/// Serialization is delegated to [`ZeroMap2d`], so both types produce the
+/// same output.
+#[cfg(feature = "serde")]
+impl<'a, K0, K1, V> Serialize for ZeroMap2dBorrowed<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord,
+    V: ZeroMapKV<'a> + Serialize + ?Sized,
+    K0::Container: Serialize,
+    K1::Container: Serialize,
+    V::Container: Serialize,
+{
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let as_map: ZeroMap2d<'a, K0, K1, V> = ZeroMap2d::from(*self);
+        as_map.serialize(serializer)
+    }
+}
+
+/// Modified example from https://serde.rs/deserialize-map.html
+///
+/// Visitor used for the human-readable (map-of-maps) form of a `ZeroMap2d`.
+/// It carries no data of its own.
+struct ZeroMap2dMapVisitor<'a, K0, K1, V>
+where
+ K0: ZeroMapKV<'a> + ?Sized + Ord,
+ K1: ZeroMapKV<'a> + ?Sized + Ord,
+ V: ZeroMapKV<'a> + ?Sized,
+{
+ // Zero-sized marker that mentions 'a and the three owned types so the
+ // otherwise unused generic parameters are accepted by the compiler
+ #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter
+ marker: PhantomData<fn() -> (&'a K0::OwnedType, &'a K1::OwnedType, &'a V::OwnedType)>,
+}
+
+impl<'a, K0, K1, V> ZeroMap2dMapVisitor<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + ?Sized + Ord,
+    K1: ZeroMapKV<'a> + ?Sized + Ord,
+    V: ZeroMapKV<'a> + ?Sized,
+{
+    /// Creates the visitor; it holds nothing but its type marker.
+    fn new() -> Self {
+        Self {
+            marker: PhantomData,
+        }
+    }
+}
+
+impl<'a, 'de, K0, K1, V> Visitor<'de> for ZeroMap2dMapVisitor<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Ord + ?Sized,
+    K1: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K1::Container: Deserialize<'de>,
+    V::Container: Deserialize<'de>,
+    K0::OwnedType: Deserialize<'de>,
+    K1::OwnedType: Deserialize<'de>,
+    V::OwnedType: Deserialize<'de>,
+{
+    type Value = ZeroMap2d<'a, K0, K1, V>;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a map produced by ZeroMap2d")
+    }
+
+    /// Builds a `ZeroMap2d` from a map of key0 to inner (key1 -> value) maps.
+    ///
+    /// Entries are added with `try_append` only, so the input has to arrive
+    /// in an order `try_append` accepts; otherwise a custom error is
+    /// returned instead of a map.
+    fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
+    where
+        M: MapAccess<'de>,
+    {
+        let mut map = ZeroMap2d::with_capacity(access.size_hint().unwrap_or(0));
+
+        // On the first level, pull out the K0s and a TupleVecMap of the
+        // K1s and Vs, and then collect them into a ZeroMap2d
+        while let Some((key0, inner_map)) =
+            access.next_entry::<K0::OwnedType, TupleVecMap<K1::OwnedType, V::OwnedType>>()?
+        {
+            for (key1, value) in inner_map.entries.iter() {
+                // `try_append` hands the entry back when it cannot be appended
+                if map
+                    .try_append(
+                        K0::Container::owned_as_t(&key0),
+                        K1::Container::owned_as_t(key1),
+                        V::Container::owned_as_t(value),
+                    )
+                    .is_some()
+                {
+                    return Err(de::Error::custom(
+                        "ZeroMap2d's keys must be sorted while deserializing",
+                    ));
+                }
+            }
+        }
+
+        Ok(map)
+    }
+}
+
+/// Helper struct for human-deserializing the inner map of a ZeroMap2d
+///
+/// Holds the entries of one inner map as a plain list of pairs.
+struct TupleVecMap<K1, V> {
+ /// (key1, value) pairs in the order they were pushed by the visitor
+ pub entries: Vec<(K1, V)>,
+}
+
+/// Visitor that produces a [`TupleVecMap`]; it stores only a type marker.
+struct TupleVecMapVisitor<K1, V> {
+ #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter
+ marker: PhantomData<fn() -> (K1, V)>,
+}
+
+impl<K1, V> TupleVecMapVisitor<K1, V> {
+    /// Creates the visitor; it holds nothing but its type marker.
+    fn new() -> Self {
+        Self {
+            marker: PhantomData,
+        }
+    }
+}
+
+impl<'de, K1, V> Visitor<'de> for TupleVecMapVisitor<K1, V>
+where
+    K1: Deserialize<'de>,
+    V: Deserialize<'de>,
+{
+    type Value = TupleVecMap<K1, V>;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("an inner map produced by ZeroMap2d")
+    }
+
+    /// Reads every entry of the map into a vector of pairs, keeping the
+    /// order in which the entries are yielded.
+    fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
+    where
+        M: MapAccess<'de>,
+    {
+        let capacity = access.size_hint().unwrap_or(0);
+        let mut entries = Vec::with_capacity(capacity);
+        while let Some(entry) = access.next_entry::<K1, V>()? {
+            entries.push(entry);
+        }
+        Ok(TupleVecMap { entries })
+    }
+}
+
+impl<'de, K1, V> Deserialize<'de> for TupleVecMap<K1, V>
+where
+    K1: Deserialize<'de>,
+    V: Deserialize<'de>,
+{
+    /// Asks the deserializer for a map and collects it with
+    /// `TupleVecMapVisitor`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let visitor = TupleVecMapVisitor::<K1, V>::new();
+        deserializer.deserialize_map(visitor)
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+///
+/// Human-readable formats are read as a map of maps through
+/// `ZeroMap2dMapVisitor`. Other formats are read as the tuple
+/// `(keys0, joiner, keys1, values)`, which is validated before a map is
+/// constructed from it.
+impl<'de, 'a, K0, K1, V> Deserialize<'de> for ZeroMap2d<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Ord + ?Sized,
+    K1: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K0::Container: Deserialize<'de>,
+    K1::Container: Deserialize<'de>,
+    V::Container: Deserialize<'de>,
+    K0::OwnedType: Deserialize<'de>,
+    K1::OwnedType: Deserialize<'de>,
+    V::OwnedType: Deserialize<'de>,
+    'de: 'a,
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            deserializer.deserialize_map(ZeroMap2dMapVisitor::<'a, K0, K1, V>::new())
+        } else {
+            let (keys0, joiner, keys1, values): (
+                K0::Container,
+                ZeroVec<u32>,
+                K1::Container,
+                V::Container,
+            ) = Deserialize::deserialize(deserializer)?;
+            // Invariant 1: len(keys0) == len(joiner)
+            if keys0.zvl_len() != joiner.len() {
+                return Err(de::Error::custom(
+                    "Mismatched keys0 and joiner sizes in ZeroMap2d",
+                ));
+            }
+            // Invariant 2: len(keys1) == len(values)
+            if keys1.zvl_len() != values.zvl_len() {
+                return Err(de::Error::custom(
+                    "Mismatched keys1 and value sizes in ZeroMap2d",
+                ));
+            }
+            // Invariant 3: joiner is sorted
+            if !joiner.zvl_is_ascending() {
+                return Err(de::Error::custom(
+                    "ZeroMap2d deserializing joiner array out of order",
+                ));
+            }
+            // Invariant 4: the last element of joiner is the length of keys1.
+            // An empty joiner means there is no key0 at all, so keys1 has to be
+            // empty too; otherwise the map would hold entries no key0 can reach.
+            let expected_keys1_len = joiner
+                .last()
+                .map_or(0, |last_joiner0| last_joiner0 as usize);
+            if keys1.zvl_len() != expected_keys1_len {
+                return Err(de::Error::custom(
+                    "ZeroMap2d deserializing joiner array malformed",
+                ));
+            }
+            let result = Self {
+                keys0,
+                joiner,
+                keys1,
+                values,
+            };
+            // In debug mode, check the optional invariants, too
+            #[cfg(debug_assertions)]
+            result.check_invariants();
+            Ok(result)
+        }
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+///
+/// Only non-human-readable formats are supported, and only when every
+/// component of the deserialized map can be borrowed.
+impl<'de, 'a, K0, K1, V> Deserialize<'de> for ZeroMap2dBorrowed<'a, K0, K1, V>
+where
+    K0: ZeroMapKV<'a> + Ord + ?Sized,
+    K1: ZeroMapKV<'a> + Ord + ?Sized,
+    V: ZeroMapKV<'a> + ?Sized,
+    K0::Container: Deserialize<'de>,
+    K1::Container: Deserialize<'de>,
+    V::Container: Deserialize<'de>,
+    K0::OwnedType: Deserialize<'de>,
+    K1::OwnedType: Deserialize<'de>,
+    V::OwnedType: Deserialize<'de>,
+    'de: 'a,
+{
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            return Err(de::Error::custom(
+                "ZeroMap2dBorrowed cannot be deserialized from human-readable formats",
+            ));
+        }
+
+        // Error shared by all four components when one of them turns out
+        // not to be borrowed
+        let not_zero_copy = || -> D::Error {
+            de::Error::custom("ZeroMap2dBorrowed can only deserialize in zero-copy ways")
+        };
+
+        // Go through ZeroMap2d first so its validation is applied, then take
+        // the borrowed view of each component
+        let deserialized: ZeroMap2d<'a, K0, K1, V> = ZeroMap2d::deserialize(deserializer)?;
+        let keys0 = deserialized
+            .keys0
+            .zvl_as_borrowed_inner()
+            .ok_or_else(not_zero_copy)?;
+        let joiner = deserialized
+            .joiner
+            .zvl_as_borrowed_inner()
+            .ok_or_else(not_zero_copy)?;
+        let keys1 = deserialized
+            .keys1
+            .zvl_as_borrowed_inner()
+            .ok_or_else(not_zero_copy)?;
+        let values = deserialized
+            .values
+            .zvl_as_borrowed_inner()
+            .ok_or_else(not_zero_copy)?;
+        Ok(Self {
+            keys0,
+            joiner,
+            keys1,
+            values,
+        })
+    }
+}
+
+#[cfg(test)]
+#[allow(non_camel_case_types)]
+mod test {
+ use crate::map2d::{ZeroMap2d, ZeroMap2dBorrowed};
+
+ // The two structs below are never instantiated; they only have to compile,
+ // which checks that the serde derives accept these fields with
+ // `#[serde(borrow)]`
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_ZeroMap2d<'data> {
+ #[serde(borrow)]
+ _data: ZeroMap2d<'data, u16, str, [u8]>,
+ }
+
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_ZeroMap2dBorrowed<'data> {
+ #[serde(borrow)]
+ _data: ZeroMap2dBorrowed<'data, u16, str, [u8]>,
+ }
+
+ // Expected JSON and Bincode encodings of the map built by `make_map`
+ const JSON_STR: &str = "{\"1\":{\"1\":\"uno\"},\"2\":{\"2\":\"dos\",\"3\":\"tres\"}}";
+ const BINCODE_BYTES: &[u8] = &[
+ 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0,
+ 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 20, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
+ 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115,
+ ];
+
+ // Three entries under two key0s: (1, 1), (2, 2) and (2, 3)
+ fn make_map() -> ZeroMap2d<'static, u32, u16, str> {
+ let mut map = ZeroMap2d::new();
+ map.insert(&1, &1, "uno");
+ map.insert(&2, &2, "dos");
+ map.insert(&2, &3, "tres");
+ map
+ }
+
+ // Human-readable round trip: exact JSON output, then parse it back
+ #[test]
+ fn test_serde_json() {
+ let map = make_map();
+ let json_str = serde_json::to_string(&map).expect("serialize");
+ assert_eq!(JSON_STR, json_str);
+ let new_map: ZeroMap2d<u32, u16, str> =
+ serde_json::from_str(&json_str).expect("deserialize");
+ assert_eq!(format!("{new_map:?}"), format!("{map:?}"));
+ }
+
+ // Binary round trip, into both ZeroMap2d and ZeroMap2dBorrowed. The maps
+ // are compared through their Debug output after textual substitutions
+ // that account for the expected differences in that output
+ #[test]
+ fn test_bincode() {
+ let map = make_map();
+ let bincode_bytes = bincode::serialize(&map).expect("serialize");
+ assert_eq!(BINCODE_BYTES, bincode_bytes);
+ let new_map: ZeroMap2d<u32, u16, str> =
+ bincode::deserialize(&bincode_bytes).expect("deserialize");
+ assert_eq!(
+ format!("{new_map:?}"),
+ format!("{map:?}").replace("Owned", "Borrowed"),
+ );
+
+ let new_map: ZeroMap2dBorrowed<u32, u16, str> =
+ bincode::deserialize(&bincode_bytes).expect("deserialize");
+ assert_eq!(
+ format!("{new_map:?}"),
+ format!("{map:?}")
+ .replace("Owned", "Borrowed")
+ .replace("ZeroMap2d", "ZeroMap2dBorrowed")
+ );
+ }
+
+ // Pins the exact Bincode bytes of a one-entry map
+ #[test]
+ fn test_sample_bincode() {
+ // This is the map from the main docs page for ZeroMap2d
+ let mut map: ZeroMap2d<u16, u16, str> = ZeroMap2d::new();
+ map.insert(&1, &2, "three");
+ let bincode_bytes: Vec<u8> = bincode::serialize(&map).expect("serialize");
+ assert_eq!(
+ bincode_bytes.as_slice(),
+ &[
+ 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+ 0, 0, 2, 0, 11, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 116, 104, 114, 101, 101
+ ]
+ );
+ }
+}
diff --git a/third_party/rust/zerovec/src/samples.rs b/third_party/rust/zerovec/src/samples.rs
new file mode 100644
index 0000000000..723aacdedc
--- /dev/null
+++ b/third_party/rust/zerovec/src/samples.rs
@@ -0,0 +1,74 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Example data useful for testing ZeroVec.
+
+// This module is included directly in tests and can trigger the dead_code
+// warning since not all samples are used in each test
+#![allow(dead_code)]
+
+/// Wrapper that forces its contents to be aligned to 8 bytes.
+#[repr(align(8))]
+struct Aligned<T>(pub T);
+
+// This is aligned so that we can test unaligned behavior at odd offsets
+// (with a known base alignment, a sub-slice starting at an odd offset is
+// known to be misaligned for u32)
+const ALIGNED_TEST_BUFFER_LE: Aligned<[u8; 80]> = Aligned([
+ 0x00, 0x01, 0x02, 0x00, 0x04, 0x05, 0x06, 0x00, 0x08, 0x09, 0x0a, 0x00, 0x0c, 0x0d, 0x0e, 0x00,
+ 0x10, 0x11, 0x12, 0x00, 0x14, 0x15, 0x16, 0x00, 0x18, 0x19, 0x1a, 0x00, 0x1c, 0x1d, 0x1e, 0x00,
+ 0x20, 0x21, 0x22, 0x00, 0x24, 0x25, 0x26, 0x00, 0x28, 0x29, 0x2a, 0x00, 0x2c, 0x2d, 0x2e, 0x00,
+ 0x30, 0x31, 0x32, 0x00, 0x34, 0x35, 0x36, 0x00, 0x38, 0x39, 0x3a, 0x00, 0x3c, 0x3d, 0x3e, 0x00,
+ 0x40, 0x41, 0x42, 0x00, 0x44, 0x45, 0x46, 0x00, 0x48, 0x49, 0x4a, 0x00, 0x4c, 0x4d, 0x4e, 0x00,
+]);
+
+/// An example byte array intended to be used in `ZeroVec<u32>`.
+///
+/// Holds 20 little-endian `u32` values (80 bytes).
+pub const TEST_BUFFER_LE: &[u8] = &ALIGNED_TEST_BUFFER_LE.0;
+
+/// u32 numbers corresponding to the above byte array.
+pub const TEST_SLICE: &[u32] = &[
+ 0x020100, 0x060504, 0x0a0908, 0x0e0d0c, 0x121110, 0x161514, 0x1a1918, 0x1e1d1c, 0x222120,
+ 0x262524, 0x2a2928, 0x2e2d2c, 0x323130, 0x363534, 0x3a3938, 0x3e3d3c, 0x424140, 0x464544,
+ 0x4a4948, 0x4e4d4c,
+];
+
+/// The sum of the numbers in TEST_SLICE.
+pub const TEST_SUM: u32 = 52629240;
+
+/// Representation of TEST_SLICE in JSON.
+pub const JSON_STR: &str = "[131328,394500,657672,920844,1184016,1447188,1710360,1973532,2236704,2499876,2763048,3026220,3289392,3552564,3815736,4078908,4342080,4605252,4868424,5131596]";
+
+/// Representation of TEST_SLICE in Bincode.
+///
+/// The first eight bytes hold the payload length (80); the remaining bytes
+/// are the same as `TEST_BUFFER_LE`.
+pub const BINCODE_BUF: &[u8] = &[
+ 80, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 4, 5, 6, 0, 8, 9, 10, 0, 12, 13, 14, 0, 16, 17, 18, 0, 20,
+ 21, 22, 0, 24, 25, 26, 0, 28, 29, 30, 0, 32, 33, 34, 0, 36, 37, 38, 0, 40, 41, 42, 0, 44, 45,
+ 46, 0, 48, 49, 50, 0, 52, 53, 54, 0, 56, 57, 58, 0, 60, 61, 62, 0, 64, 65, 66, 0, 68, 69, 70,
+ 0, 72, 73, 74, 0, 76, 77, 78, 0,
+];
+
+/// Representation of a VarZeroVec<str> with contents ["w", "ω", "文", "𑄃"]
+///
+/// The bytes appear to consist of the element count (4) in four bytes, four
+/// two-byte offsets (0, 1, 3, 6) and then the UTF-8 bytes of the strings.
+pub const TEST_VARZEROSLICE_BYTES: &[u8] = &[
+ 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131,
+];
+
+/// Cross-checks the sample constants of this module against one another.
+#[test]
+fn validate() {
+    use crate::{VarZeroVec, ZeroVec};
+
+    // Every encoding of the sample data has to decode to this vector
+    let expected: ZeroVec<u32> = ZeroVec::alloc_from_slice(TEST_SLICE);
+
+    let from_bytes = ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE).unwrap();
+    assert_eq!(from_bytes, expected);
+
+    let total: u32 = TEST_SLICE.iter().sum();
+    assert_eq!(total, TEST_SUM);
+
+    let from_json = serde_json::from_str::<ZeroVec<u32>>(JSON_STR).unwrap();
+    assert_eq!(from_json, expected);
+
+    let from_bincode = bincode::deserialize::<ZeroVec<u32>>(BINCODE_BUF).unwrap();
+    assert_eq!(from_bincode, expected);
+
+    // The VarZeroVec sample only has to parse successfully
+    VarZeroVec::<str>::parse_byte_slice(TEST_VARZEROSLICE_BYTES).unwrap();
+}
diff --git a/third_party/rust/zerovec/src/ule/chars.rs b/third_party/rust/zerovec/src/ule/chars.rs
new file mode 100644
index 0000000000..e4c1efc4ec
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/chars.rs
@@ -0,0 +1,190 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+//! ULE implementation for the `char` type.
+
+use super::*;
+use crate::impl_ule_from_array;
+use core::cmp::Ordering;
+use core::convert::TryFrom;
+
+/// A u8 array of little-endian data corresponding to a Unicode scalar value.
+///
+/// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a
+/// valid `char` and can be converted without validation. Only the three low-order bytes are
+/// stored; the fourth byte of the u32 is always zero for a valid `char`.
+///
+/// # Examples
+///
+/// Convert a `char` to a `CharULE` and back again:
+///
+/// ```
+/// use zerovec::ule::{AsULE, CharULE, ULE};
+///
+/// let c1 = '𑄃';
+/// let ule = c1.to_unaligned();
+/// assert_eq!(CharULE::as_byte_slice(&[ule]), &[0x03, 0x11, 0x01]);
+/// let c2 = char::from_unaligned(ule);
+/// assert_eq!(c1, c2);
+/// ```
+///
+/// Attempt to parse invalid bytes to a `CharULE`:
+///
+/// ```
+/// use zerovec::ule::{CharULE, ULE};
+///
+/// // Exactly one 3-byte unit, but 0xFFFFFF is not a Unicode scalar value
+/// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF];
+/// CharULE::parse_byte_slice(bytes).expect_err("Invalid bytes");
+/// ```
+#[repr(transparent)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
+pub struct CharULE([u8; 3]);
+
+impl CharULE {
+    /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling
+    /// [`AsULE::to_unaligned()`]
+    ///
+    /// See the type-level documentation for [`CharULE`] for more information.
+    #[inline]
+    pub const fn from_aligned(c: char) -> Self {
+        // Keep the three low-order little-endian bytes; the fourth byte is
+        // not stored
+        let le = (c as u32).to_le_bytes();
+        Self([le[0], le[1], le[2]])
+    }
+
+    impl_ule_from_array!(char, CharULE, Self([0; 3]));
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. CharULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. CharULE is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. CharULE byte equality is semantic equality
+unsafe impl ULE for CharULE {
+    /// Accepts `bytes` only if its length is a multiple of 3 and every
+    /// 3-byte group decodes to a valid `char`.
+    #[inline]
+    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+        let total = bytes.len();
+        if total % 3 != 0 {
+            return Err(ZeroVecError::length::<Self>(total));
+        }
+        // Validate the bytes, stopping at the first invalid group
+        // TODO: Use slice::as_chunks() when stabilized
+        bytes.chunks_exact(3).try_for_each(|chunk| {
+            #[allow(clippy::indexing_slicing)]
+            // Won't panic because the chunks are always 3 bytes long
+            let code_point = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]);
+            match char::try_from(code_point) {
+                Ok(_) => Ok(()),
+                Err(_) => Err(ZeroVecError::parse::<Self>()),
+            }
+        })
+    }
+}
+
+impl AsULE for char {
+    type ULE = CharULE;
+
+    #[inline]
+    fn to_unaligned(self) -> Self::ULE {
+        CharULE::from_aligned(self)
+    }
+
+    #[inline]
+    fn from_unaligned(unaligned: Self::ULE) -> Self {
+        // Rebuild the u32 by supplying a zero for the byte that is not stored
+        let [b0, b1, b2] = unaligned.0;
+        let code_point = u32::from_le_bytes([b0, b1, b2, 0]);
+        // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value.
+        unsafe { Self::from_u32_unchecked(code_point) }
+    }
+}
+
+// Delegates to `Ord` so that the partial and total orderings always agree
+impl PartialOrd for CharULE {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for CharULE {
+    /// Orders two `CharULE`s by the `char` values they decode to.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let lhs = char::from_unaligned(*self);
+        let rhs = char::from_unaligned(*other);
+        lhs.cmp(&rhs)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    // `from_array` is usable in const context and yields 3 bytes per char
+    #[test]
+    fn test_from_array() {
+        const CHARS: [char; 2] = ['a', '🙃'];
+        const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS);
+        assert_eq!(
+            CharULE::as_byte_slice(&CHARS_ULE),
+            &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01]
+        );
+    }
+
+    // An empty array converts to an empty byte slice
+    #[test]
+    fn test_from_array_zst() {
+        const CHARS: [char; 0] = [];
+        const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS);
+        let bytes = CharULE::as_byte_slice(&CHARS_ULE);
+        let empty: &[u8] = &[];
+        assert_eq!(bytes, empty);
+    }
+
+    #[test]
+    fn test_parse() {
+        // 1-byte, 2-byte, 3-byte, and two 4-byte characters in UTF-8 (not as relevant in UTF-32)
+        let chars = ['w', 'ω', '文', '𑄃', '🙃'];
+        let char_ules: Vec<CharULE> = chars.iter().copied().map(char::to_unaligned).collect();
+        let char_bytes: &[u8] = CharULE::as_byte_slice(&char_ules);
+
+        // Check parsing
+        let parsed_ules: &[CharULE] = CharULE::parse_byte_slice(char_bytes).unwrap();
+        assert_eq!(char_ules, parsed_ules);
+        let parsed_chars: Vec<char> = parsed_ules
+            .iter()
+            .copied()
+            .map(char::from_unaligned)
+            .collect();
+        assert_eq!(&chars, parsed_chars.as_slice());
+
+        // Compare to golden expected data
+        assert_eq!(
+            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
+            char_bytes
+        );
+    }
+
+    #[test]
+    fn test_failures() {
+        // The inputs below use CharULE's own 3-byte little-endian layout, so
+        // that each case fails for the reason it names rather than because
+        // 4-byte units are misaligned with the 3-byte chunks.
+
+        // 119 and 120 are valid, but not 0xD800 (high surrogate)
+        let surrogate_bytes: &[u8] = &[119, 0, 0, 0x00, 0xD8, 0x00, 120, 0, 0];
+        let parsed_ules_result = CharULE::parse_byte_slice(surrogate_bytes);
+        assert!(parsed_ules_result.is_err());
+
+        // 0x20FFFF is out of range for a char
+        let out_of_range_bytes: &[u8] = &[0xFF, 0xFF, 0x20];
+        let parsed_ules_result = CharULE::parse_byte_slice(out_of_range_bytes);
+        assert!(parsed_ules_result.is_err());
+
+        // A length that is not a multiple of 3 is rejected even though the
+        // first 3 bytes form a valid char
+        let bad_length_bytes: &[u8] = &[119, 0, 0, 120];
+        let parsed_ules_result = CharULE::parse_byte_slice(bad_length_bytes);
+        assert!(parsed_ules_result.is_err());
+    }
+}
diff --git a/third_party/rust/zerovec/src/ule/custom.rs b/third_party/rust/zerovec/src/ule/custom.rs
new file mode 100644
index 0000000000..8cc6e9de4e
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/custom.rs
@@ -0,0 +1,145 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Documentation on implementing custom VarULE types.
+//!
+//! This module contains documentation for defining custom VarULE types,
+//! especially those using complex custom dynamically sized types.
+//!
+//! In *most cases* you should be able to create custom VarULE types using
+ //! [`#[make_varule]`](crate::make_varule).
+//!
+//! # Example
+//!
+//! For example, if your regular stack type is:
+//!
+//! ```rust
+//! use zerofrom::ZeroFrom;
+//! use zerovec::ule::*;
+//! use zerovec::ZeroVec;
+//!
+//! #[derive(serde::Serialize, serde::Deserialize)]
+//! struct Foo<'a> {
+//! field1: char,
+//! field2: u32,
+//! #[serde(borrow)]
+//! field3: ZeroVec<'a, u32>,
+//! }
+//! ```
+//!
+//! then the ULE type will be implemented as follows. Ideally, you should have
+//! `EncodeAsVarULE` and `ZeroFrom` implementations on `Foo` pertaining to `FooULE`,
+//! as well as a `Serialize` impl on `FooULE` and a `Deserialize` impl on `Box<FooULE>`
+//! to enable human-readable serialization and deserialization.
+//!
+//! ```rust
+//! use zerovec::{ZeroVec, VarZeroVec, ZeroSlice};
+//! use zerovec::ule::*;
+//! use zerofrom::ZeroFrom;
+//! use core::mem;
+//!
+//! # #[derive(serde::Serialize, serde::Deserialize)]
+//! # struct Foo<'a> {
+//! # field1: char,
+//! # field2: u32,
+//! # #[serde(borrow)]
+//! # field3: ZeroVec<'a, u32>
+//! # }
+//!
+//! // Must be repr(packed) for safety of VarULE!
+//! // Must also only contain ULE types
+//! #[repr(packed)]
+//! struct FooULE {
+//! field1: <char as AsULE>::ULE,
+//! field2: <u32 as AsULE>::ULE,
+//! field3: ZeroSlice<u32>,
+//! }
+//!
+//! // Safety (based on the safety checklist on the VarULE trait):
+//! // 1. FooULE does not include any uninitialized or padding bytes. (achieved by `#[repr(packed)]` on
+//! // a struct with only ULE fields)
+//! // 2. FooULE is aligned to 1 byte. (achieved by `#[repr(packed)]` on
+//! // a struct with only ULE fields)
+//! // 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+//! // 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+//! // 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+//! // 6. The other VarULE methods use the default impl.
+//! // 7. FooULE byte equality is semantic equality
+//! unsafe impl VarULE for FooULE {
+//! fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+//! // validate each field
+//! <char as AsULE>::ULE::validate_byte_slice(&bytes[0..3]).map_err(|_| ZeroVecError::parse::<Self>())?;
+//! <u32 as AsULE>::ULE::validate_byte_slice(&bytes[3..7]).map_err(|_| ZeroVecError::parse::<Self>())?;
+//! let _ = ZeroVec::<u32>::parse_byte_slice(&bytes[7..]).map_err(|_| ZeroVecError::parse::<Self>())?;
+//! Ok(())
+//! }
+//! unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+//! let ptr = bytes.as_ptr();
+//! let len = bytes.len();
+//! // subtract the length of the char and u32 to get the length of the array
+//! let len_new = (len - 7) / 4;
+//! // it's hard constructing custom DSTs, we fake a pointer/length construction
+//! // eventually we can use the Pointer::Metadata APIs when they stabilize
+//! let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const <u32 as AsULE>::ULE, len_new);
+//! &*(fake_slice as *const Self)
+//! }
+//! }
+//!
+//! unsafe impl EncodeAsVarULE<FooULE> for Foo<'_> {
+//! fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+//! // take each field, convert to ULE byte slices, and pass them through
+//! cb(&[<char as AsULE>::ULE::as_byte_slice(&[self.field1.to_unaligned()]),
+//! <u32 as AsULE>::ULE::as_byte_slice(&[self.field2.to_unaligned()]),
+//! // the ZeroVec is already in the correct slice format
+//! self.field3.as_bytes()])
+//! }
+//! }
+//!
+//! impl<'a> ZeroFrom<'a, FooULE> for Foo<'a> {
+//! fn zero_from(other: &'a FooULE) -> Self {
+//! Self {
+//! field1: AsULE::from_unaligned(other.field1),
+//! field2: AsULE::from_unaligned(other.field2),
+//! field3: ZeroFrom::zero_from(&other.field3),
+//! }
+//! }
+//! }
+//!
+//!
+//! impl serde::Serialize for FooULE
+//! {
+//! fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+//! where
+//! S: serde::Serializer,
+//! {
+//! Foo::zero_from(self).serialize(serializer)
+//! }
+//! }
+//!
+//! impl<'de> serde::Deserialize<'de> for Box<FooULE>
+//! {
+//! fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+//! where
+//! D: serde::Deserializer<'de>,
+//! {
+//! let mut foo = Foo::deserialize(deserializer)?;
+//! Ok(encode_varule_to_box(&foo))
+//! }
+//! }
+//!
+//! fn main() {
+//! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])},
+//! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}];
+//!
+//! let vzv = VarZeroVec::<_>::from(&foos);
+//!
+//! assert_eq!(char::from_unaligned(vzv.get(0).unwrap().field1), 'u');
+//! assert_eq!(u32::from_unaligned(vzv.get(0).unwrap().field2), 983);
+//! assert_eq!(&vzv.get(0).unwrap().field3, &[1212,2309,500,7000][..]);
+//!
+//! assert_eq!(char::from_unaligned(vzv.get(1).unwrap().field1), 'l');
+//! assert_eq!(u32::from_unaligned(vzv.get(1).unwrap().field2), 1010);
+//! assert_eq!(&vzv.get(1).unwrap().field3, &[1932, 0, 8888, 91237][..]);
+//! }
+//! ```
diff --git a/third_party/rust/zerovec/src/ule/encode.rs b/third_party/rust/zerovec/src/ule/encode.rs
new file mode 100644
index 0000000000..adea123aa2
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/encode.rs
@@ -0,0 +1,400 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+use crate::varzerovec::VarZeroVecFormat;
+use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
+use alloc::borrow::{Cow, ToOwned};
+use alloc::boxed::Box;
+use alloc::string::String;
+use alloc::{vec, vec::Vec};
+use core::mem;
+
+/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
+/// custom DSTs where the type cannot be obtained as a reference to some other type.
+///
+/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field
+/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices
+/// to the callback must, when concatenated, be a valid instance of the VarULE type.
+///
+ /// See the [custom VarULE documentation](crate::ule::custom) for examples.
+///
+/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`]
+/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to
+/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where
+/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work.
+///
+/// A typical implementation will take each field in the order found in the [`VarULE`] type,
+/// convert it to ULE, call [`ULE::as_byte_slice()`] on them, and pass the slices to `cb` in order.
+/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying
+/// byte representation passed through.
+///
+/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical
+/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the
+/// dynamically-sized part.
+///
+/// # Safety
+///
+/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are:
+/// - It must call `cb` (only once)
+/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type
+/// (i.e. if fed to [`VarULE::validate_byte_slice()`] they must produce a successful result)
+/// - It must return the return value of `cb` to the caller
+///
+/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided.
+/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced
+/// with `unreachable!()`.
+///
+/// The safety invariants of [`Self::encode_var_ule_len()`] are:
+/// - It must return the length of the corresponding VarULE type
+///
+/// The safety invariants of [`Self::encode_var_ule_write()`] are:
+/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type
+pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
+ /// Calls `cb` with a piecewise list of byte slices that when concatenated
+ /// produce the memory pattern of the corresponding instance of `T`.
+ ///
+ /// Do not call this function directly; instead use the other two. Some implementors
+ /// may define this function to panic.
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R;
+
+ /// Return the length, in bytes, of the corresponding [`VarULE`] type
+ fn encode_var_ule_len(&self) -> usize {
+ self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum())
+ }
+
+ /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should
+ /// be the size of [`Self::encode_var_ule_len()`]
+ fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
+ debug_assert_eq!(self.encode_var_ule_len(), dst.len());
+ self.encode_var_ule_as_slices(move |slices| {
+ #[allow(clippy::indexing_slicing)] // by debug_assert
+ for slice in slices {
+ dst[..slice.len()].copy_from_slice(slice);
+ dst = &mut dst[slice.len()..];
+ }
+ });
+ }
+}
+
+/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>`
+///
+/// This is primarily useful for generating `Deserialize` impls for VarULE types
+pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> {
+ // zero-fill the vector to avoid uninitialized data UB
+ let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
+ x.encode_var_ule_write(&mut vec);
+ let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice());
+ unsafe {
+ // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
+ // and can be recouped via from_byte_slice_unchecked()
+ let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T;
+
+ // Safety: we can construct an owned version since we have mem::forgotten the older owner
+ Box::from_raw(ptr)
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self)])
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self)])
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T>
+where
+ T: ToOwned,
+{
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self.as_ref())])
+ }
+}
+
+unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[T::as_byte_slice(self)])
+ }
+}
+
+unsafe impl EncodeAsVarULE<str> for String {
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[self.as_bytes()])
+ }
+}
+
+// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>`
+// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here.
+unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T>
+where
+ T: ULE,
+{
+ fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
+ cb(&[<[T] as VarULE>::as_byte_slice(self)])
+ }
+}
+
+unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T]
+where
+ T: AsULE + 'static,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.len() * core::mem::size_of::<T::ULE>()
+ }
+
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ #[allow(non_snake_case)]
+ let S = core::mem::size_of::<T::ULE>();
+ debug_assert_eq!(self.len() * S, dst.len());
+ for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) {
+ let ule = item.to_unaligned();
+ chunk.copy_from_slice(ULE::as_byte_slice(core::slice::from_ref(&ule)));
+ }
+ }
+}
+
+unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T>
+where
+ T: AsULE + 'static,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.as_slice().encode_var_ule_len()
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ self.as_slice().encode_var_ule_write(dst)
+ }
+}
+
+unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T>
+where
+ T: AsULE + 'static,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.as_bytes().len()
+ }
+
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ debug_assert_eq!(self.as_bytes().len(), dst.len());
+ dst.copy_from_slice(self.as_bytes());
+ }
+}
+
+ // Encodes a slice of `EncodeAsVarULE<T>` elements as a `VarZeroSlice<T, F>`.
+ //
+ // Both `encode_var_ule_len()` and `encode_var_ule_write()` are overridden here and
+ // delegate to the helpers in `crate::varzerovec::components`, so the slice-based
+ // callback method is only a stub.
+ unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E]
+ where
+     T: VarULE + ?Sized,
+     E: EncodeAsVarULE<T>,
+     F: VarZeroVecFormat,
+ {
+     fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+         // unnecessary if the other two are implemented; `unreachable!()` matches the
+         // sibling impls in this file and the stub named in the trait's safety docs
+         unreachable!()
+     }
+
+     // Panics (via `unwrap()`) if `compute_serializable_len` does not produce a length.
+     #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV.
+     fn encode_var_ule_len(&self) -> usize {
+         crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize
+     }
+
+     // Serializes the elements into `dst` using the `VarZeroVec` byte layout for format `F`.
+     fn encode_var_ule_write(&self, dst: &mut [u8]) {
+         crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst)
+     }
+ }
+
+unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E>
+where
+ T: VarULE + ?Sized,
+ E: EncodeAsVarULE<T>,
+ F: VarZeroVecFormat,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice())
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst)
+ }
+}
+
+unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F>
+where
+ T: VarULE + ?Sized,
+ F: VarZeroVecFormat,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.as_bytes().len()
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ debug_assert_eq!(self.as_bytes().len(), dst.len());
+ dst.copy_from_slice(self.as_bytes());
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ const STRING_ARRAY: [&str; 2] = ["hello", "world"];
+
+ const STRING_SLICE: &[&str] = &STRING_ARRAY;
+
+ const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];
+
+ const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY];
+
+ const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY];
+
+ const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE];
+
+ const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE];
+
+ const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F];
+
+ const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY];
+
+ const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY];
+
+ const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE];
+
+ const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE];
+
+ #[test]
+ fn test_vzv_from() {
+ type VZV<'a, T> = VarZeroVec<'a, T>;
+ type ZS<T> = ZeroSlice<T>;
+ type VZS<T> = VarZeroSlice<T>;
+
+ let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY);
+ let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()];
+ let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()];
+ let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()];
+
+ let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY);
+ let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()];
+ let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()];
+ let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()];
+
+ let a: VZV<str> = VarZeroVec::from(&STRING_ARRAY);
+ let b: VZV<str> = VarZeroVec::from(STRING_SLICE);
+ let c: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE));
+ assert_eq!(a, STRING_SLICE);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+
+ let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY);
+ let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE);
+ let c: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec);
+ assert_eq!(a, U8_2D_SLICE);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ let u8_3d_vzv_brackets = &[a.clone(), a.clone()];
+
+ let a: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY);
+ let b: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE);
+ let c: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec);
+ let d: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec);
+ assert_eq!(a, U8_2D_SLICE);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+ let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()];
+
+ let a: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY);
+ let b: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE);
+ let c: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec);
+ let d: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets);
+ assert_eq!(
+ a.iter()
+ .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>())
+ .collect::<Vec<Vec<Vec<u8>>>>(),
+ u8_3d_vec
+ );
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+
+ let a: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY);
+ let b: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE);
+ let c: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec);
+ let d: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice);
+ assert_eq!(
+ a.iter()
+ .map(|x| x
+ .iter()
+ .map(|y| y.iter().collect::<Vec<u8>>())
+ .collect::<Vec<Vec<u8>>>())
+ .collect::<Vec<Vec<Vec<u8>>>>(),
+ u8_3d_vec
+ );
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+
+ let a: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY);
+ let b: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE);
+ let c: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec);
+ let d: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec);
+ assert_eq!(a, u32_2d_zerovec);
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+ let u32_3d_vzv = &[a.clone(), a.clone()];
+
+ let a: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY);
+ let b: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE);
+ let c: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec);
+ let d: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv);
+ assert_eq!(
+ a.iter()
+ .map(|x| x
+ .iter()
+ .map(|y| y.iter().collect::<Vec<u32>>())
+ .collect::<Vec<Vec<u32>>>())
+ .collect::<Vec<Vec<Vec<u32>>>>(),
+ u32_3d_vec
+ );
+ assert_eq!(a, b);
+ assert_eq!(a, c);
+ assert_eq!(a, d);
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/macros.rs b/third_party/rust/zerovec/src/ule/macros.rs
new file mode 100644
index 0000000000..955b1eb2e4
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/macros.rs
@@ -0,0 +1,29 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or
+/// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`.
+///
+/// The `$default` argument is due to current compiler limitations.
+/// Pass any (cheap to construct) value.
+#[macro_export]
+macro_rules! impl_ule_from_array {
+ ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => {
+ #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")]
+ pub const fn from_array<const N: usize>(arr: [$aligned; N]) -> [Self; N] {
+ let mut result = [$default; N];
+ let mut i = 0;
+ // Won't panic because i < N and arr has length N
+ #[allow(clippy::indexing_slicing)]
+ while i < N {
+ result[i] = $single(arr[i]);
+ i += 1;
+ }
+ result
+ }
+ };
+ ($aligned:ty, $unaligned:ty, $default:expr) => {
+ impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned);
+ };
+}
diff --git a/third_party/rust/zerovec/src/ule/mod.rs b/third_party/rust/zerovec/src/ule/mod.rs
new file mode 100644
index 0000000000..5a6d9cd471
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/mod.rs
@@ -0,0 +1,394 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+
+//! Traits over unaligned little-endian data (ULE, pronounced "yule").
+//!
+ //! The main traits for this module are [`ULE`], [`AsULE`], and [`VarULE`].
+//!
+//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits
+ //! work under the hood.
+mod chars;
+#[cfg(doc)]
+pub mod custom;
+mod encode;
+mod macros;
+mod multi;
+mod niche;
+mod option;
+mod plain;
+mod slices;
+mod unvalidated;
+
+pub mod tuple;
+pub use super::ZeroVecError;
+pub use chars::CharULE;
+pub use encode::{encode_varule_to_box, EncodeAsVarULE};
+pub use multi::MultiFieldsULE;
+pub use niche::{NicheBytes, NichedOption, NichedOptionULE};
+pub use option::{OptionULE, OptionVarULE};
+pub use plain::RawBytesULE;
+pub use unvalidated::{UnvalidatedChar, UnvalidatedStr};
+
+use alloc::alloc::Layout;
+use alloc::borrow::ToOwned;
+use alloc::boxed::Box;
+use core::{mem, slice};
+
+/// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice.
+///
+/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or
+/// [`#[derive(ULE)]`](macro@ULE) instead.
+///
+/// Types that are not fixed-width can implement [`VarULE`] instead.
+///
+/// "ULE" stands for "Unaligned little-endian"
+///
+/// # Safety
+///
+/// Safety checklist for `ULE`:
+///
+/// 1. The type *must not* include any uninitialized or padding bytes.
+/// 2. The type must have an alignment of 1 byte.
+/// 3. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// would not represent a valid slice of this type.
+/// 4. The impl of [`ULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// cannot be used in its entirety (if its length is not a multiple of `size_of::<Self>()`).
+/// 5. All other methods *must* be left with their default impl, or else implemented according to
+/// their respective safety guidelines.
+/// 6. Acknowledge the following note about the equality invariant.
+///
+/// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2,
+/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`.
+///
+/// # Equality invariant
+///
+ /// A non-safety invariant is that if `Self` implements `PartialEq`, then it *must* be logically
+/// equivalent to byte equality on [`Self::as_byte_slice()`].
+///
+/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not
+/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error
+/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and
+/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form
+/// where only a single digit is allowed before `.`.
+///
+/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
+/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
+pub unsafe trait ULE
+where
+ Self: Sized,
+ Self: Copy + 'static,
+{
+ /// Validates a byte slice, `&[u8]`.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
+ /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok`
+ /// should be returned; otherwise, a [`ZeroVecError`] should be returned.
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError>;
+
+ /// Parses a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated,
+ /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`].
+ ///
+ /// The default implementation executes [`Self::validate_byte_slice()`] followed by
+ /// [`Self::from_byte_slice_unchecked`].
+ ///
+ /// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This
+ /// means that the returned slice can span the entire byte slice.
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], ZeroVecError> {
+ Self::validate_byte_slice(bytes)?;
+ debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
+ Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
+ }
+
+ /// Takes a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime, assuming
+ /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with
+ /// success.
+ ///
+ /// The default implementation performs a pointer cast to the same region of memory.
+ ///
+ /// # Safety
+ ///
+ /// ## Callers
+ ///
+ /// Callers of this method must take care to ensure that `bytes` was previously passed through
+ /// [`Self::validate_byte_slice()`] with success (and was not changed since then).
+ ///
+ /// ## Implementors
+ ///
+ /// Implementations of this method may call unsafe functions to cast the pointer to the correct
+ /// type, assuming the "Callers" invariant above.
+ ///
+ /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths.
+ ///
+ /// Safety checklist:
+ ///
+ /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`].
+ /// 2. This method *must* return a slice to the same region of memory as the argument.
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &[Self] {
+ let data = bytes.as_ptr();
+ let len = bytes.len() / mem::size_of::<Self>();
+ debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
+ core::slice::from_raw_parts(data as *const Self, len)
+ }
+
+ /// Given `&[Self]`, returns a `&[u8]` with the same lifetime.
+ ///
+ /// The default implementation performs a pointer cast to the same region of memory.
+ ///
+ /// # Safety
+ ///
+ /// Implementations of this method should call potentially unsafe functions to cast the
+ /// pointer to the correct type.
+ ///
+ /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths.
+ #[inline]
+ #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219
+ fn as_byte_slice(slice: &[Self]) -> &[u8] {
+ unsafe {
+ slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice))
+ }
+ }
+}
+
+/// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type.
+///
+/// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead.
+pub trait AsULE: Copy {
+ /// The ULE type corresponding to `Self`.
+ ///
+ /// Types having infallible conversions from all bit values (Plain Old Data) can use
+ /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`.
+ ///
+ /// Types that are not well-defined for all bit values should implement a custom ULE.
+ type ULE: ULE;
+
+ /// Converts from `Self` to `Self::ULE`.
+ ///
+ /// This function may involve byte order swapping (native-endian to little-endian).
+ ///
+ /// For best performance, mark your implementation of this function `#[inline]`.
+ fn to_unaligned(self) -> Self::ULE;
+
+ /// Converts from `Self::ULE` to `Self`.
+ ///
+ /// This function may involve byte order swapping (little-endian to native-endian).
+ ///
+ /// For best performance, mark your implementation of this function `#[inline]`.
+ ///
+ /// # Safety
+ ///
+ /// This function is infallible because bit validation should have occurred when `Self::ULE`
+ /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like
+ /// `from_bytes_unchecked()`.
+ fn from_unaligned(unaligned: Self::ULE) -> Self;
+}
+
+/// An [`EqULE`] type is one whose byte sequence equals the byte sequence of its ULE type on
+/// little-endian platforms. This enables certain performance optimizations, such as
+/// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice).
+///
+/// # Implementation safety
+///
+/// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`)
+/// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where
+/// `*const T` can be cast to a valid `*const T::ULE`.
+pub unsafe trait EqULE: AsULE {}
+
+/// A trait for a type where aligned slices can be cast to unaligned slices.
+///
+/// Auto-implemented on all types implementing [`EqULE`].
+pub trait SliceAsULE
+where
+ Self: AsULE + Sized,
+{
+ /// Converts from `&[Self]` to `&[Self::ULE]` if possible.
+ ///
+ /// In general, this function returns `Some` on little-endian and `None` on big-endian.
+ fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>;
+}
+
+#[cfg(target_endian = "little")]
+impl<T> SliceAsULE for T
+where
+ T: EqULE,
+{
+ #[inline]
+ fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> {
+ // This is safe because on little-endian platforms, the byte sequence of &[T]
+ // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE,
+ // and &[T::ULE] has equal or looser alignment than &[T].
+ let ule_slice =
+ unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) };
+ Some(ule_slice)
+ }
+}
+
+#[cfg(not(target_endian = "little"))]
+impl<T> SliceAsULE for T
+where
+ T: EqULE,
+{
+ #[inline]
+ fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> {
+ None
+ }
+}
+
+/// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice.
+///
+/// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or
+/// [`#[derive(VarULE)]`](macro@VarULE) instead.
+///
+/// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types;
+/// however, it is much more preferable to use [`ULE`] for that purpose. The [`custom`] module contains
+/// additional documentation on how this type can be implemented on custom types.
+///
+ /// If deserialization with `VarZeroVec` is desired, it is recommended to implement `Deserialize` for
+/// `Box<T>` (serde does not do this automatically for unsized `T`).
+///
+/// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom)
+/// on some stack type to convert to and from the ULE type efficiently when necessary.
+///
+/// # Safety
+///
+/// Safety checklist for `VarULE`:
+///
+/// 1. The type *must not* include any uninitialized or padding bytes.
+/// 2. The type must have an alignment of 1 byte.
+/// 3. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// would not represent a valid slice of this type.
+/// 4. The impl of [`VarULE::validate_byte_slice()`] *must* return an error if the given byte slice
+/// cannot be used in its entirety.
+/// 5. The impl of [`VarULE::from_byte_slice_unchecked()`] must produce a reference to the same
+/// underlying data assuming that the given bytes previously passed validation.
+/// 6. All other methods *must* be left with their default impl, or else implemented according to
+/// their respective safety guidelines.
+/// 7. Acknowledge the following note about the equality invariant.
+///
+/// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2,
+/// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(packed)]` or `#[repr(transparent)]`.
+///
+/// # Equality invariant
+///
+/// A non-safety invariant is that if `Self` implements `PartialEq`, then it *must* be logically
+/// equivalent to byte equality on [`Self::as_byte_slice()`].
+///
+/// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not
+/// equal byte equality. In such a case, [`Self::validate_byte_slice()`] should return an error
+/// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and
+/// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form
+/// where only a single digit is allowed before `.`.
+///
+/// There may also be cases where a `VarULE` has multiple canonical forms, such as a faster
+/// version and a smaller version. The cleanest way to handle this case would be separate types.
+/// However, if this is not feasible, then the application should ensure that the data it is
+/// deserializing is in the expected form. For example, if the data is being loaded from an
+/// external source, then requests could carry information about the expected form of the data.
+///
+/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
+/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
+pub unsafe trait VarULE: 'static {
+ /// Validates a byte slice, `&[u8]`.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
+ /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should
+ /// be returned; otherwise, a `ZeroVecError` should be returned.
+ fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError>;
+
+ /// Parses a byte slice, `&[u8]`, and returns it as `&Self` with the same lifetime.
+ ///
+ /// If `Self` is not well-defined for all possible bit values, the bytes should be validated,
+ /// and an error should be returned in the same cases as [`Self::validate_byte_slice()`].
+ ///
+ /// The default implementation executes [`Self::validate_byte_slice()`] followed by
+ /// [`Self::from_byte_slice_unchecked`].
+ ///
+ /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`,
+ /// where `result` is the successful return value of the method. This means that the return
+ /// value spans the entire byte slice.
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ Self::validate_byte_slice(bytes)?;
+ let result = unsafe { Self::from_byte_slice_unchecked(bytes) };
+ debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes));
+ Ok(result)
+ }
+
+ /// Takes a byte slice, `&[u8]`, and returns it as `&Self` with the same lifetime, assuming
+ /// that this byte slice has previously been run through [`Self::parse_byte_slice()`] with
+ /// success.
+ ///
+ /// # Safety
+ ///
+ /// ## Callers
+ ///
+ /// Callers of this method must take care to ensure that `bytes` was previously passed through
+ /// [`Self::validate_byte_slice()`] with success (and was not changed since then).
+ ///
+ /// ## Implementors
+ ///
+ /// Implementations of this method may call unsafe functions to cast the pointer to the correct
+ /// type, assuming the "Callers" invariant above.
+ ///
+ /// Safety checklist:
+ ///
+ /// 1. This method *must* return the same result as [`Self::parse_byte_slice()`].
+ /// 2. This method *must* return a slice to the same region of memory as the argument.
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self;
+
+ /// Given `&Self`, returns a `&[u8]` with the same lifetime.
+ ///
+ /// The default implementation performs a pointer cast to the same region of memory.
+ ///
+ /// # Safety
+ ///
+ /// Implementations of this method should call potentially unsafe functions to cast the
+ /// pointer to the correct type.
+ #[inline]
+ fn as_byte_slice(&self) -> &[u8] {
+ unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) }
+ }
+
+ /// Allocate on the heap as a `Box<T>`
+ #[inline]
+ fn to_boxed(&self) -> Box<Self> {
+ let bytesvec = self.as_byte_slice().to_owned().into_boxed_slice();
+ let bytesvec = mem::ManuallyDrop::new(bytesvec);
+ unsafe {
+ // Get the pointer representation
+ let ptr: *mut Self =
+ Self::from_byte_slice_unchecked(&bytesvec) as *const Self as *mut Self;
+ assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec));
+ // Transmute the pointer to an owned pointer
+ Box::from_raw(ptr)
+ }
+ }
+}
+
+// Proc macro reexports
+//
+// These exist so that our docs can use intra-doc links.
+// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from
+// a submodule
+
+/// Custom derive for [`ULE`].
+///
+/// This can be attached to [`Copy`] structs containing only [`ULE`] types.
+///
+/// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining
+/// a custom ULE type.
+#[cfg(feature = "derive")]
+pub use zerovec_derive::ULE;
+
+/// Custom derive for [`VarULE`]
+///
+/// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end.
+///
+/// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining
+/// a custom [`VarULE`] type.
+#[cfg(feature = "derive")]
+pub use zerovec_derive::VarULE;
diff --git a/third_party/rust/zerovec/src/ule/multi.rs b/third_party/rust/zerovec/src/ule/multi.rs
new file mode 100644
index 0000000000..3281b20888
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/multi.rs
@@ -0,0 +1,154 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use crate::varzerovec::Index32;
+use crate::VarZeroSlice;
+use core::mem;
+
+/// This type is used by the custom derive to represent multiple [`VarULE`]
+/// fields packed into a single end-of-struct field. It is not recommended
+/// to use this type directly.
+///
+/// Logically, consider it to be `(V1, V2, V3, ..)`
+/// where `V1` etc are potentially different [`VarULE`] types.
+///
+/// Internally, it is represented by a VarZeroSlice.
+#[derive(PartialEq, Eq, Debug)]
+#[repr(transparent)]
+pub struct MultiFieldsULE(VarZeroSlice<[u8], Index32>);
+
+impl MultiFieldsULE {
+ /// Compute the amount of bytes needed to support elements with lengths `lengths`
+ #[inline]
+ pub fn compute_encoded_len_for(lengths: &[usize]) -> usize {
+ #[allow(clippy::expect_used)] // See #1410
+ unsafe {
+ // safe since BlankSliceEncoder is transparent over usize
+ let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]);
+ crate::varzerovec::components::compute_serializable_len::<_, _, Index32>(lengths)
+ .expect("Too many bytes to encode") as usize
+ }
+ }
+
+ /// Construct a partially initialized MultiFieldsULE backed by a mutable byte buffer
+ pub fn new_from_lengths_partially_initialized<'a>(
+ lengths: &[usize],
+ output: &'a mut [u8],
+ ) -> &'a mut Self {
+ unsafe {
+ // safe since BlankSliceEncoder is transparent over usize
+ let lengths = &*(lengths as *const [usize] as *const [BlankSliceEncoder]);
+ crate::varzerovec::components::write_serializable_bytes::<_, _, Index32>(
+ lengths, output,
+ );
+ debug_assert!(
+ <VarZeroSlice<[u8], Index32>>::validate_byte_slice(output).is_ok(),
+ "Encoded slice must be valid VarZeroSlice"
+ );
+ // Safe since write_serializable_bytes produces a valid VarZeroSlice buffer
+ let slice = <VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked_mut(output);
+ // safe since `Self` is transparent over VarZeroSlice
+ mem::transmute::<&mut VarZeroSlice<_, Index32>, &mut Self>(slice)
+ }
+ }
+
+ /// Given a buffer of size obtained by [`Self::compute_encoded_len_for()`], write element A to index idx
+ ///
+ /// # Safety
+ /// - `idx` must be in range
+ /// - `T` must be the appropriate type expected by the custom derive in this usage of this type
+ #[inline]
+ pub unsafe fn set_field_at<T: VarULE + ?Sized, A: EncodeAsVarULE<T> + ?Sized>(
+ &mut self,
+ idx: usize,
+ value: &A,
+ ) {
+ value.encode_var_ule_write(self.0.get_bytes_at_mut(idx))
+ }
+
+ /// Validate field at `index` to see if it is a valid `T` VarULE type
+ ///
+ /// # Safety
+ ///
+ /// - `index` must be in range
+ #[inline]
+ pub unsafe fn validate_field<T: VarULE + ?Sized>(
+ &self,
+ index: usize,
+ ) -> Result<(), ZeroVecError> {
+ T::validate_byte_slice(self.0.get_unchecked(index))
+ }
+
+ /// Get field at `index` as a value of type T
+ ///
+ /// # Safety
+ ///
+ /// - `index` must be in range
+ /// - Element at `index` must have been created with the VarULE type T
+ #[inline]
+ pub unsafe fn get_field<T: VarULE + ?Sized>(&self, index: usize) -> &T {
+ T::from_byte_slice_unchecked(self.0.get_unchecked(index))
+ }
+
+ /// Construct a `&MultiFieldsULE` from a byte slice without validating it.
+ ///
+ /// # Safety
+ /// - byte slice must be a valid `VarZeroSlice<[u8], Index32>`
+ #[inline]
+ pub unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ // Go through the same `Index32`-indexed slice type that `Self` wraps and that the
+ // `VarULE` impl for this type uses, rather than `VarZeroSlice`'s default index type.
+ // &Self is transparent over &VZS<..>
+ mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked(bytes))
+ }
+}
+
+/// This lets us conveniently use the EncodeAsVarULE functionality to create
+/// `VarZeroVec<[u8]>`s that have the right amount of space for elements
+/// without having to duplicate any unsafe code
+#[repr(transparent)]
+struct BlankSliceEncoder(usize);
+
+unsafe impl EncodeAsVarULE<[u8]> for BlankSliceEncoder {
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ self.0
+ }
+
+ #[inline]
+ fn encode_var_ule_write(&self, _dst: &mut [u8]) {
+ // do nothing
+ }
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. MultiFieldsULE does not include any uninitialized or padding bytes (achieved by being transparent over a VarULE type)
+// 2. MultiFieldsULE is aligned to 1 byte (achieved by being transparent over a VarULE type)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. All other methods are defaulted
+// 7. `MultiFieldsULE` byte equality is semantic equality (achieved by being transparent over a VarULE type)
+unsafe impl VarULE for MultiFieldsULE {
+ /// Note: MultiFieldsULE is usually used in cases where one should be calling .validate_field() directly for
+ /// each field, rather than using the regular VarULE impl.
+ ///
+ /// This impl exists so that EncodeAsVarULE can work.
+ #[inline]
+ fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
+ <VarZeroSlice<[u8], Index32>>::validate_byte_slice(slice)
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ // &Self is transparent over &VZS<..>
+ mem::transmute(<VarZeroSlice<[u8], Index32>>::from_byte_slice_unchecked(
+ bytes,
+ ))
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/niche.rs b/third_party/rust/zerovec/src/ule/niche.rs
new file mode 100644
index 0000000000..ae61faca0b
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/niche.rs
@@ -0,0 +1,180 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::{marker::Copy, mem::size_of};
+
+use super::{AsULE, ULE};
+
+/// The [`ULE`] types implementing this trait guarantee that [`NicheBytes::NICHE_BIT_PATTERN`]
+/// can never occur as a valid byte representation of the type.
+///
+/// Guarantees for a valid implementation.
+/// 1. N must be equal to `core::mem::size_of::<Self>()` or else it will
+/// cause panics.
+/// 2. The bit pattern [`NicheBytes::NICHE_BIT_PATTERN`] must never be a valid byte representation
+/// of `Self`; otherwise a valid value would be indistinguishable from the niche.
+/// 3. The abstractions built on top of this trait must panic on an invalid N.
+/// 4. The abstractions built on this trait that use type punning must ensure that type being
+/// punned is [`ULE`].
+pub trait NicheBytes<const N: usize> {
+ const NICHE_BIT_PATTERN: [u8; N];
+}
+
+/// [`ULE`] type for [`NichedOption<U,N>`] where U implements [`NicheBytes`].
+/// The invalid bit pattern is used as the niche.
+///
+/// This uses 1 byte less than [`crate::ule::OptionULE<U>`] to represent [`NichedOption<U,N>`].
+///
+/// # Example
+///
+/// ```
+/// use core::num::NonZeroI8;
+/// use zerovec::ule::NichedOption;
+/// use zerovec::ZeroVec;
+///
+/// let bytes = &[0x00, 0x01, 0x02, 0x00];
+/// let zv_no: ZeroVec<NichedOption<NonZeroI8, 1>> =
+/// ZeroVec::parse_byte_slice(bytes)
+/// .expect("Unable to parse as NichedOption.");
+///
+/// assert_eq!(zv_no.get(0).map(|e| e.0), Some(None));
+/// assert_eq!(zv_no.get(1).map(|e| e.0), Some(NonZeroI8::new(1)));
+/// assert_eq!(zv_no.get(2).map(|e| e.0), Some(NonZeroI8::new(2)));
+/// assert_eq!(zv_no.get(3).map(|e| e.0), Some(None));
+/// ```
+// Invariants:
+// The union stores [`NicheBytes::NICHE_BIT_PATTERN`] when None.
+// Any other bit pattern is a valid `U`.
+#[repr(C)]
+pub union NichedOptionULE<U: NicheBytes<N> + ULE, const N: usize> {
+ /// Invariant: The value is `niche` only if the bytes equal NICHE_BIT_PATTERN.
+ niche: [u8; N],
+ /// Invariant: The value is `valid` if the `niche` field does not match NICHE_BIT_PATTERN.
+ valid: U,
+}
+
+impl<U: NicheBytes<N> + ULE + core::fmt::Debug, const N: usize> core::fmt::Debug
+ for NichedOptionULE<U, N>
+{
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.get().fmt(f)
+ }
+}
+
+impl<U: NicheBytes<N> + ULE, const N: usize> NichedOptionULE<U, N> {
+ /// New `NichedOptionULE<U, N>` from `Option<U>`
+ pub fn new(opt: Option<U>) -> Self {
+ assert!(N == core::mem::size_of::<U>());
+ match opt {
+ Some(u) => Self { valid: u },
+ None => Self {
+ niche: <U as NicheBytes<N>>::NICHE_BIT_PATTERN,
+ },
+ }
+ }
+
+ /// Convert to an `Option<U>`
+ pub fn get(self) -> Option<U> {
+ // Safety: The union stores NICHE_BIT_PATTERN when None otherwise a valid U
+ unsafe {
+ if self.niche == <U as NicheBytes<N>>::NICHE_BIT_PATTERN {
+ None
+ } else {
+ Some(self.valid)
+ }
+ }
+ }
+}
+
+impl<U: NicheBytes<N> + ULE, const N: usize> Copy for NichedOptionULE<U, N> {}
+
+impl<U: NicheBytes<N> + ULE, const N: usize> Clone for NichedOptionULE<U, N> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<U: NicheBytes<N> + ULE + PartialEq, const N: usize> PartialEq for NichedOptionULE<U, N> {
+ fn eq(&self, other: &Self) -> bool {
+ self.get().eq(&other.get())
+ }
+}
+
+impl<U: NicheBytes<N> + ULE + Eq, const N: usize> Eq for NichedOptionULE<U, N> {}
+
+/// Safety for ULE trait
+/// 1. NichedOptionULE does not have any padding bytes due to `#[repr(C)]` on a union
+/// containing only ULE fields.
+/// NichedOptionULE either contains NICHE_BIT_PATTERN or valid U byte sequences.
+/// In both cases the data is initialized.
+/// 2. NichedOptionULE is aligned to 1 byte because it is a `#[repr(C)]` union whose fields
+/// (`[u8; N]` and the ULE type `U`) are all aligned to 1 byte.
+/// 3. validate_byte_slice impl returns an error if invalid bytes are encountered.
+/// 4. validate_byte_slice impl returns an error if there are extra bytes.
+/// 5. The other ULE methods are left to their default impl.
+/// 6. NichedOptionULE equality is based on ULE equality of the subfield, assuming that NicheBytes
+/// has been implemented correctly (this is a correctness but not a safety guarantee).
+unsafe impl<U: NicheBytes<N> + ULE, const N: usize> ULE for NichedOptionULE<U, N> {
+ /// Checks that `bytes` splits into whole `Self`-sized chunks, each of which is either
+ /// `NICHE_BIT_PATTERN` or passes `U::validate_byte_slice`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `N != size_of::<U>()`, the same check performed by `NichedOptionULE::new`.
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), crate::ZeroVecError> {
+ let size = size_of::<Self>();
+ // The implementation is only correct if NICHE_BIT_PATTERN has same number of bytes as the
+ // type. This is a hard assert (not debug-only) so that an invalid N panics in every build.
+ assert!(N == core::mem::size_of::<U>());
+
+ // The bytes should fully transmute to a collection of Self
+ if bytes.len() % size != 0 {
+ return Err(crate::ZeroVecError::length::<Self>(bytes.len()));
+ }
+ bytes.chunks(size).try_for_each(|chunk| {
+ // Associated const cannot be referenced in a pattern
+ // https://doc.rust-lang.org/error-index.html#E0158
+ if chunk == <U as NicheBytes<N>>::NICHE_BIT_PATTERN {
+ Ok(())
+ } else {
+ U::validate_byte_slice(chunk)
+ }
+ })
+ }
+}
+
+/// Optional type which uses [`NichedOptionULE<U,N>`] as ULE type.
+/// The implementors guarantee that `N == core::mem::size_of::<U::ULE>()`.
+/// `#[repr(transparent)]` guarantees that the layout is same as [`Option<U>`]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[repr(transparent)]
+#[non_exhaustive]
+pub struct NichedOption<U, const N: usize>(pub Option<U>);
+
+impl<U, const N: usize> NichedOption<U, N> {
+ pub const fn new(o: Option<U>) -> Self {
+ Self(o)
+ }
+}
+
+impl<U, const N: usize> Default for NichedOption<U, N> {
+ fn default() -> Self {
+ Self(None)
+ }
+}
+
+impl<U, const N: usize> From<Option<U>> for NichedOption<U, N> {
+ fn from(o: Option<U>) -> Self {
+ Self(o)
+ }
+}
+
+impl<U: AsULE, const N: usize> AsULE for NichedOption<U, N>
+where
+ U::ULE: NicheBytes<N>,
+{
+ type ULE = NichedOptionULE<U::ULE, N>;
+
+ fn to_unaligned(self) -> Self::ULE {
+ NichedOptionULE::new(self.0.map(U::to_unaligned))
+ }
+
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self(unaligned.get().map(U::from_unaligned))
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/option.rs b/third_party/rust/zerovec/src/ule/option.rs
new file mode 100644
index 0000000000..9b0dc5b28a
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/option.rs
@@ -0,0 +1,264 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use core::cmp::Ordering;
+use core::marker::PhantomData;
+use core::mem::{self, MaybeUninit};
+
+/// This type is the [`ULE`] type for `Option<U>` where `U` is a [`ULE`] type
+///
+/// # Example
+///
+/// ```rust
+/// use zerovec::ZeroVec;
+///
+/// let z = ZeroVec::alloc_from_slice(&[
+/// Some('a'),
+/// Some('á'),
+/// Some('ø'),
+/// None,
+/// Some('ł'),
+/// ]);
+///
+/// assert_eq!(z.get(2), Some(Some('ø')));
+/// assert_eq!(z.get(3), Some(None));
+/// ```
+// Invariants:
+// The MaybeUninit is zeroed when None (bool = false),
+// and is valid when Some (bool = true)
+#[repr(packed)]
+pub struct OptionULE<U>(bool, MaybeUninit<U>);
+
+impl<U: Copy> OptionULE<U> {
+ /// Obtain this as an `Option<U>`
+ pub fn get(self) -> Option<U> {
+ if self.0 {
+ unsafe {
+ // safety: self.0 is true so the MaybeUninit is valid
+ Some(self.1.assume_init())
+ }
+ } else {
+ None
+ }
+ }
+
+ /// Construct an `OptionULE<U>` from an equivalent `Option<U>`
+ pub fn new(opt: Option<U>) -> Self {
+ if let Some(inner) = opt {
+ Self(true, MaybeUninit::new(inner))
+ } else {
+ Self(false, MaybeUninit::zeroed())
+ }
+ }
+}
+
+impl<U: Copy + core::fmt::Debug> core::fmt::Debug for OptionULE<U> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.get().fmt(f)
+ }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. OptionULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(packed)]` on a struct containing only ULE fields,
+// in the context of this impl. The MaybeUninit is valid for all byte sequences, and we only generate
+// zeroed or valid-T byte sequences to fill it)
+// 2. OptionULE is aligned to 1 byte.
+// (achieved by `#[repr(packed)]` on a struct containing only ULE fields, in the context of this impl)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+// 5. The other ULE methods use the default impl.
+// 6. OptionULE byte equality is semantic equality by relying on the ULE equality
+// invariant on the subfields
+unsafe impl<U: ULE> ULE for OptionULE<U> {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ let size = mem::size_of::<Self>();
+ if bytes.len() % size != 0 {
+ return Err(ZeroVecError::length::<Self>(bytes.len()));
+ }
+ for chunk in bytes.chunks(size) {
+ #[allow(clippy::indexing_slicing)] // `chunk` will have enough bytes to fit Self
+ match chunk[0] {
+ // https://doc.rust-lang.org/reference/types/boolean.html
+ // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+ 0 => {
+ if !chunk[1..].iter().all(|x| *x == 0) {
+ return Err(ZeroVecError::parse::<Self>());
+ }
+ }
+ 1 => U::validate_byte_slice(&chunk[1..])?,
+ _ => return Err(ZeroVecError::parse::<Self>()),
+ }
+ }
+ Ok(())
+ }
+}
+
+impl<T: AsULE> AsULE for Option<T> {
+ type ULE = OptionULE<T::ULE>;
+ fn to_unaligned(self) -> OptionULE<T::ULE> {
+ OptionULE::new(self.map(T::to_unaligned))
+ }
+
+ fn from_unaligned(other: OptionULE<T::ULE>) -> Self {
+ other.get().map(T::from_unaligned)
+ }
+}
+
+impl<U: Copy> Copy for OptionULE<U> {}
+
+impl<U: Copy> Clone for OptionULE<U> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<U: Copy + PartialEq> PartialEq for OptionULE<U> {
+ fn eq(&self, other: &Self) -> bool {
+ self.get().eq(&other.get())
+ }
+}
+
+impl<U: Copy + Eq> Eq for OptionULE<U> {}
+
+/// A type allowing one to represent `Option<U>` for [`VarULE`] `U` types.
+///
+/// ```rust
+/// use zerovec::ule::OptionVarULE;
+/// use zerovec::VarZeroVec;
+///
+/// let mut zv: VarZeroVec<OptionVarULE<str>> = VarZeroVec::new();
+///
+/// zv.make_mut().push(&None::<&str>);
+/// zv.make_mut().push(&Some("hello"));
+/// zv.make_mut().push(&Some("world"));
+/// zv.make_mut().push(&None::<&str>);
+///
+/// assert_eq!(zv.get(0).unwrap().as_ref(), None);
+/// assert_eq!(zv.get(1).unwrap().as_ref(), Some("hello"));
+/// ```
+// The slice field is empty when None (bool = false),
+// and is a valid U when Some (bool = true)
+#[repr(packed)]
+pub struct OptionVarULE<U: VarULE + ?Sized>(PhantomData<U>, bool, [u8]);
+
+impl<U: VarULE + ?Sized> OptionVarULE<U> {
+ /// Obtain this as an `Option<&U>`
+ pub fn as_ref(&self) -> Option<&U> {
+ if self.1 {
+ unsafe {
+ // Safety: byte field is a valid U if boolean field is true
+ Some(U::from_byte_slice_unchecked(&self.2))
+ }
+ } else {
+ None
+ }
+ }
+}
+
+impl<U: VarULE + ?Sized + core::fmt::Debug> core::fmt::Debug for OptionVarULE<U> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.as_ref().fmt(f)
+ }
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. OptionVarULE<T> does not include any uninitialized or padding bytes
+// (achieved by being repr(packed) on ULE types)
+// 2. OptionVarULE<T> is aligned to 1 byte (achieved by being repr(packed) on ULE types)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. All other methods are defaulted
+// 7. OptionVarULE<T> byte equality is semantic equality (achieved by being an aggregate)
+unsafe impl<U: VarULE + ?Sized> VarULE for OptionVarULE<U> {
+ #[inline]
+ fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
+ if slice.is_empty() {
+ return Err(ZeroVecError::length::<Self>(slice.len()));
+ }
+ #[allow(clippy::indexing_slicing)] // slice already verified to be nonempty
+ match slice[0] {
+ // https://doc.rust-lang.org/reference/types/boolean.html
+ // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+ 0 => {
+ if slice.len() != 1 {
+ Err(ZeroVecError::length::<Self>(slice.len()))
+ } else {
+ Ok(())
+ }
+ }
+ 1 => U::validate_byte_slice(&slice[1..]),
+ _ => Err(ZeroVecError::parse::<Self>()),
+ }
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ let entire_struct_as_slice: *const [u8] =
+ ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1);
+ &*(entire_struct_as_slice as *const Self)
+ }
+}
+
+unsafe impl<T, U> EncodeAsVarULE<OptionVarULE<U>> for Option<T>
+where
+ T: EncodeAsVarULE<U>,
+ U: VarULE + ?Sized,
+{
+ fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
+ // unnecessary if the other two are implemented
+ unreachable!()
+ }
+
+ #[inline]
+ fn encode_var_ule_len(&self) -> usize {
+ if let Some(ref inner) = *self {
+ // slice + boolean
+ 1 + inner.encode_var_ule_len()
+ } else {
+ // boolean + empty slice
+ 1
+ }
+ }
+
+ #[allow(clippy::indexing_slicing)] // This method is allowed to panic when lengths are invalid
+ fn encode_var_ule_write(&self, dst: &mut [u8]) {
+ if let Some(ref inner) = *self {
+ debug_assert!(
+ !dst.is_empty(),
+ "OptionVarULE must have at least one byte when Some"
+ );
+ dst[0] = 1;
+ inner.encode_var_ule_write(&mut dst[1..]);
+ } else {
+ debug_assert!(
+ dst.len() == 1,
+ "OptionVarULE must have exactly one byte when None"
+ );
+ dst[0] = 0;
+ }
+ }
+}
+
+impl<U: VarULE + ?Sized + PartialEq> PartialEq for OptionVarULE<U> {
+ fn eq(&self, other: &Self) -> bool {
+ self.as_ref().eq(&other.as_ref())
+ }
+}
+
+impl<U: VarULE + ?Sized + Eq> Eq for OptionVarULE<U> {}
+
+impl<U: VarULE + ?Sized + PartialOrd> PartialOrd for OptionVarULE<U> {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.as_ref().partial_cmp(&other.as_ref())
+ }
+}
+
+impl<U: VarULE + ?Sized + Ord> Ord for OptionVarULE<U> {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.as_ref().cmp(&other.as_ref())
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/plain.rs b/third_party/rust/zerovec/src/ule/plain.rs
new file mode 100644
index 0000000000..f244f6b682
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/plain.rs
@@ -0,0 +1,366 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(clippy::upper_case_acronyms)]
+//! ULE implementation for Plain Old Data types, including all sized integers.
+
+use super::*;
+use crate::impl_ule_from_array;
+use crate::ZeroSlice;
+use core::num::{NonZeroI8, NonZeroU8};
+
+/// A u8 array of little-endian data with infallible conversions to and from &[u8].
+#[repr(transparent)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)]
+#[allow(clippy::exhaustive_structs)] // newtype
+pub struct RawBytesULE<const N: usize>(pub [u8; N]);
+
+impl<const N: usize> RawBytesULE<N> {
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ #[inline]
+ pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
+ let data = bytes.as_mut_ptr();
+ let len = bytes.len() / N;
+ // Safe because Self is transparent over [u8; N]
+ unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
+ }
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. RawBytesULE does not include any uninitialized or padding bytes.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 2. RawBytesULE is aligned to 1 byte.
+// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
+// 5. The other ULE methods use the default impl.
+// 6. RawBytesULE byte equality is semantic equality
+unsafe impl<const N: usize> ULE for RawBytesULE<N> {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ if bytes.len() % N == 0 {
+ // Safe because Self is transparent over [u8; N]
+ Ok(())
+ } else {
+ Err(ZeroVecError::length::<Self>(bytes.len()))
+ }
+ }
+}
+
+impl<const N: usize> From<[u8; N]> for RawBytesULE<N> {
+ #[inline]
+ fn from(le_bytes: [u8; N]) -> Self {
+ Self(le_bytes)
+ }
+}
+
+macro_rules! impl_byte_slice_size {
+ ($unsigned:ty, $size:literal) => {
+ impl RawBytesULE<$size> {
+ #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")]
+ #[inline]
+ pub fn as_unsigned_int(&self) -> $unsigned {
+ <$unsigned as $crate::ule::AsULE>::from_unaligned(*self)
+ }
+
+ #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")]
+ #[inline]
+ pub const fn from_aligned(value: $unsigned) -> Self {
+ Self(value.to_le_bytes())
+ }
+
+ impl_ule_from_array!(
+ $unsigned,
+ RawBytesULE<$size>,
+ RawBytesULE([0; $size])
+ );
+ }
+ };
+}
+
+macro_rules! impl_const_constructors {
+ ($base:ty, $size:literal) => {
+ impl ZeroSlice<$base> {
+ /// This function can be used for constructing ZeroVecs in a const context, avoiding
+ /// parsing checks.
+ ///
+ /// This cannot be generic over T because of current limitations in `const`, but if
+ /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`]
+ /// instead.
+ ///
+ /// See [`ZeroSlice::cast()`] for an example.
+ pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ let len = bytes.len();
+ #[allow(clippy::modulo_one)]
+ if len % $size == 0 {
+ Ok(unsafe { Self::from_bytes_unchecked(bytes) })
+ } else {
+ Err(ZeroVecError::InvalidLength {
+ ty: concat!("<const construct: ", $size, ">"),
+ len,
+ })
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_byte_slice_type {
+ ($single_fn:ident, $type:ty, $size:literal) => {
+ impl From<$type> for RawBytesULE<$size> {
+ #[inline]
+ fn from(value: $type) -> Self {
+ Self(value.to_le_bytes())
+ }
+ }
+ impl AsULE for $type {
+ type ULE = RawBytesULE<$size>;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ RawBytesULE(self.to_le_bytes())
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ <$type>::from_le_bytes(unaligned.0)
+ }
+ }
+ // EqULE is true because $type and RawBytesULE<$size>
+ // have the same byte sequence on little-endian
+ unsafe impl EqULE for $type {}
+
+ impl RawBytesULE<$size> {
+ pub const fn $single_fn(v: $type) -> Self {
+ RawBytesULE(v.to_le_bytes())
+ }
+ }
+ };
+}
+
+macro_rules! impl_byte_slice_unsigned_type {
+ ($type:ty, $size:literal) => {
+ impl_byte_slice_type!(from_unsigned, $type, $size);
+ };
+}
+
+macro_rules! impl_byte_slice_signed_type {
+ ($type:ty, $size:literal) => {
+ impl_byte_slice_type!(from_signed, $type, $size);
+ };
+}
+
+impl_byte_slice_size!(u16, 2);
+impl_byte_slice_size!(u32, 4);
+impl_byte_slice_size!(u64, 8);
+impl_byte_slice_size!(u128, 16);
+
+impl_byte_slice_unsigned_type!(u16, 2);
+impl_byte_slice_unsigned_type!(u32, 4);
+impl_byte_slice_unsigned_type!(u64, 8);
+impl_byte_slice_unsigned_type!(u128, 16);
+
+impl_byte_slice_signed_type!(i16, 2);
+impl_byte_slice_signed_type!(i32, 4);
+impl_byte_slice_signed_type!(i64, 8);
+impl_byte_slice_signed_type!(i128, 16);
+
+impl_const_constructors!(u8, 1);
+impl_const_constructors!(u16, 2);
+impl_const_constructors!(u32, 4);
+impl_const_constructors!(u64, 8);
+impl_const_constructors!(u128, 16);
+
+// Note: The f32 and f64 const constructors currently have limited use because
+// `f32::to_le_bytes` is not yet const.
+
+impl_const_constructors!(bool, 1);
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. u8 does not include any uninitialized or padding bytes.
+// 2. u8 is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. u8 byte equality is semantic equality
+unsafe impl ULE for u8 {
+ #[inline]
+ fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> {
+ Ok(())
+ }
+}
+
+impl AsULE for u8 {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+// EqULE is true because u8 is its own ULE.
+unsafe impl EqULE for u8 {}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. NonZeroU8 does not include any uninitialized or padding bytes.
+// 2. NonZeroU8 is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (0x00).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. NonZeroU8 byte equality is semantic equality
+unsafe impl ULE for NonZeroU8 {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ bytes.iter().try_for_each(|b| {
+ if *b == 0x00 {
+ Err(ZeroVecError::parse::<Self>())
+ } else {
+ Ok(())
+ }
+ })
+ }
+}
+
+impl AsULE for NonZeroU8 {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+unsafe impl EqULE for NonZeroU8 {}
+
+impl NicheBytes<1> for NonZeroU8 {
+ const NICHE_BIT_PATTERN: [u8; 1] = [0x00];
+}
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. i8 does not include any uninitialized or padding bytes.
+// 2. i8 is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. i8 byte equality is semantic equality
+unsafe impl ULE for i8 {
+ #[inline]
+ fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> {
+ Ok(())
+ }
+}
+
+impl AsULE for i8 {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+// EqULE is true because i8 is its own ULE.
+unsafe impl EqULE for i8 {}
+
+impl AsULE for NonZeroI8 {
+ type ULE = NonZeroU8;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ // Safety: NonZeroU8 and NonZeroI8 have the same size (1 byte) and the same validity invariant (any bit pattern except 0x00)
+ unsafe { core::mem::transmute(self) }
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ // Safety: NonZeroU8 and NonZeroI8 have the same size (1 byte) and the same validity invariant (any bit pattern except 0x00)
+ unsafe { core::mem::transmute(unaligned) }
+ }
+}
+
+// These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation
+// on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits
+//
+// The only potential problem is that some older platforms treat signaling NaNs differently. This is
+// still quite portable, signalingness is not typically super important.
+
+impl AsULE for f32 {
+ type ULE = RawBytesULE<4>;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self.to_bits().to_unaligned()
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self::from_bits(u32::from_unaligned(unaligned))
+ }
+}
+
+impl AsULE for f64 {
+ type ULE = RawBytesULE<8>;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self.to_bits().to_unaligned()
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ Self::from_bits(u64::from_unaligned(unaligned))
+ }
+}
+
+// The from_bits documentation mentions that they have identical byte representations to integers
+// and EqULE only cares about LE systems
+unsafe impl EqULE for f32 {}
+unsafe impl EqULE for f64 {}
+
+// The bool impl is not as efficient as it could be
+// We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking
+// for better bitpacking
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. bool does not include any uninitialized or padding bytes (the remaining 7 bits in bool are by definition zero)
+// 2. bool is aligned to 1 byte.
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1).
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
+// 5. The other ULE methods use the default impl.
+// 6. bool byte equality is semantic equality
+unsafe impl ULE for bool {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ for byte in bytes {
+ // https://doc.rust-lang.org/reference/types/boolean.html
+ // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
+ if *byte > 1 {
+ return Err(ZeroVecError::parse::<Self>());
+ }
+ }
+ Ok(())
+ }
+}
+
+impl AsULE for bool {
+ type ULE = Self;
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned
+ }
+}
+
+// EqULE is true because bool is its own ULE.
+unsafe impl EqULE for bool {}
diff --git a/third_party/rust/zerovec/src/ule/slices.rs b/third_party/rust/zerovec/src/ule/slices.rs
new file mode 100644
index 0000000000..75ea57e02e
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/slices.rs
@@ -0,0 +1,103 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+use core::str;
+
+// Safety (based on the safety checklist on the ULE trait):
+// 1. [T; N] does not include any uninitialized or padding bytes since T is ULE
+// 2. [T; N] is aligned to 1 byte since T is ULE
+// 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+// 4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
+// 5. The other ULE methods use the default impl.
+// 6. [T; N] byte equality is semantic equality since T is ULE
+unsafe impl<T: ULE, const N: usize> ULE for [T; N] {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ // a slice of multiple Selfs is equivalent to just a larger slice of Ts
+ T::validate_byte_slice(bytes)
+ }
+}
+
+impl<T: AsULE, const N: usize> AsULE for [T; N] {
+ type ULE = [T::ULE; N];
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ self.map(T::to_unaligned)
+ }
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ unaligned.map(T::from_unaligned)
+ }
+}
+
+unsafe impl<T: EqULE, const N: usize> EqULE for [T; N] {}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. str does not include any uninitialized or padding bytes.
+// 2. str is aligned to 1 byte.
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. `parse_byte_slice()` is equivalent to `validate_byte_slice()` followed by `from_byte_slice_unchecked()`
+// 7. str byte equality is semantic equality
+unsafe impl VarULE for str {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>())?;
+ Ok(())
+ }
+
+ #[inline]
+ fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ str::from_utf8(bytes).map_err(|_| ZeroVecError::parse::<Self>())
+ }
+ /// Invariant: must be safe to call when called on a slice that previously
+ /// succeeded with `parse_byte_slice`
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ str::from_utf8_unchecked(bytes)
+ }
+}
+
+/// Note: VarULE is well-defined for all `[T]` where `T: ULE`, but [`ZeroSlice`] is more ergonomic
+/// when `T` is a low-level ULE type. For example:
+///
+/// ```no_run
+/// # use zerovec::ZeroSlice;
+/// # use zerovec::VarZeroVec;
+/// # use zerovec::ule::AsULE;
+/// // OK: [u8] is a useful type
+/// let _: VarZeroVec<[u8]> = unimplemented!();
+///
+/// // Technically works, but [u32::ULE] is not very useful
+/// let _: VarZeroVec<[<u32 as AsULE>::ULE]> = unimplemented!();
+///
+/// // Better: ZeroSlice<u32>
+/// let _: VarZeroVec<ZeroSlice<u32>> = unimplemented!();
+/// ```
+///
+/// [`ZeroSlice`]: crate::ZeroSlice
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. [T] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type)
+// 2. [T] is aligned to 1 byte (achieved by being a slice of a ULE type)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. All other methods are defaulted
+// 7. `[T]` byte equality is semantic equality (achieved by being a slice of a ULE type)
+unsafe impl<T> VarULE for [T]
+where
+ T: ULE,
+{
+ #[inline]
+ fn validate_byte_slice(slice: &[u8]) -> Result<(), ZeroVecError> {
+ T::validate_byte_slice(slice)
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ T::from_byte_slice_unchecked(bytes)
+ }
+}
diff --git a/third_party/rust/zerovec/src/ule/tuple.rs b/third_party/rust/zerovec/src/ule/tuple.rs
new file mode 100644
index 0000000000..3e0f291b3f
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/tuple.rs
@@ -0,0 +1,179 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! ULE impls for tuples.
+//!
+//! Rust does not guarantee the layout of tuples, so ZeroVec defines its own tuple ULE types.
+//!
+//! Impls are defined for tuples of up to 6 elements. For longer tuples, use a custom struct
+//! with [`#[make_ule]`](crate::make_ule).
+//!
+//! # Examples
+//!
+//! ```
+//! use zerovec::ZeroVec;
+//!
+//! // ZeroVec of tuples!
+//! let zerovec: ZeroVec<(u32, char)> = [(1, 'a'), (1234901, '啊'), (100, 'अ')]
+//! .iter()
+//! .copied()
+//! .collect();
+//!
+//! assert_eq!(zerovec.get(1), Some((1234901, '啊')));
+//! ```
+
+use super::*;
+use core::fmt;
+use core::mem;
+
+macro_rules! tuple_ule {
+ ($name:ident, $len:literal, [ $($t:ident $i:tt),+ ]) => {
+ #[doc = concat!("ULE type for tuples with ", $len, " elements.")]
+ #[repr(packed)]
+ #[allow(clippy::exhaustive_structs)] // stable
+ pub struct $name<$($t),+>($(pub $t),+);
+
+ // Safety (based on the safety checklist on the ULE trait):
+ // 1. TupleULE does not include any uninitialized or padding bytes.
+ // (achieved by `#[repr(packed)]` on a struct containing only ULE fields)
+ // 2. TupleULE is aligned to 1 byte.
+ // (achieved by `#[repr(packed)]` on a struct containing only ULE fields)
+ // 3. The impl of validate_byte_slice() returns an error if any byte is not valid.
+ // 4. The impl of validate_byte_slice() returns an error if there are extra bytes.
+ // 5. The other ULE methods use the default impl.
+ // 6. TupleULE byte equality is semantic equality by relying on the ULE equality
+ // invariant on the subfields
+ unsafe impl<$($t: ULE),+> ULE for $name<$($t),+> {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ // expands to: 0usize + mem::size_of::<A>() + mem::size_of::<B>();
+ let ule_bytes = 0usize $(+ mem::size_of::<$t>())+;
+ if bytes.len() % ule_bytes != 0 {
+ return Err(ZeroVecError::length::<Self>(bytes.len()));
+ }
+ for chunk in bytes.chunks(ule_bytes) {
+ let mut i = 0;
+ $(
+ let j = i;
+ i += mem::size_of::<$t>();
+ #[allow(clippy::indexing_slicing)] // length checked
+ <$t>::validate_byte_slice(&chunk[j..i])?;
+ )+
+ }
+ Ok(())
+ }
+ }
+
+ impl<$($t: AsULE),+> AsULE for ($($t),+) {
+ type ULE = $name<$(<$t>::ULE),+>;
+
+ #[inline]
+ fn to_unaligned(self) -> Self::ULE {
+ $name($(
+ self.$i.to_unaligned()
+ ),+)
+ }
+
+ #[inline]
+ fn from_unaligned(unaligned: Self::ULE) -> Self {
+ ($(
+ <$t>::from_unaligned(unaligned.$i)
+ ),+)
+ }
+ }
+
+ impl<$($t: fmt::Debug + ULE),+> fmt::Debug for $name<$($t),+> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+ ($(self.$i),+).fmt(f)
+ }
+ }
+
+ // We need manual impls since `#[derive()]` is disallowed on packed types
+ impl<$($t: PartialEq + ULE),+> PartialEq for $name<$($t),+> {
+ fn eq(&self, other: &Self) -> bool {
+ ($(self.$i),+).eq(&($(other.$i),+))
+ }
+ }
+
+ impl<$($t: Eq + ULE),+> Eq for $name<$($t),+> {}
+
+ impl<$($t: PartialOrd + ULE),+> PartialOrd for $name<$($t),+> {
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ ($(self.$i),+).partial_cmp(&($(other.$i),+))
+ }
+ }
+
+ impl<$($t: Ord + ULE),+> Ord for $name<$($t),+> {
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ ($(self.$i),+).cmp(&($(other.$i),+))
+ }
+ }
+
+ impl<$($t: ULE),+> Clone for $name<$($t),+> {
+ fn clone(&self) -> Self {
+ *self
+ }
+ }
+
+ impl<$($t: ULE),+> Copy for $name<$($t),+> {}
+
+ impl<'a, $($t: Ord + AsULE + 'static),+> crate::map::ZeroMapKV<'a> for ($($t),+) {
+ type Container = crate::ZeroVec<'a, ($($t),+)>;
+ type Slice = crate::ZeroSlice<($($t),+)>;
+ type GetType = $name<$(<$t>::ULE),+>;
+ type OwnedType = ($($t),+);
+ }
+ };
+}
+
+tuple_ule!(Tuple2ULE, "2", [ A 0, B 1 ]);
+tuple_ule!(Tuple3ULE, "3", [ A 0, B 1, C 2 ]);
+tuple_ule!(Tuple4ULE, "4", [ A 0, B 1, C 2, D 3 ]);
+tuple_ule!(Tuple5ULE, "5", [ A 0, B 1, C 2, D 3, E 4 ]);
+tuple_ule!(Tuple6ULE, "6", [ A 0, B 1, C 2, D 3, E 4, F 5 ]);
+
+#[test]
+fn test_pairule_validate() {
+ use crate::ZeroVec;
+ let vec: Vec<(u32, char)> = vec![(1, 'a'), (1234901, '啊'), (100, 'अ')];
+ let zerovec: ZeroVec<(u32, char)> = vec.iter().copied().collect();
+ let bytes = zerovec.as_bytes();
+ let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+ assert_eq!(zerovec, zerovec2);
+
+ // Test failed validation with a correctly sized but differently constrained tuple
+ // Note: 1234901 is not a valid char
+ let zerovec3 = ZeroVec::<(char, u32)>::parse_byte_slice(bytes);
+ assert!(zerovec3.is_err());
+}
+
+#[test]
+fn test_tripleule_validate() {
+ use crate::ZeroVec;
+ let vec: Vec<(u32, char, i8)> = vec![(1, 'a', -5), (1234901, '啊', 3), (100, 'अ', -127)];
+ let zerovec: ZeroVec<(u32, char, i8)> = vec.iter().copied().collect();
+ let bytes = zerovec.as_bytes();
+ let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+ assert_eq!(zerovec, zerovec2);
+
+ // Test failed validation with a correctly sized but differently constrained tuple
+ // Note: 1234901 is not a valid char
+ let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_byte_slice(bytes);
+ assert!(zerovec3.is_err());
+}
+
+#[test]
+fn test_quadule_validate() {
+ use crate::ZeroVec;
+ let vec: Vec<(u32, char, i8, u16)> =
+ vec![(1, 'a', -5, 3), (1234901, '啊', 3, 11), (100, 'अ', -127, 0)];
+ let zerovec: ZeroVec<(u32, char, i8, u16)> = vec.iter().copied().collect();
+ let bytes = zerovec.as_bytes();
+ let zerovec2 = ZeroVec::parse_byte_slice(bytes).unwrap();
+ assert_eq!(zerovec, zerovec2);
+
+ // Test failed validation with a correctly sized but differently constrained tuple
+ // Note: 1234901 is not a valid char
+ let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_byte_slice(bytes);
+ assert!(zerovec3.is_err());
+}
diff --git a/third_party/rust/zerovec/src/ule/unvalidated.rs b/third_party/rust/zerovec/src/ule/unvalidated.rs
new file mode 100644
index 0000000000..21cfb0c0d5
--- /dev/null
+++ b/third_party/rust/zerovec/src/ule/unvalidated.rs
@@ -0,0 +1,527 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{AsULE, RawBytesULE, VarULE};
+use crate::ule::EqULE;
+use crate::{map::ZeroMapKV, VarZeroSlice, VarZeroVec, ZeroVecError};
+use alloc::boxed::Box;
+use core::cmp::Ordering;
+use core::fmt;
+use core::ops::Deref;
+
+/// A byte slice that is expected to be a UTF-8 string but does not enforce that invariant.
+///
+/// Use this type instead of `str` if you don't need to enforce UTF-8 during deserialization. For
+/// example, strings that are keys of a map don't need to ever be reified as `str`s.
+///
+/// [`UnvalidatedStr`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
+///
+/// The main advantage of this type over `[u8]` is that it serializes as a string in
+/// human-readable formats like JSON.
+///
+/// # Examples
+///
+/// Using an [`UnvalidatedStr`] as the key of a [`ZeroMap`]:
+///
+/// ```
+/// use zerovec::ule::UnvalidatedStr;
+/// use zerovec::ZeroMap;
+///
+/// let map: ZeroMap<UnvalidatedStr, usize> = [
+/// (UnvalidatedStr::from_str("abc"), 11),
+/// (UnvalidatedStr::from_str("def"), 22),
+/// (UnvalidatedStr::from_str("ghi"), 33),
+/// ]
+/// .into_iter()
+/// .collect();
+///
+/// let key = "abc";
+/// let value = map.get_copied_by(|uvstr| uvstr.as_bytes().cmp(key.as_bytes()));
+/// assert_eq!(Some(11), value);
+/// ```
+///
+/// [`ZeroMap`]: crate::ZeroMap
+#[repr(transparent)]
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // transparent newtype
+pub struct UnvalidatedStr([u8]);
+
+impl fmt::Debug for UnvalidatedStr {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Debug as a string if possible
+ match self.try_as_str() {
+ Ok(s) => fmt::Debug::fmt(s, f),
+ Err(_) => fmt::Debug::fmt(&self.0, f),
+ }
+ }
+}
+
+impl UnvalidatedStr {
+ /// Create a [`UnvalidatedStr`] from a byte slice.
+ #[inline]
+ pub const fn from_bytes(other: &[u8]) -> &Self {
+ // Safety: UnvalidatedStr is transparent over [u8]
+ unsafe { core::mem::transmute(other) }
+ }
+
+ /// Create a [`UnvalidatedStr`] from a string slice.
+ #[inline]
+ pub const fn from_str(s: &str) -> &Self {
+ Self::from_bytes(s.as_bytes())
+ }
+
+ /// Create a [`UnvalidatedStr`] from boxed bytes.
+ #[inline]
+ pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
+ // Safety: UnvalidatedStr is transparent over [u8]
+ unsafe { core::mem::transmute(other) }
+ }
+
+ /// Create a [`UnvalidatedStr`] from a boxed `str`.
+ #[inline]
+ pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
+ Self::from_boxed_bytes(other.into_boxed_bytes())
+ }
+
+ /// Get the bytes from a [`UnvalidatedStr`].
+ #[inline]
+ pub const fn as_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ /// Attempt to convert a [`UnvalidatedStr`] to a `str`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedStr;
+ ///
+ /// static A: &UnvalidatedStr = UnvalidatedStr::from_bytes(b"abc");
+ ///
+ /// let b = A.try_as_str().unwrap();
+ /// assert_eq!(b, "abc");
+ /// ```
+ // Note: this is const starting in 1.63
+ #[inline]
+ pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
+ core::str::from_utf8(&self.0)
+ }
+}
+
+impl<'a> From<&'a str> for &'a UnvalidatedStr {
+ #[inline]
+ fn from(other: &'a str) -> Self {
+ UnvalidatedStr::from_str(other)
+ }
+}
+
+impl From<Box<str>> for Box<UnvalidatedStr> {
+ #[inline]
+ fn from(other: Box<str>) -> Self {
+ UnvalidatedStr::from_boxed_str(other)
+ }
+}
+
+impl Deref for UnvalidatedStr {
+ type Target = [u8];
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<'a> ZeroMapKV<'a> for UnvalidatedStr {
+ type Container = VarZeroVec<'a, UnvalidatedStr>;
+ type Slice = VarZeroSlice<UnvalidatedStr>;
+ type GetType = UnvalidatedStr;
+ type OwnedType = Box<UnvalidatedStr>;
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// 1. UnvalidatedStr does not include any uninitialized or padding bytes (transparent over a ULE)
+// 2. UnvalidatedStr is aligned to 1 byte (transparent over a ULE)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid (impossible)
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety (impossible)
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data (returns the argument directly)
+// 6. All other methods are defaulted
+// 7. `UnvalidatedStr` byte equality is semantic equality (transparent over a ULE)
+unsafe impl VarULE for UnvalidatedStr {
+ #[inline]
+ fn validate_byte_slice(_: &[u8]) -> Result<(), ZeroVecError> {
+ Ok(())
+ }
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ UnvalidatedStr::from_bytes(bytes)
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl serde::Serialize for UnvalidatedStr {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::Serializer,
+ {
+ use serde::ser::Error;
+ let s = self
+ .try_as_str()
+ .map_err(|_| S::Error::custom("invalid UTF-8 in UnvalidatedStr"))?;
+ if serializer.is_human_readable() {
+ serializer.serialize_str(s)
+ } else {
+ serializer.serialize_bytes(s.as_bytes())
+ }
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for Box<UnvalidatedStr> {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ if deserializer.is_human_readable() {
+ let boxed_str = Box::<str>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_boxed_str(boxed_str))
+ } else {
+ let boxed_bytes = Box::<[u8]>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_boxed_bytes(boxed_bytes))
+ }
+ }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de, 'a> serde::Deserialize<'de> for &'a UnvalidatedStr
+where
+ 'de: 'a,
+{
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ if deserializer.is_human_readable() {
+ let s = <&str>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_str(s))
+ } else {
+ let bytes = <&[u8]>::deserialize(deserializer)?;
+ Ok(UnvalidatedStr::from_bytes(bytes))
+ }
+ }
+}
+
+/// A u8 array of little-endian data that is expected to be a Unicode scalar value, but is not
+/// validated as such.
+///
+/// Use this type instead of `char` when you want to deal with data that is expected to be valid
+/// Unicode scalar values, but you want control over when or if you validate that assumption.
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::ule::{RawBytesULE, UnvalidatedChar, ULE};
+/// use zerovec::{ZeroSlice, ZeroVec};
+///
+/// // data known to be little-endian three-byte chunks of valid Unicode scalar values
+/// let data = [0x68, 0x00, 0x00, 0x69, 0x00, 0x00, 0x4B, 0xF4, 0x01];
+/// // ground truth expectation
+/// let real = ['h', 'i', '👋'];
+///
+/// let chars: &ZeroSlice<UnvalidatedChar> = ZeroSlice::parse_byte_slice(&data).expect("invalid data length");
+/// let parsed: Vec<_> = chars.iter().map(|c| unsafe { c.to_char_unchecked() }).collect();
+/// assert_eq!(&parsed, &real);
+///
+/// let real_chars: ZeroVec<_> = real.iter().copied().map(UnvalidatedChar::from_char).collect();
+/// let serialized_data = chars.as_bytes();
+/// assert_eq!(serialized_data, &data);
+/// ```
+#[repr(transparent)]
+#[derive(PartialEq, Eq, Clone, Copy, Hash)]
+pub struct UnvalidatedChar([u8; 3]);
+
+impl UnvalidatedChar {
+ /// Create a [`UnvalidatedChar`] from a `char`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedChar;
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(a.try_to_char().unwrap(), 'a');
+ /// ```
+ #[inline]
+ pub const fn from_char(c: char) -> Self {
+ let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ #[inline]
+ #[doc(hidden)]
+ pub const fn from_u24(c: u32) -> Self {
+ let [u0, u1, u2, _u3] = c.to_le_bytes();
+ Self([u0, u1, u2])
+ }
+
+ /// Attempt to convert a [`UnvalidatedChar`] to a `char`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, UnvalidatedChar};
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(a.try_to_char(), Ok('a'));
+ ///
+ /// let b = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+ /// assert!(matches!(b.try_to_char(), Err(_)));
+ /// ```
+ #[inline]
+ pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
+ let [u0, u1, u2] = self.0;
+ char::try_from(u32::from_le_bytes([u0, u1, u2, 0]))
+ }
+
+ /// Convert a [`UnvalidatedChar`] to a `char`, returning [`char::REPLACEMENT_CHARACTER`]
+ /// if the `UnvalidatedChar` does not represent a valid Unicode scalar value.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::{AsULE, UnvalidatedChar};
+ ///
+ /// let a = UnvalidatedChar::from_unaligned([0xFF, 0xFF, 0xFF].into());
+ /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER);
+ /// ```
+ #[inline]
+ pub fn to_char_lossy(self) -> char {
+ self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
+ }
+
+ /// Convert a [`UnvalidatedChar`] to a `char` without checking that it is
+ /// a valid Unicode scalar value.
+ ///
+ /// # Safety
+ ///
+ /// The `UnvalidatedChar` must be a valid Unicode scalar value in little-endian order.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ule::UnvalidatedChar;
+ ///
+ /// let a = UnvalidatedChar::from_char('a');
+ /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a');
+ /// ```
+ #[inline]
+ pub unsafe fn to_char_unchecked(self) -> char {
+ let [u0, u1, u2] = self.0;
+ char::from_u32_unchecked(u32::from_le_bytes([u0, u1, u2, 0]))
+ }
+}
+
+impl RawBytesULE<3> {
+ /// Converts a [`UnvalidatedChar`] to its ULE type. This is equivalent to calling
+ /// [`AsULE::to_unaligned`].
+ #[inline]
+ pub const fn from_unvalidated_char(uc: UnvalidatedChar) -> Self {
+ RawBytesULE(uc.0)
+ }
+}
+
+impl AsULE for UnvalidatedChar {
+    type ULE = RawBytesULE<3>;
+
+    /// Moves the three stored bytes into the unaligned wrapper unchanged.
+    #[inline]
+    fn to_unaligned(self) -> Self::ULE {
+        let Self(bytes) = self;
+        RawBytesULE(bytes)
+    }
+
+    /// Takes the three bytes back out of the unaligned wrapper; nothing is validated.
+    #[inline]
+    fn from_unaligned(unaligned: Self::ULE) -> Self {
+        let RawBytesULE(bytes) = unaligned;
+        Self(bytes)
+    }
+}
+
+// Safety: UnvalidatedChar holds the same three little-endian bytes as its AsULE::ULE type,
+// RawBytesULE<3>, and to_unaligned/from_unaligned above copy those bytes across unchanged
+unsafe impl EqULE for UnvalidatedChar {}
+
+impl fmt::Debug for UnvalidatedChar {
+    /// Formats a valid value like the `char` it encodes, and anything else as its raw bytes.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Ok(c) = self.try_to_char() {
+            return fmt::Debug::fmt(&c, f);
+        }
+        // Not a Unicode scalar value: fall back to the three underlying bytes.
+        fmt::Debug::fmt(&self.0, f)
+    }
+}
+
+impl PartialOrd for UnvalidatedChar {
+    /// Delegates to the total order from the [`Ord`] impl, so the two always agree.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+impl Ord for UnvalidatedChar {
+    /// Orders by the numeric value of the stored bytes.
+    ///
+    /// Hand-written because a derived `Ord` would compare the little-endian bytes
+    /// lexicographically, which is not numeric order.
+    fn cmp(&self, other: &Self) -> Ordering {
+        // Zero-extend a 24-bit little-endian value to a `u32`.
+        let widen = |[b0, b1, b2]: [u8; 3]| u32::from_le_bytes([b0, b1, b2, 0]);
+        let lhs = widen(self.0);
+        let rhs = widen(other.0);
+        lhs.cmp(&rhs)
+    }
+}
+
+impl From<char> for UnvalidatedChar {
+    /// Infallible conversion; forwards to `UnvalidatedChar::from_char`.
+    #[inline]
+    fn from(value: char) -> Self {
+        UnvalidatedChar::from_char(value)
+    }
+}
+
+impl TryFrom<UnvalidatedChar> for char {
+    type Error = core::char::CharTryFromError;
+
+    /// Fallible conversion; forwards to [`UnvalidatedChar::try_to_char`].
+    #[inline]
+    fn try_from(value: UnvalidatedChar) -> Result<char, Self::Error> {
+        UnvalidatedChar::try_to_char(value)
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl serde::Serialize for UnvalidatedChar {
+    /// Writes a `char` for human-readable formats and the raw 3-byte array otherwise.
+    /// In both cases a value that is not a valid Unicode scalar value is rejected
+    /// with a custom serializer error.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::Error;
+        // Validate before picking a representation so both formats fail on bad data.
+        let c = match self.try_to_char() {
+            Ok(c) => c,
+            Err(_) => {
+                return Err(S::Error::custom(
+                    "invalid Unicode scalar value in UnvalidatedChar",
+                ))
+            }
+        };
+        if !serializer.is_human_readable() {
+            return self.0.serialize(serializer);
+        }
+        serializer.serialize_char(c)
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for UnvalidatedChar {
+    /// Reads a `char` from human-readable formats and a raw 3-byte array otherwise.
+    /// The raw bytes are stored as-is, without checking that they form a valid `char`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        if !deserializer.is_human_readable() {
+            return <[u8; 3]>::deserialize(deserializer).map(UnvalidatedChar);
+        }
+        <char>::deserialize(deserializer).map(UnvalidatedChar::from_char)
+    }
+}
+
+#[cfg(feature = "databake")]
+impl databake::Bake for UnvalidatedChar {
+    /// Emits `from_char(..)` when the value is a valid `char`, and `from_u24(..)`
+    /// with the raw numeric value otherwise.
+    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+        // Both outcomes emit paths into `zerovec`, so register the crate once up front.
+        env.insert("zerovec");
+        if let Ok(ch) = self.try_to_char() {
+            let ch = ch.bake(env);
+            return databake::quote! {
+                zerovec::ule::UnvalidatedChar::from_char(#ch)
+            };
+        }
+        let [b0, b1, b2] = self.0;
+        let u24 = u32::from_le_bytes([b0, b1, b2, 0]);
+        databake::quote! {
+            zerovec::ule::UnvalidatedChar::from_u24(#u24)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::ZeroVec;
+
+ #[test]
+ fn test_serde_fail() {
+ let uc = UnvalidatedChar([0xFF, 0xFF, 0xFF]); // 0xFFFFFF is above U+10FFFF, so not a Unicode scalar value
+ serde_json::to_string(&uc).expect_err("serialize invalid char bytes"); // human-readable format
+ bincode::serialize(&uc).expect_err("serialize invalid char bytes"); // binary format is rejected as well
+ }
+
+ #[test]
+ fn test_serde_json() {
+ let c = '🙃';
+ let uc = UnvalidatedChar::from_char(c);
+ let json_ser = serde_json::to_string(&uc).unwrap();
+
+ assert_eq!(json_ser, r#""🙃""#); // serialized as a one-character JSON string
+
+ let json_de: UnvalidatedChar = serde_json::from_str(&json_ser).unwrap();
+
+ assert_eq!(uc, json_de);
+ }
+
+ #[test]
+ fn test_serde_bincode() {
+ let c = '🙃';
+ let uc = UnvalidatedChar::from_char(c);
+ let bytes_ser = bincode::serialize(&uc).unwrap();
+
+ assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]); // U+1F643 as three little-endian bytes
+
+ let bytes_de: UnvalidatedChar = bincode::deserialize(&bytes_ser).unwrap();
+
+ assert_eq!(uc, bytes_de);
+ }
+
+ #[test]
+ fn test_representation() {
+ let chars = ['w', 'ω', '文', '𑄃', '🙃'];
+
+ // backed by [UnvalidatedChar]
+ let uvchars: Vec<_> = chars
+ .iter()
+ .copied()
+ .map(UnvalidatedChar::from_char)
+ .collect();
+ // backed by [RawBytesULE<3>]
+ let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
+
+ let ule_bytes = zvec.as_bytes();
+ let uvbytes;
+ unsafe {
+ let ptr = &uvchars[..] as *const _ as *const u8; // view the Vec<UnvalidatedChar> storage as raw bytes
+ uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len()); // NOTE(review): relies on UnvalidatedChar being 3 bytes without padding; its definition is not visible here
+ }
+
+ // UnvalidatedChar is defined as little-endian, so this must be true on all platforms
+ // also asserts that to_unaligned/from_unaligned are no-ops
+ assert_eq!(uvbytes, ule_bytes);
+
+ assert_eq!(
+ &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], // each code point as three little-endian bytes
+ ule_bytes
+ );
+ }
+
+ #[test]
+ fn test_char_bake() {
+ databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_char('b'), zerovec);
+ // surrogate code point (55296 == 0xD800), which is not a valid `char` and so takes the `from_u24` path
+ databake::test_bake!(UnvalidatedChar, const: crate::ule::UnvalidatedChar::from_u24(55296u32), zerovec);
+ }
+}
diff --git a/third_party/rust/zerovec/src/varzerovec/components.rs b/third_party/rust/zerovec/src/varzerovec/components.rs
new file mode 100644
index 0000000000..9b48a5bd60
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/components.rs
@@ -0,0 +1,574 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+use alloc::boxed::Box;
+use alloc::format;
+use alloc::string::String;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::convert::TryFrom;
+use core::marker::PhantomData;
+use core::ops::Range;
+
+// Layout constants for the serialized buffer. Also used by owned.rs
+pub(super) const LENGTH_WIDTH: usize = 4; // bytes taken by the element-count prefix at the start of the buffer
+pub(super) const METADATA_WIDTH: usize = 0; // bytes between the count prefix and the index array (currently none)
+pub(super) const MAX_LENGTH: usize = u32::MAX as usize; // largest element count write_serializable_bytes accepts
+pub(super) const MAX_INDEX: usize = u32::MAX as usize; // largest data offset write_serializable_bytes accepts
+
+/// This trait allows switching between different possible internal
+/// representations of VarZeroVec.
+///
+/// Currently this crate supports two formats: [`Index16`] and [`Index32`],
+/// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec)
+/// types unless explicitly specified otherwise.
+///
+/// Do not implement this trait: its internals may be changed in the future,
+/// and all of its associated items are hidden from the docs.
+#[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period
+pub unsafe trait VarZeroVecFormat: 'static + Sized {
+ #[doc(hidden)]
+ const INDEX_WIDTH: usize; // width in bytes of one entry of the index array
+ #[doc(hidden)]
+ const MAX_VALUE: u32; // exclusive upper bound that compute_serializable_len places on the total serialized length
+ /// This is always `RawBytesULE<Self::INDEX_WIDTH>`; however,
+ /// Rust does not currently support using associated constants in const
+ /// generics
+ #[doc(hidden)]
+ type RawBytes: ULE;
+
+ // various conversions because RawBytes is an associated type rather than a concrete `RawBytesULE<N>`
+ #[doc(hidden)]
+ fn rawbytes_to_usize(raw: Self::RawBytes) -> usize;
+ #[doc(hidden)]
+ fn usize_to_rawbytes(u: usize) -> Self::RawBytes;
+
+ #[doc(hidden)]
+ fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes];
+}
+
+/// This is a [`VarZeroVecFormat`] that stores u16s (two bytes per entry) in the index array.
+/// It has a smaller data size, but larger arrays are more likely
+/// to be unrepresentable (and error on construction).
+///
+/// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) types.
+#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // unit marker type
+pub struct Index16;
+
+/// This is a [`VarZeroVecFormat`] that stores u32s (four bytes per entry) in the index array.
+/// It has a larger data size, but supports large arrays without
+/// problems.
+#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[allow(clippy::exhaustive_structs)] // unit marker type
+pub struct Index32;
+
+// Format with two-byte (u16) index entries.
+unsafe impl VarZeroVecFormat for Index16 {
+    const INDEX_WIDTH: usize = 2;
+    const MAX_VALUE: u32 = u16::MAX as u32;
+    type RawBytes = RawBytesULE<2>;
+
+    /// Decodes one stored index entry into a `usize`.
+    #[inline]
+    fn rawbytes_to_usize(raw: Self::RawBytes) -> usize {
+        let value: u16 = raw.as_unsigned_int();
+        value as usize
+    }
+
+    /// Encodes `u` as an index entry. Values above `u16::MAX` are truncated by the cast.
+    #[inline]
+    fn usize_to_rawbytes(u: usize) -> Self::RawBytes {
+        let narrowed = u as u16;
+        narrowed.to_unaligned()
+    }
+
+    /// Reinterprets a mutable byte slice as a slice of index entries.
+    #[inline]
+    fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes] {
+        RawBytesULE::<2>::from_byte_slice_unchecked_mut(bytes)
+    }
+}
+
+// Format with four-byte (u32) index entries.
+unsafe impl VarZeroVecFormat for Index32 {
+    const INDEX_WIDTH: usize = 4;
+    const MAX_VALUE: u32 = u32::MAX;
+    type RawBytes = RawBytesULE<4>;
+
+    /// Decodes one stored index entry into a `usize`.
+    #[inline]
+    fn rawbytes_to_usize(raw: Self::RawBytes) -> usize {
+        let value: u32 = raw.as_unsigned_int();
+        value as usize
+    }
+
+    /// Encodes `u` as an index entry. Values above `u32::MAX` are truncated by the cast.
+    #[inline]
+    fn usize_to_rawbytes(u: usize) -> Self::RawBytes {
+        let narrowed = u as u32;
+        narrowed.to_unaligned()
+    }
+
+    /// Reinterprets a mutable byte slice as a slice of index entries.
+    #[inline]
+    fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes] {
+        RawBytesULE::<4>::from_byte_slice_unchecked_mut(bytes)
+    }
+}
+
+/// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec
+/// internal representation code lies.
+///
+/// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into
+/// the buffer components. Logically this is capable of behaving as
+/// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually
+/// exist.
+///
+/// See [`VarZeroVecComponents::parse_byte_slice()`] for information on the internal invariants involved
+#[derive(Debug)]
+pub struct VarZeroVecComponents<'a, T: ?Sized, F> {
+ /// The number of elements, as read from the length prefix of the buffer
+ len: u32,
+ /// The index array as raw bytes: `len` entries of `F::INDEX_WIDTH` bytes each, holding offsets into `things`
+ indices: &'a [u8],
+ /// The contiguous list of `T::VarULE`s
+ things: &'a [u8],
+ /// The original slice this was constructed from (length prefix, indices and `things` together)
+ entire_slice: &'a [u8],
+ marker: PhantomData<(&'a T, F)>,
+}
+
+// #[derive(Copy, Clone)] won't work here since it would add `T: Copy` / `T: Clone` bounds,
+// which we do not want: every field is a reference, an integer or PhantomData
+impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {}
+impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> {
+    /// Same as [`VarZeroVecComponents::new`]: components with no elements.
+    #[inline]
+    fn default() -> Self {
+        VarZeroVecComponents::new()
+    }
+}
+
+impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> {
+    /// Creates components with no elements; every byte slice is empty.
+    #[inline]
+    pub fn new() -> Self {
+        let empty: &'a [u8] = &[];
+        Self {
+            len: 0,
+            indices: empty,
+            things: empty,
+            entire_slice: empty,
+            marker: PhantomData,
+        }
+    }
+}
+impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> {
+    /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size:
+    ///
+    /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize)
+    /// - There must be at least `F::INDEX_WIDTH * length + 4` bytes total, to form the array `indices` of indices
+    /// - `indices[i]..indices[i+1]` must index into a valid section of
+    ///   `things`, such that it parses to a `T::VarULE`
+    /// - `indices[len - 1]..things.len()` must index into a valid section of
+    ///   `things`, such that it parses to a `T::VarULE`
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ZeroVecError::VarZeroVecFormatError`] when the buffer is too short for its
+    /// declared length, including when the size of the index array would overflow `usize`.
+    /// Errors from validating the indices and elements are propagated unchanged.
+    #[inline]
+    pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> {
+        // The empty VZV is special-cased to the empty slice
+        if slice.is_empty() {
+            return Ok(VarZeroVecComponents {
+                len: 0,
+                indices: &[],
+                things: &[],
+                entire_slice: slice,
+                marker: PhantomData,
+            });
+        }
+        let len_bytes = slice
+            .get(0..LENGTH_WIDTH)
+            .ok_or(ZeroVecError::VarZeroVecFormatError)?;
+        let len_ule = RawBytesULE::<LENGTH_WIDTH>::parse_byte_slice(len_bytes)
+            .map_err(|_| ZeroVecError::VarZeroVecFormatError)?;
+
+        let len = len_ule
+            .get(0)
+            .ok_or(ZeroVecError::VarZeroVecFormatError)?
+            .as_unsigned_int();
+
+        // `len` comes straight from the input, so the size of the index array is computed
+        // with checked arithmetic: where `usize` is 32 bits wide the product can overflow,
+        // and that must surface as a format error instead of a panic or a wrapped offset.
+        let indices_start = LENGTH_WIDTH + METADATA_WIDTH;
+        let indices_end = F::INDEX_WIDTH
+            .checked_mul(len as usize)
+            .and_then(|indices_len| indices_len.checked_add(indices_start))
+            .ok_or(ZeroVecError::VarZeroVecFormatError)?;
+
+        let indices_bytes = slice
+            .get(indices_start..indices_end)
+            .ok_or(ZeroVecError::VarZeroVecFormatError)?;
+        // Everything after the index array is element data.
+        let things = slice
+            .get(indices_end..)
+            .ok_or(ZeroVecError::VarZeroVecFormatError)?;
+
+        let borrowed = VarZeroVecComponents {
+            len,
+            indices: indices_bytes,
+            things,
+            entire_slice: slice,
+            marker: PhantomData,
+        };
+
+        borrowed.check_indices_and_things()?;
+
+        Ok(borrowed)
+    }
+
+    /// Split a byte slice into a [`VarZeroVecComponents`] without validating it.
+    /// The slice must previously have been accepted by
+    /// [`VarZeroVecComponents::parse_byte_slice()`]; the result is then the same
+    /// object that [`VarZeroVecComponents::parse_byte_slice()`] would return.
+    ///
+    /// # Safety
+    /// The bytes must have previously successfully run through
+    /// [`VarZeroVecComponents::parse_byte_slice()`]
+    pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
+        // The empty VZV is special-cased to the empty slice
+        if slice.is_empty() {
+            return Self {
+                len: 0,
+                indices: &[],
+                things: &[],
+                entire_slice: slice,
+                marker: PhantomData,
+            };
+        }
+        // The element count sits in the first LENGTH_WIDTH bytes.
+        let len_ule =
+            RawBytesULE::<LENGTH_WIDTH>::from_byte_slice_unchecked(slice.get_unchecked(0..LENGTH_WIDTH));
+        let len = len_ule.get_unchecked(0).as_unsigned_int();
+
+        // The index array follows the prefix; everything after it is element data.
+        let indices_start = LENGTH_WIDTH + METADATA_WIDTH;
+        let indices_end = indices_start + F::INDEX_WIDTH * (len as usize);
+        let indices_bytes = slice.get_unchecked(indices_start..indices_end);
+        let things = slice.get_unchecked(indices_end..);
+
+        Self {
+            len,
+            indices: indices_bytes,
+            things,
+            entire_slice: slice,
+            marker: PhantomData,
+        }
+    }
+
+    /// Number of elements stored in this vector.
+    #[inline]
+    pub fn len(self) -> usize {
+        let Self { len, .. } = self;
+        len as usize
+    }
+
+    /// Returns `true` if the vector contains no elements, i.e. its index array is empty.
+    #[inline]
+    pub fn is_empty(self) -> bool {
+        let Self { indices, .. } = self;
+        indices.is_empty()
+    }
+
+    /// Fetch the element at position `idx`, or `None` when `idx` is out of bounds.
+    #[inline]
+    pub fn get(self, idx: usize) -> Option<&'a T> {
+        if idx < self.len() {
+            // Safety: `idx` was just checked against `self.len()`.
+            Some(unsafe { self.get_unchecked(idx) })
+        } else {
+            None
+        }
+    }
+
+    /// Fetch the element at position `idx` without any bounds or validity checks.
+    ///
+    /// Safety:
+    /// - `idx` must be in bounds (`idx < self.len()`)
+    #[inline]
+    pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T {
+        // Locate the element's bytes inside `things`, then reinterpret them as a `T`.
+        let element_bytes = self.things.get_unchecked(self.get_things_range(idx));
+        T::from_byte_slice_unchecked(element_bytes)
+    }
+
+    /// Compute the byte range inside `things` occupied by the element at `idx`.
+    /// Does not bounds check.
+    ///
+    /// The range starts at the `idx`'th stored offset and runs up to the next stored
+    /// offset, or to the end of `things` for the final element.
+    ///
+    /// Safety:
+    /// - `idx` must be in bounds (`idx < self.len()`)
+    #[inline]
+    unsafe fn get_things_range(self, idx: usize) -> Range<usize> {
+        let indices = self.indices_slice();
+        let start = F::rawbytes_to_usize(*indices.get_unchecked(idx));
+        let is_last = idx + 1 == self.len();
+        let end = if is_last {
+            self.things.len()
+        } else {
+            F::rawbytes_to_usize(*indices.get_unchecked(idx + 1))
+        };
+        debug_assert!(start <= end);
+        start..end
+    }
+
+ /// Get the range in `entire_slice` for the element at `idx`. Does not bounds check.
+ /// The result is the element's range within `things`, shifted by the byte offset of `things` inside `entire_slice`.
+ /// Safety:
+ /// - `idx` must be in bounds (`idx < self.len()`)
+ #[inline]
+ pub(crate) unsafe fn get_range(self, idx: usize) -> Range<usize> {
+ let range = self.get_things_range(idx);
+ let offset = (self.things as *const [u8] as *const u8)
+ .offset_from(self.entire_slice as *const [u8] as *const u8)
+ as usize; // NOTE(review): assumes `things` is a subslice of `entire_slice`, as parse_byte_slice/from_bytes_unchecked set it up
+ range.start + offset..range.end + offset
+ }
+
+ /// Check the internal invariants of VarZeroVecComponents:
+ ///
+ /// - `indices[i]..indices[i+1]` must index into a valid section of
+ /// `things`, such that it parses to a `T::VarULE`
+ /// - `indices[len - 1]..things.len()` must index into a valid section of
+ /// `things`, such that it parses to a `T::VarULE`
+ /// - `indices` is monotonically increasing and `indices[0]` is zero
+ ///
+ /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods
+ /// assume that the slice has been passed through check_indices_and_things
+ #[inline]
+ #[allow(clippy::len_zero)] // more explicit to enforce safety invariants
+ fn check_indices_and_things(self) -> Result<(), ZeroVecError> {
+ assert_eq!(self.len(), self.indices_slice().len()); // panics (rather than returning Err) if the index bytes do not hold exactly `len` entries
+ if self.len() == 0 {
+ if self.things.len() > 0 { // an empty vector must not carry element data
+ return Err(ZeroVecError::VarZeroVecFormatError);
+ } else {
+ return Ok(());
+ }
+ }
+ // Safety: index 0 is in bounds: the indices slice has `self.len()` entries (assertion above) and `self.len() != 0` here
+ let mut start = F::rawbytes_to_usize(unsafe { *self.indices_slice().get_unchecked(0) });
+ if start != 0 { // the first element must begin at the start of `things`
+ return Err(ZeroVecError::VarZeroVecFormatError);
+ }
+ for i in 0..self.len() {
+ let end = if i == self.len() - 1 { // the last element runs to the end of `things`
+ self.things.len()
+ } else {
+ // Safety: i+1 is in bounds (assertion above, and i is not the last index)
+ F::rawbytes_to_usize(unsafe { *self.indices_slice().get_unchecked(i + 1) })
+ };
+ if start > end { // offsets must never decrease
+ return Err(ZeroVecError::VarZeroVecFormatError);
+ }
+ if end > self.things.len() { // offsets must stay inside `things`
+ return Err(ZeroVecError::VarZeroVecFormatError);
+ }
+ // Safety: start..end is a valid range in self.things (both checks above passed)
+ let bytes = unsafe { self.things.get_unchecked(start..end) };
+ T::parse_byte_slice(bytes)?; // every element must validate as a `T`
+ start = end; // the next element begins where this one ended
+ }
+ Ok(())
+ }
+
+    /// Create an iterator over the Ts contained in VarZeroVecComponents
+    ///
+    /// Each element's start offset is paired with its end offset: the next stored
+    /// offset, or the length of `things` for the last element.
+    #[inline]
+    pub fn iter(self) -> impl Iterator<Item = &'a T> {
+        let starts = self
+            .indices_slice()
+            .iter()
+            .copied()
+            .map(F::rawbytes_to_usize);
+        // The end offsets are the start offsets shifted by one, closed off by `things.len()`.
+        let ends = starts.clone().skip(1).chain([self.things.len()]);
+        starts.zip(ends).map(move |(start, end)| unsafe {
+            T::from_byte_slice_unchecked(self.things.get_unchecked(start..end))
+        })
+    }
+
+    /// Copies every element into its own `Box<T>` and collects them in order.
+    pub fn to_vec(self) -> Vec<Box<T>> {
+        self.iter().map(|element| element.to_boxed()).collect()
+    }
+
+ #[inline]
+ fn indices_slice(&self) -> &'a [F::RawBytes] { // view of the raw `indices` bytes as typed index entries
+ unsafe { F::RawBytes::from_byte_slice_unchecked(self.indices) } // NOTE(review): relies on `indices` holding a whole number of `F::RawBytes` entries, as the constructors slice it
+ }
+
+    // Render the decoded index array as a string, for use while debugging
+    #[allow(unused)] // useful for debugging
+    pub(crate) fn dump(&self) -> String {
+        let indices: Vec<usize> = self
+            .indices_slice()
+            .iter()
+            .map(|raw| F::rawbytes_to_usize(*raw))
+            .collect();
+        format!("VarZeroVecComponents {{ indices: {indices:?} }}")
+    }
+}
+
+impl<'a, T, F> VarZeroVecComponents<'a, T, F>
+where
+    T: VarULE,
+    T: ?Sized,
+    T: Ord,
+    F: VarZeroVecFormat,
+{
+    /// Binary searches a sorted `VarZeroVecComponents<T>` for the given element. For more information, see
+    /// the primitive function [`binary_search`](slice::binary_search).
+    pub fn binary_search(&self, needle: &T) -> Result<usize, usize> {
+        let all_indices = self.indices_slice();
+        self.binary_search_impl(|probe| Ord::cmp(probe, needle), all_indices)
+    }
+
+    /// Binary searches only the elements whose positions fall in `range`.
+    ///
+    /// Returns `None` when `range` is not a valid range of element positions.
+    pub fn binary_search_in_range(
+        &self,
+        needle: &T,
+        range: Range<usize>,
+    ) -> Option<Result<usize, usize>> {
+        self.indices_slice()
+            .get(range)
+            .map(|sub_indices| self.binary_search_impl(|probe| probe.cmp(needle), sub_indices))
+    }
+}
+
+impl<'a, T, F> VarZeroVecComponents<'a, T, F>
+where
+ T: VarULE,
+ T: ?Sized,
+ F: VarZeroVecFormat,
+{
+    /// Binary searches a sorted `VarZeroVecComponents<T>` for the given predicate. For more information, see
+    /// the primitive function [`binary_search_by`](slice::binary_search_by).
+    pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
+        let all_indices = self.indices_slice();
+        self.binary_search_impl(predicate, all_indices)
+    }
+
+    /// Binary searches, with the given predicate, only the elements whose positions fall in `range`.
+    ///
+    /// Returns `None` when `range` is not a valid range of element positions.
+    pub fn binary_search_in_range_by(
+        &self,
+        predicate: impl FnMut(&T) -> Ordering,
+        range: Range<usize>,
+    ) -> Option<Result<usize, usize>> {
+        self.indices_slice()
+            .get(range)
+            .map(|sub_indices| self.binary_search_impl(predicate, sub_indices))
+    }
+
+ /// Binary searches a sorted `VarZeroVecComponents<T>` with the given predicate. For more information, see
+ /// the primitive function [`binary_search_by`](slice::binary_search_by), which this delegates to.
+ fn binary_search_impl(
+ &self,
+ mut predicate: impl FnMut(&T) -> Ordering,
+ indices_slice: &[F::RawBytes],
+ ) -> Result<usize, usize> {
+ // This code is an absolute atrocity. This code is not a place of honor. This
+ // code is known to the State of California to cause cancer.
+ //
+ // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
+ // We do not have a slice. We have something we can .get() and index on, but that is not
+ // a slice.
+ //
+ // The `binary_search*` functions also do not have a variant where they give you the element's
+ // index, which we could otherwise use to directly index `self`.
+ // We do have `self.indices`, but these are indices into a byte buffer, which cannot in
+ // isolation be used to recoup the logical index of the element they refer to.
+ //
+ // However, `binary_search_by()` provides references to the elements of the slice being iterated.
+ // Since the layout of Rust slices is well-defined, we can do pointer arithmetic on these references
+ // to obtain the index being used by the search.
+ //
+ // It's worth noting that the slice we choose to search is irrelevant, as long as it has the appropriate
+ // length. `self.indices` is defined to have length `self.len()`, so it is convenient to use
+ // here and does not require additional allocations.
+ //
+ // The alternative to doing this is to implement our own binary search. This is significantly less fun.
+
+ // Note: We always use zero_index relative to the whole indices array, even if we are
+ // only searching a subslice of it, so `index` below is a position in the whole vector. The value returned by `binary_search_by` is still relative to `indices_slice`.
+ let zero_index = self.indices.as_ptr() as *const _ as usize;
+ indices_slice.binary_search_by(|probe: &_| {
+ // `self.indices` is a vec of unaligned F::INDEX_WIDTH values, so we divide by F::INDEX_WIDTH
+ // to get the actual index
+ let index = (probe as *const _ as usize - zero_index) / F::INDEX_WIDTH;
+ // safety: `probe` points into `indices_slice`; the callers in this file only pass (subslices of) `self.indices_slice()`, so `index < self.len()`
+ let actual_probe = unsafe { self.get_unchecked(index) };
+ predicate(actual_probe)
+ })
+ }
+}
+
+/// Collects the bytes for a VarZeroSlice into a Vec.
+///
+/// Returns `None` when [`compute_serializable_len`] reports that the elements
+/// cannot be represented in format `F`. `elements` is expected to be non-empty.
+pub fn get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<Vec<u8>>
+where
+    T: VarULE + ?Sized,
+    A: EncodeAsVarULE<T>,
+    F: VarZeroVecFormat,
+{
+    debug_assert!(!elements.is_empty());
+    let total_len = compute_serializable_len::<T, A, F>(elements)?;
+    debug_assert!(total_len >= LENGTH_WIDTH as u32);
+    // Allocate the exact size up front; the writer fills in every byte.
+    let mut buffer: Vec<u8> = alloc::vec![0; total_len as usize];
+    write_serializable_bytes::<T, A, F>(elements, buffer.as_mut_slice());
+    Some(buffer)
+}
+
+/// Writes the bytes for a VarZeroSlice into an output buffer.
+///
+/// The layout is: the element count (`LENGTH_WIDTH` little-endian bytes), then one
+/// `F::INDEX_WIDTH`-byte offset per element, then the encoded elements back to back.
+/// Offsets are relative to the start of the first element.
+///
+/// Every byte in the buffer will be initialized after calling this function.
+///
+/// # Panics
+///
+/// Panics if the buffer is not exactly the correct length, if there are more than
+/// `MAX_LENGTH` elements, or if an element's offset does not fit in the index
+/// width of format `F`.
+pub fn write_serializable_bytes<T, A, F>(elements: &[A], output: &mut [u8])
+where
+    T: VarULE + ?Sized,
+    A: EncodeAsVarULE<T>,
+    F: VarZeroVecFormat,
+{
+    assert!(elements.len() <= MAX_LENGTH);
+    let num_elements_bytes = elements.len().to_le_bytes();
+    #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
+    output[0..LENGTH_WIDTH].copy_from_slice(&num_elements_bytes[0..LENGTH_WIDTH]);
+
+    // idx_offset = offset from the start of the buffer for the next index
+    let mut idx_offset: usize = LENGTH_WIDTH + METADATA_WIDTH;
+    // first_dat_offset = offset from the start of the buffer of the first data block
+    let first_dat_offset: usize = idx_offset + elements.len() * F::INDEX_WIDTH;
+    // dat_offset = offset from the start of the buffer of the next data block
+    let mut dat_offset: usize = first_dat_offset;
+
+    for element in elements.iter() {
+        let element_len = element.encode_var_ule_len();
+
+        let idx_limit = idx_offset + F::INDEX_WIDTH;
+        #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
+        let idx_slice = &mut output[idx_offset..idx_limit];
+        // VZV expects data offsets to be stored relative to the first data block
+        let idx = dat_offset - first_dat_offset;
+        // Only the low `F::INDEX_WIDTH` bytes of `idx` are stored below, so the offset
+        // has to fit in the format's index type. Checking against `MAX_INDEX` alone
+        // would let an `Index16` offset above `u16::MAX` be truncated silently.
+        assert!(
+            idx <= MAX_INDEX && idx <= F::MAX_VALUE as usize,
+            "VarZeroVec data offset does not fit in the index format"
+        );
+        #[allow(clippy::indexing_slicing)] // this function is explicitly panicky
+        idx_slice.copy_from_slice(&idx.to_le_bytes()[..F::INDEX_WIDTH]);
+
+        let dat_limit = dat_offset + element_len;
+        #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior
+        let dat_slice = &mut output[dat_offset..dat_limit];
+        element.encode_var_ule_write(dat_slice);
+        debug_assert_eq!(T::validate_byte_slice(dat_slice), Ok(()));
+
+        idx_offset = idx_limit;
+        dat_offset = dat_limit;
+    }
+
+    debug_assert_eq!(
+        idx_offset,
+        LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * elements.len()
+    );
+    // The data must end exactly at the end of the buffer.
+    assert_eq!(dat_offset, output.len());
+}
+
+/// Computes the number of bytes needed to serialize `elements` in format `F`:
+/// the length prefix, the metadata, one index entry per element, and the encoded elements.
+///
+/// Returns `None` if any intermediate value does not fit in a `u32`, or if the
+/// total is greater than or equal to `F::MAX_VALUE`.
+pub fn compute_serializable_len<T, A, F>(elements: &[A]) -> Option<u32>
+where
+    T: VarULE + ?Sized,
+    A: EncodeAsVarULE<T>,
+    F: VarZeroVecFormat,
+{
+    // Header: length prefix + metadata + index array.
+    let element_count = u32::try_from(elements.len()).ok()?;
+    let idx_len: u32 = element_count
+        .checked_mul(F::INDEX_WIDTH as u32)?
+        .checked_add(LENGTH_WIDTH as u32)?
+        .checked_add(METADATA_WIDTH as u32)?;
+
+    // Payload: the encoded length of every element, summed with overflow checks.
+    let mut data_len = 0u32;
+    for element in elements {
+        let element_len = u32::try_from(element.encode_var_ule_len()).ok()?;
+        data_len = data_len.checked_add(element_len)?;
+    }
+
+    let total = idx_len.checked_add(data_len)?;
+    if total >= F::MAX_VALUE {
+        None
+    } else {
+        Some(total)
+    }
+}
diff --git a/third_party/rust/zerovec/src/varzerovec/databake.rs b/third_party/rust/zerovec/src/varzerovec/databake.rs
new file mode 100644
index 0000000000..a3f9db2d17
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/databake.rs
@@ -0,0 +1,68 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{ule::VarULE, VarZeroSlice, VarZeroVec};
+use databake::*;
+
+impl<T: VarULE + ?Sized> Bake for VarZeroVec<'_, T> {
+    /// Emits `VarZeroVec::new()` for an empty vector, and otherwise an unchecked
+    /// construction from the vector's own bytes.
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        if self.is_empty() {
+            return quote! { zerovec::VarZeroVec::new() };
+        }
+        let bytes = databake::Bake::bake(&self.as_bytes(), env);
+        // Safe because self.as_bytes is a safe input
+        quote! { unsafe { zerovec::VarZeroVec::from_bytes_unchecked(#bytes) } }
+    }
+}
+
+impl<T: VarULE + ?Sized> Bake for &VarZeroSlice<T> {
+    /// Emits `VarZeroSlice::new_empty()` for an empty slice, and otherwise an unchecked
+    /// construction from the slice's own bytes.
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        if self.is_empty() {
+            return quote! { zerovec::VarZeroSlice::new_empty() };
+        }
+        let bytes = databake::Bake::bake(&self.as_bytes(), env);
+        // Safe because self.as_bytes is a safe input
+        quote! { unsafe { zerovec::VarZeroSlice::from_bytes_unchecked(#bytes) } }
+    }
+}
+
+#[test]
+fn test_baked_vec() { // covers both branches of the Bake impl: the empty vec and a non-empty byte buffer
+ test_bake!(
+ VarZeroVec<str>,
+ const: crate::VarZeroVec::new(),
+ zerovec
+ );
+ test_bake!(
+ VarZeroVec<str>,
+ const: unsafe {
+ crate::VarZeroVec::from_bytes_unchecked(
+ b"\x02\x01\0\x16\0M\x01\\\x11"
+ )
+ },
+ zerovec
+ );
+}
+
+#[test]
+fn test_baked_slice() { // covers both branches of the Bake impl: the empty slice and a non-empty byte buffer
+ test_bake!(
+ &VarZeroSlice<str>,
+ const: crate::VarZeroSlice::new_empty(),
+ zerovec
+ );
+ test_bake!(
+ &VarZeroSlice<str>,
+ const: unsafe {
+ crate::VarZeroSlice::from_bytes_unchecked(
+ b"\x02\x01\0\x16\0M\x01\\\x11"
+ )
+ },
+ zerovec
+ );
+}
diff --git a/third_party/rust/zerovec/src/varzerovec/mod.rs b/third_party/rust/zerovec/src/varzerovec/mod.rs
new file mode 100644
index 0000000000..2e9f680006
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/mod.rs
@@ -0,0 +1,26 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! See [`VarZeroVec`](crate::VarZeroVec) for details
+
+pub(crate) mod components;
+pub(crate) mod owned;
+pub(crate) mod slice;
+pub(crate) mod vec;
+
+#[cfg(feature = "databake")]
+mod databake;
+
+#[cfg(feature = "serde")]
+mod serde;
+
+pub use crate::{VarZeroSlice, VarZeroVec};
+
+#[cfg(feature = "bench")]
+#[doc(hidden)]
+pub use components::VarZeroVecComponents;
+
+pub use components::{Index16, Index32, VarZeroVecFormat};
+
+pub use owned::VarZeroVecOwned;
diff --git a/third_party/rust/zerovec/src/varzerovec/owned.rs b/third_party/rust/zerovec/src/varzerovec/owned.rs
new file mode 100644
index 0000000000..c5556315fb
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/owned.rs
@@ -0,0 +1,662 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// The mutation operations in this file should panic to prevent undefined behavior
+#![allow(clippy::unwrap_used)]
+#![allow(clippy::expect_used)]
+#![allow(clippy::indexing_slicing)]
+#![allow(clippy::panic)]
+
+use super::*;
+use crate::ule::*;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::any;
+use core::convert::TryInto;
+use core::marker::PhantomData;
+use core::ops::Deref;
+use core::ops::Range;
+use core::{fmt, ptr, slice};
+
+use super::components::LENGTH_WIDTH;
+use super::components::MAX_INDEX;
+use super::components::MAX_LENGTH;
+use super::components::METADATA_WIDTH;
+
+ /// A fully-owned [`VarZeroVec`]. This type has no lifetime but has the same
+ /// internal buffer representation as [`VarZeroVec`], making it cheaply convertible to
+ /// [`VarZeroVec`] and [`VarZeroSlice`].
+ ///
+ /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
+ /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
+ pub struct VarZeroVecOwned<T: ?Sized, F = Index16> {
+ marker: PhantomData<(Box<T>, F)>, // zero-sized marker tying `T` and `F` to the type; no `T` value is stored
+ // safety invariant: must parse into a valid VarZeroVecComponents
+ entire_slice: Vec<u8>, // the complete encoded buffer; empty when the vector has no elements
+ }
+
+ impl<T: ?Sized, F> Clone for VarZeroVecOwned<T, F> {
+     // Written by hand so that no `Clone` bound is required on `T` or `F`:
+     // only the byte buffer holds data that needs duplicating.
+     fn clone(&self) -> Self {
+         let entire_slice = self.entire_slice.clone();
+         Self {
+             marker: PhantomData,
+             entire_slice,
+         }
+     }
+ }
+
+ // The effect of a shift on the indices in the varzerovec (selects how `shift()` treats the index table).
+ #[derive(PartialEq)]
+ enum ShiftType {
+ Insert, // a new index entry is added, so the index table grows by one entry
+ Replace, // the element's data is resized; the number of index entries is unchanged
+ Remove, // an index entry is removed, so the index table shrinks by one entry
+ }
+
+ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVecOwned<T, F> {
+     type Target = VarZeroSlice<T, F>;
+     // Dereferencing exposes the borrowed slice view of the owned buffer.
+     fn deref(&self) -> &Self::Target {
+         VarZeroVecOwned::as_slice(self)
+     }
+ }
+
+ impl<T: VarULE + ?Sized, F> VarZeroVecOwned<T, F> {
+     /// Create a `VarZeroVecOwned` holding no elements (backed by an empty buffer).
+     pub fn new() -> Self {
+         let entire_slice = Vec::new();
+         Self {
+             marker: PhantomData,
+             entire_slice,
+         }
+     }
+ }
+
+impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
+ /// Create a `VarZeroVecOwned` that owns a copy of the bytes backing `slice`.
+ pub fn from_slice(slice: &VarZeroSlice<T, F>) -> Self {
+     let entire_slice: Vec<u8> = slice.as_bytes().into();
+     Self {
+         marker: PhantomData,
+         entire_slice,
+     }
+ }
+
+ /// Build a `VarZeroVecOwned` by encoding every item of `elements`.
+ ///
+ /// An empty input yields the empty vector. Returns `Err` with a static message when
+ /// the encoder cannot produce a buffer for the given elements.
+ pub fn try_from_elements<A>(elements: &[A]) -> Result<Self, &'static str>
+ where
+     A: EncodeAsVarULE<T>,
+ {
+     if elements.is_empty() {
+         return Ok(Self::from_slice(VarZeroSlice::new_empty()));
+     }
+     // TODO(#1410): Rethink length errors in VZV.
+     let entire_slice = components::get_serializable_bytes_non_empty::<T, A, F>(elements).ok_or(
+         "Attempted to build VarZeroVec out of elements that \
+          cumulatively are larger than a u32 in size",
+     )?;
+     Ok(Self {
+         marker: PhantomData,
+         entire_slice,
+     })
+ }
+
+ /// Borrow this vector as a [`VarZeroSlice`] over the same bytes.
+ pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
+     let bytes = self.entire_slice.as_slice();
+     // safety: the slice is known to come from a valid parsed VZV
+     unsafe { VarZeroSlice::from_byte_slice_unchecked(bytes) }
+ }
+
+ /// Create an empty vector whose buffer is preallocated for roughly `capacity`
+ /// elements. Element sizes are not known in advance, so the estimate budgets
+ /// one index entry plus 4 bytes of data per element.
+ pub(crate) fn with_capacity(capacity: usize) -> Self {
+     let bytes_per_element = F::INDEX_WIDTH + 4;
+     let entire_slice = Vec::with_capacity(capacity * bytes_per_element);
+     Self {
+         marker: PhantomData,
+         entire_slice,
+     }
+ }
+
+ /// Reserve buffer space for roughly `capacity` additional elements.
+ /// Element sizes are not known in advance, so the estimate budgets
+ /// one index entry plus 4 bytes of data per element.
+ pub(crate) fn reserve(&mut self, capacity: usize) {
+     let bytes_per_element = F::INDEX_WIDTH + 4;
+     self.entire_slice.reserve(capacity * bytes_per_element);
+ }
+
+ /// Get the position of a specific element in the data segment.
+ ///
+ /// If `idx == self.len()`, it will return the size of the data segment (where a new element would go).
+ ///
+ /// ## Safety
+ /// `idx <= self.len()` and `self.as_encoded_bytes()` is well-formed.
+ unsafe fn element_position_unchecked(&self, idx: usize) -> usize {
+ let len = self.len();
+ let out = if idx == len {
+ self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - (F::INDEX_WIDTH * len) // whole buffer minus header and index table = size of the data segment
+ } else {
+ F::rawbytes_to_usize(*self.index_data(idx)) // the stored index entry is the element's offset within the data segment
+ };
+ debug_assert!(
+ out + LENGTH_WIDTH + METADATA_WIDTH + len * F::INDEX_WIDTH <= self.entire_slice.len() // the position must fall inside the buffer
+ );
+ out
+ }
+
+ /// Get the range of a specific element in the data segment (offsets relative to the start of the data segment).
+ ///
+ /// ## Safety
+ /// `idx < self.len()` and `self.as_encoded_bytes()` is well-formed.
+ unsafe fn element_range_unchecked(&self, idx: usize) -> core::ops::Range<usize> {
+ let start = self.element_position_unchecked(idx);
+ let end = self.element_position_unchecked(idx + 1); // for the last element `idx + 1 == len`, which yields the data segment size
+ debug_assert!(start <= end, "{start} > {end}");
+ start..end
+ }
+
+ /// Overwrite the element count in the length header; the index table and data are neither adjusted nor validated.
+ ///
+ /// ## Safety
+ /// No safe functions may be called until `self.as_encoded_bytes()` is well-formed.
+ unsafe fn set_len(&mut self, len: usize) {
+ assert!(len <= MAX_LENGTH);
+ let len_bytes = len.to_le_bytes();
+ self.entire_slice[0..LENGTH_WIDTH].copy_from_slice(&len_bytes[0..LENGTH_WIDTH]); // the count is stored little-endian in the first LENGTH_WIDTH bytes
+ // Double-check that the length fits in the length field
+ assert_eq!(len_bytes[LENGTH_WIDTH..].iter().sum::<u8>(), 0); // every byte of `len` beyond LENGTH_WIDTH must be zero
+ }
+
+ // Byte range inside the encoded buffer that holds index entry number `index`.
+ fn index_range(index: usize) -> Range<usize> {
+     let start = LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * index;
+     let end = start + F::INDEX_WIDTH;
+     start..end
+ }
+
+ /// Return the raw bytes representing the given `index`.
+ ///
+ /// ## Safety
+ /// The index must be valid, and self.as_encoded_bytes() must be well-formed
+ unsafe fn index_data(&self, index: usize) -> &F::RawBytes {
+ &F::RawBytes::from_byte_slice_unchecked(&self.entire_slice[Self::index_range(index)])[0] // the sub-slice is `F::INDEX_WIDTH` bytes wide (presumably exactly one `F::RawBytes`), so take element 0
+ }
+
+ /// Return a mutable reference to the raw bytes of the index entry at `index`.
+ ///
+ /// ## Safety
+ /// The index must be valid. self.as_encoded_bytes() must have allocated space
+ /// for this index, but need not have its length appropriately set.
+ unsafe fn index_data_mut(&mut self, index: usize) -> &mut F::RawBytes {
+ let ptr = self.entire_slice.as_mut_ptr();
+ let range = Self::index_range(index);
+
+ // Doing this instead of just `get_unchecked_mut()` because it's unclear
+ // if `get_unchecked_mut()` can be called out of bounds on a slice even
+ // if we know the buffer is larger.
+ let data = slice::from_raw_parts_mut(ptr.add(range.start), F::INDEX_WIDTH); // exactly one index entry's worth of bytes
+
+ &mut F::rawbytes_from_byte_slice_unchecked_mut(data)[0]
+ }
+
+ /// Shift the indices starting with and after `starting_index` by the provided `amount`.
+ ///
+ /// ## Safety
+ /// Adding `amount` to each index from `starting_index` onwards must not result in the slice becoming malformed.
+ /// The length of the slice must be correctly set.
+ unsafe fn shift_indices(&mut self, starting_index: usize, amount: i32) {
+ let len = self.len();
+ let indices = F::rawbytes_from_byte_slice_unchecked_mut(
+ &mut self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH
+ ..LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * len], // the byte range occupied by the index table
+ );
+ for idx in &mut indices[starting_index..] {
+ let mut new_idx = F::rawbytes_to_usize(*idx);
+ if amount > 0 {
+ new_idx = new_idx.checked_add(amount.try_into().unwrap()).unwrap(); // panics rather than wrapping on overflow
+ } else {
+ new_idx = new_idx.checked_sub((-amount).try_into().unwrap()).unwrap(); // panics rather than wrapping on underflow
+ }
+ *idx = F::usize_to_rawbytes(new_idx);
+ }
+ }
+
+ /// Borrow this [`VarZeroVecOwned`] as a [`VarZeroVec`] tied to the lifetime of `self`.
+ ///
+ /// When calling many methods in a row on this [`VarZeroVecOwned`],
+ /// converting once up front and reusing the result is more efficient.
+ pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
+     let borrowed: VarZeroVec<'a, T, F> = self.as_slice().into();
+     borrowed
+ }
+
+ /// Remove every element, leaving an empty backing buffer.
+ pub fn clear(&mut self) {
+     self.entire_slice.truncate(0);
+ }
+
+ /// Take ownership of the vector and hand back its encoded byte buffer.
+ #[inline]
+ pub fn into_bytes(self) -> Vec<u8> {
+     let Self { entire_slice, .. } = self;
+     entire_slice
+ }
+
+ /// Invalidate and resize the data at an index, optionally inserting or removing the index.
+ /// Also updates affected indices and the length.
+ /// Returns a slice to the new element data - it doesn't contain uninitialized data but its value is indeterminate.
+ ///
+ /// ## Safety
+ /// - `index` must be a valid index, or, if `shift_type == ShiftType::Insert`, `index == self.len()` is allowed.
+ /// - `new_size` mustn't result in the data segment growing larger than `F::MAX_VALUE`.
+ unsafe fn shift(&mut self, index: usize, new_size: usize, shift_type: ShiftType) -> &mut [u8] {
+ // The format of the encoded data is:
+ // - `LENGTH_WIDTH` bytes of "len" (followed by `METADATA_WIDTH` bytes of metadata)
+ // - `len * F::INDEX_WIDTH` bytes for an array of indices
+ // - the actual data to which the indices point
+ //
+ // When inserting or removing an element, the size of the indices segment must be changed,
+ // so the data before the target element must be shifted by `F::INDEX_WIDTH` bytes in addition to the
+ // shifting needed for the new element size.
+ let len = self.len();
+ let slice_len = self.entire_slice.len();
+
+ let prev_element = match shift_type {
+ ShiftType::Insert => {
+ let pos = self.element_position_unchecked(index);
+ // In the case of an insert, there's no previous element,
+ // so it's an empty range at the new position.
+ pos..pos
+ }
+ _ => self.element_range_unchecked(index),
+ };
+
+ // How much shifting must be done in bytes due to removal/insertion of an index.
+ let index_shift: i64 = match shift_type {
+ ShiftType::Insert => F::INDEX_WIDTH as i64,
+ ShiftType::Replace => 0,
+ ShiftType::Remove => -(F::INDEX_WIDTH as i64),
+ };
+ // The total shift in byte size of the owned slice.
+ let shift: i64 =
+ new_size as i64 - (prev_element.end - prev_element.start) as i64 + index_shift;
+ let new_slice_len = slice_len.wrapping_add(shift as usize); // a negative `shift` cast to usize wraps, so the wrapping add performs the subtraction
+ if shift > 0 {
+ if new_slice_len > F::MAX_VALUE as usize {
+ panic!(
+ "Attempted to grow VarZeroVec to an encoded size that does not fit within the length size used by {}",
+ any::type_name::<F>()
+ );
+ }
+ self.entire_slice.resize(new_slice_len, 0); // grow first (zero-filled) so the moves below stay inside the buffer
+ }
+
+ // Now that we've ensured there's enough space, we can shift the data around.
+ {
+ // Note: There are no references introduced between pointer creation and pointer use, and all
+ // raw pointers are derived from a single &mut. This preserves pointer provenance.
+ let slice_range = self.entire_slice.as_mut_ptr_range();
+ let old_slice_end = slice_range.start.add(slice_len);
+ let data_start = slice_range
+ .start
+ .add(LENGTH_WIDTH + METADATA_WIDTH + len * F::INDEX_WIDTH); // start of the data segment under the OLD element count
+ let prev_element_p =
+ data_start.add(prev_element.start)..data_start.add(prev_element.end);
+
+ // The memory range of the affected index.
+ // When inserting: where the new index goes.
+ // When removing: where the index being removed is.
+ // When replacing: unused.
+ let index_range = {
+ let index_start = slice_range
+ .start
+ .add(LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH * index);
+ index_start..index_start.add(F::INDEX_WIDTH)
+ };
+
+ unsafe fn shift_bytes(block: Range<*const u8>, to: *mut u8) {
+ debug_assert!(block.end >= block.start);
+ ptr::copy(block.start, to, block.end.offset_from(block.start) as usize); // `ptr::copy` permits overlapping source and destination
+ }
+
+ if shift_type == ShiftType::Remove {
+ // Move the data before the element back by `F::INDEX_WIDTH` to remove the index.
+ shift_bytes(index_range.end..prev_element_p.start, index_range.start);
+ }
+
+ // Shift data after the element to its new position.
+ shift_bytes(
+ prev_element_p.end..old_slice_end,
+ prev_element_p
+ .start
+ .offset((new_size as i64 + index_shift) as isize),
+ );
+
+ let first_affected_index = match shift_type {
+ ShiftType::Insert => {
+ // Move data before the element forward by `F::INDEX_WIDTH` to make space for a new index.
+ shift_bytes(index_range.start..prev_element_p.start, index_range.end);
+
+ *self.index_data_mut(index) = F::usize_to_rawbytes(prev_element.start); // the new element begins where the insertion point was
+ self.set_len(len + 1);
+ index + 1
+ }
+ ShiftType::Remove => {
+ self.set_len(len - 1);
+ index
+ }
+ ShiftType::Replace => index + 1,
+ };
+ // No raw pointer use should occur after this point (because of self.index_data and self.set_len).
+
+ // Set the new slice length. This must be done after shifting data around to avoid uninitialized data.
+ self.entire_slice.set_len(new_slice_len); // truncates when shrinking; when growing, `resize` above already set this length
+
+ // Shift the affected indices.
+ self.shift_indices(first_affected_index, (shift - index_shift) as i32); // `shift - index_shift` is the change in the element's data size
+ };
+
+ debug_assert!(self.verify_integrity());
+
+ // Return a mut slice to the new element data.
+ let element_pos = LENGTH_WIDTH
+ + METADATA_WIDTH
+ + self.len() * F::INDEX_WIDTH
+ + self.element_position_unchecked(index);
+ &mut self.entire_slice[element_pos..element_pos + new_size]
+ }
+
+ /// Checks the internal invariants of the vec to ensure safe code will not cause UB.
+ /// Returns whether integrity was verified.
+ ///
+ /// Note: an index is valid if it doesn't point to data past the end of the slice and is
+ /// less than or equal to all future indices. The length of the index segment is not part of each index.
+ fn verify_integrity(&self) -> bool {
+ if self.is_empty() && !self.entire_slice.is_empty() { // a vector that reports itself empty must have a zero-byte buffer
+ return false;
+ }
+ let slice_len = self.entire_slice.len();
+ match slice_len {
+ 0 => return true, // the zero-byte buffer is the valid encoding of an empty vector
+ 1..=3 => return false, // too short to hold a length header (assumes LENGTH_WIDTH is 4 — TODO confirm)
+ _ => (),
+ }
+ let len = unsafe {
+ RawBytesULE::<LENGTH_WIDTH>::from_byte_slice_unchecked(
+ &self.entire_slice[..LENGTH_WIDTH],
+ )[0]
+ .as_unsigned_int()
+ }; // element count read from the length header
+ if len == 0 {
+ // An empty vec must have an empty slice: there is only a single valid byte representation.
+ return false;
+ }
+ if slice_len < LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::INDEX_WIDTH {
+ // Not enough room for the indices.
+ return false;
+ }
+ let data_len =
+ self.entire_slice.len() - LENGTH_WIDTH - METADATA_WIDTH - len as usize * F::INDEX_WIDTH; // bytes left over after the header and index table
+ if data_len > MAX_INDEX {
+ // The data segment is too long.
+ return false;
+ }
+
+ // Test index validity.
+ let indices = unsafe {
+ F::RawBytes::from_byte_slice_unchecked(
+ &self.entire_slice[LENGTH_WIDTH + METADATA_WIDTH
+ ..LENGTH_WIDTH + METADATA_WIDTH + len as usize * F::INDEX_WIDTH],
+ )
+ };
+ for idx in indices {
+ if F::rawbytes_to_usize(*idx) > data_len {
+ // Indices must not point past the data segment.
+ return false;
+ }
+ }
+ for window in indices.windows(2) { // compare each index with its successor
+ if F::rawbytes_to_usize(window[0]) > F::rawbytes_to_usize(window[1]) {
+ // Indices must be in non-decreasing order.
+ return false;
+ }
+ }
+ true
+ }
+
+ /// Append `element` after the current last element.
+ pub fn push<A: EncodeAsVarULE<T> + ?Sized>(&mut self, element: &A) {
+     let end = self.len();
+     self.insert(end, element);
+ }
+
+ /// Insert an element at position `index`; elements at or after `index` move back by one.
+ ///
+ /// # Panics
+ /// Panics if `index > self.len()`, if the encoded element is not smaller than
+ /// `MAX_INDEX` bytes, or if growing the buffer would exceed what the format `F` can address.
+ pub fn insert<A: EncodeAsVarULE<T> + ?Sized>(&mut self, index: usize, element: &A) {
+     let len = self.len();
+     if index > len {
+         panic!("Called out-of-bounds insert() on VarZeroVec, index {index} len {len}");
+     }
+
+     let value_len = element.encode_var_ule_len();
+     // Enforce the element size limit on every path. The empty-vector fast path below
+     // returns early, so checking only afterwards would let a first element skip the limit.
+     assert!(value_len < MAX_INDEX);
+
+     if len == 0 {
+         // Build the one-element encoding directly: header, a single index entry, then the data.
+         let header_len = LENGTH_WIDTH + METADATA_WIDTH + F::INDEX_WIDTH;
+         let cap = header_len + value_len;
+         // The buffer is zero-filled, so the single index entry already reads as offset 0.
+         self.entire_slice.resize(cap, 0);
+         self.entire_slice[0] = 1; // set length
+         element.encode_var_ule_write(&mut self.entire_slice[header_len..]);
+         return;
+     }
+
+     // SAFETY: `index <= len` was checked above, which `shift()` permits for `ShiftType::Insert`.
+     unsafe {
+         let place = self.shift(index, value_len, ShiftType::Insert);
+         element.encode_var_ule_write(place);
+     }
+ }
+
+ /// Remove the element at position `index`.
+ ///
+ /// Panics when `index` is not smaller than the current length.
+ pub fn remove(&mut self, index: usize) {
+     let len = self.len();
+     if len <= index {
+         panic!("Called out-of-bounds remove() on VarZeroVec, index {index} len {len}");
+     }
+     if len != 1 {
+         // SAFETY: `index < len` was checked above, so it names an existing element.
+         unsafe {
+             self.shift(index, 0, ShiftType::Remove);
+         }
+     } else {
+         // The only element is going away: empty the buffer so that every
+         // empty vector is represented by an empty byte slice.
+         self.entire_slice.clear();
+     }
+ }
+
+ /// Overwrite the element at position `index` with `element`.
+ ///
+ /// Panics when `index` is not smaller than the current length, or when the
+ /// encoded element is not smaller than `MAX_INDEX` bytes.
+ pub fn replace<A: EncodeAsVarULE<T> + ?Sized>(&mut self, index: usize, element: &A) {
+     let len = self.len();
+     if len <= index {
+         panic!("Called out-of-bounds replace() on VarZeroVec, index {index} len {len}");
+     }
+
+     let value_len = element.encode_var_ule_len();
+     assert!(value_len < MAX_INDEX);
+
+     // SAFETY: `index < len` was checked above, so it names an existing element.
+     let place = unsafe { self.shift(index, value_len, ShiftType::Replace) };
+     element.encode_var_ule_write(place);
+ }
+}
+
+ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVecOwned<T, F>
+ where
+     T: fmt::Debug,
+ {
+     // Formatting is delegated to the borrowed slice view of this vector.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let slice: &VarZeroSlice<T, F> = self;
+         VarZeroSlice::fmt(slice, f)
+     }
+ }
+
+ impl<T: VarULE + ?Sized, F> Default for VarZeroVecOwned<T, F> {
+     // The default value is the empty vector.
+     fn default() -> Self {
+         VarZeroVecOwned::new()
+     }
+ }
+
+ impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVecOwned<T, F>
+ where
+     T: VarULE + ?Sized,
+     T: PartialEq,
+     A: AsRef<T>,
+     F: VarZeroVecFormat,
+ {
+     // Element-wise comparison against a slice whose items can be viewed as `T`.
+     #[inline]
+     fn eq(&self, other: &&[A]) -> bool {
+         let rhs = other.iter().map(<A as AsRef<T>>::as_ref);
+         self.iter().eq(rhs)
+     }
+ }
+
+ impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<&'a VarZeroSlice<T, F>>
+     for VarZeroVecOwned<T, F>
+ {
+     // Converting from a borrowed slice copies its bytes into a new owned buffer.
+     fn from(other: &'a VarZeroSlice<T, F>) -> Self {
+         VarZeroVecOwned::from_slice(other)
+     }
+ }
+
+ #[cfg(test)]
+ mod test {
+ use super::VarZeroVecOwned;
+ #[test]
+ fn test_insert_integrity() { // mirrors every insert on a reference Vec<String> and compares after each step
+ let mut items: Vec<String> = Vec::new();
+ let mut zerovec = VarZeroVecOwned::<str>::new();
+
+ // Insert into an empty vec.
+ items.insert(0, "1234567890".into());
+ zerovec.insert(0, "1234567890");
+ assert_eq!(zerovec, &*items);
+
+ zerovec.insert(1, "foo3");
+ items.insert(1, "foo3".into());
+ assert_eq!(zerovec, &*items);
+
+ // Insert at the end.
+ items.insert(items.len(), "qwertyuiop".into());
+ zerovec.insert(zerovec.len(), "qwertyuiop");
+ assert_eq!(zerovec, &*items);
+
+ items.insert(0, "asdfghjkl;".into()); // insert at the front of a non-empty vec
+ zerovec.insert(0, "asdfghjkl;");
+ assert_eq!(zerovec, &*items);
+
+ items.insert(2, "".into()); // insert an empty element in the middle
+ zerovec.insert(2, "");
+ assert_eq!(zerovec, &*items);
+ }
+
+ #[test]
+ // ensure that inserting empty items works
+ fn test_empty_inserts() {
+ let mut items: Vec<String> = Vec::new();
+ let mut zerovec = VarZeroVecOwned::<str>::new();
+
+ // Insert into an empty vec.
+ items.insert(0, "".into());
+ zerovec.insert(0, "");
+ assert_eq!(zerovec, &*items);
+
+ items.insert(0, "".into());
+ zerovec.insert(0, "");
+ assert_eq!(zerovec, &*items);
+
+ items.insert(0, "1234567890".into());
+ zerovec.insert(0, "1234567890");
+ assert_eq!(zerovec, &*items);
+
+ items.insert(0, "".into());
+ zerovec.insert(0, "");
+ assert_eq!(zerovec, &*items);
+ }
+
+ #[test]
+ fn test_small_insert_integrity() {
+ // Tests that insert() works even when there
+ // is not enough space for the new index in entire_slice.len()
+ let mut items: Vec<String> = Vec::new();
+ let mut zerovec = VarZeroVecOwned::<str>::new();
+
+ // Insert into an empty vec.
+ items.insert(0, "abc".into());
+ zerovec.insert(0, "abc");
+ assert_eq!(zerovec, &*items);
+
+ zerovec.insert(1, "def");
+ items.insert(1, "def".into());
+ assert_eq!(zerovec, &*items);
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_insert_past_end() {
+ VarZeroVecOwned::<str>::new().insert(1, ""); // index 1 is past the end of an empty vec
+ }
+
+ #[test]
+ fn test_remove_integrity() {
+ let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""];
+ let mut zerovec = VarZeroVecOwned::<str>::try_from_elements(&items).unwrap();
+
+ for index in [0, 2, 4, 0, 1, 1, 0] { // seven removals drain all seven items; the last one removes the only remaining element
+ items.remove(index);
+ zerovec.remove(index);
+ assert_eq!(zerovec, &*items, "index {}, len {}", index, items.len());
+ }
+ }
+
+ #[test]
+ fn test_removing_last_element_clears() {
+ let mut zerovec = VarZeroVecOwned::<str>::try_from_elements(&["buy some apples"]).unwrap();
+ assert!(!zerovec.as_bytes().is_empty());
+ zerovec.remove(0);
+ assert!(zerovec.as_bytes().is_empty()); // an emptied vec must be backed by zero bytes
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_remove_past_end() {
+ VarZeroVecOwned::<str>::new().remove(0); // nothing to remove from an empty vec
+ }
+
+ #[test]
+ fn test_replace_integrity() {
+ let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""];
+ let mut zerovec = VarZeroVecOwned::<str>::try_from_elements(&items).unwrap();
+
+ // Replace the first element with one that is a byte longer ("apples" -> "blablah")
+ items[0] = "blablah";
+ zerovec.replace(0, "blablah");
+ assert_eq!(zerovec, &*items);
+
+ // Replace with a smaller element
+ items[1] = "twily";
+ zerovec.replace(1, "twily");
+ assert_eq!(zerovec, &*items);
+
+ // Replace an empty element
+ items[3] = "aoeuidhtns";
+ zerovec.replace(3, "aoeuidhtns");
+ assert_eq!(zerovec, &*items);
+
+ // Replace the last element
+ items[6] = "0123456789";
+ zerovec.replace(6, "0123456789");
+ assert_eq!(zerovec, &*items);
+
+ // Replace with an empty element
+ items[2] = "";
+ zerovec.replace(2, "");
+ assert_eq!(zerovec, &*items);
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_replace_past_end() {
+ VarZeroVecOwned::<str>::new().replace(0, ""); // nothing to replace in an empty vec
+ }
+ }
diff --git a/third_party/rust/zerovec/src/varzerovec/serde.rs b/third_party/rust/zerovec/src/varzerovec/serde.rs
new file mode 100644
index 0000000000..8025fc085b
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/serde.rs
@@ -0,0 +1,268 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{VarZeroSlice, VarZeroVec, VarZeroVecFormat};
+use crate::ule::*;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::fmt;
+use core::marker::PhantomData;
+use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
+#[cfg(feature = "serde")]
+use serde::ser::{Serialize, SerializeSeq, Serializer};
+
+ struct VarZeroVecVisitor<T: ?Sized, F: VarZeroVecFormat> { // serde visitor that produces a `VarZeroVec<T, F>`; holds no data of its own
+ #[allow(clippy::type_complexity)] // this is a private marker type, who cares
+ marker: PhantomData<(fn() -> Box<T>, F)>, // zero-sized marker naming the element type and format
+ }
+
+ impl<T: ?Sized, F: VarZeroVecFormat> Default for VarZeroVecVisitor<T, F> {
+     // The visitor carries no state, so its default is just the marker.
+     fn default() -> Self {
+         let marker = PhantomData;
+         Self { marker }
+     }
+ }
+
+ // Visitor that builds a `VarZeroVec` either zero-copy from a borrowed byte buffer
+ // or, element by element, from a sequence.
+ impl<'de, T, F> Visitor<'de> for VarZeroVecVisitor<T, F>
+ where
+     T: VarULE + ?Sized,
+     Box<T>: Deserialize<'de>,
+     F: VarZeroVecFormat,
+ {
+     type Value = VarZeroVec<'de, T, F>;
+
+     fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+         formatter.write_str("a sequence or borrowed buffer of bytes")
+     }
+
+     // Parses the borrowed bytes in place; a parse failure becomes a custom serde error.
+     fn visit_borrowed_bytes<E>(self, bytes: &'de [u8]) -> Result<Self::Value, E>
+     where
+         E: de::Error,
+     {
+         VarZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom)
+     }
+
+     // Collects the sequence into boxed elements and converts them into a `VarZeroVec`.
+     fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
+     where
+         A: SeqAccess<'de>,
+     {
+         // The size hint comes from the input being deserialized, so it must not be
+         // trusted to size an allocation: cap what is reserved up front and let the
+         // vector grow normally if the sequence really is longer.
+         const MAX_PREALLOC_BYTES: usize = 1024 * 1024;
+         let capacity = seq
+             .size_hint()
+             .unwrap_or(0)
+             .min(MAX_PREALLOC_BYTES / core::mem::size_of::<Box<T>>());
+         let mut vec: Vec<Box<T>> = Vec::with_capacity(capacity);
+         while let Some(value) = seq.next_element::<Box<T>>()? {
+             vec.push(value);
+         }
+         Ok(VarZeroVec::from(&vec))
+     }
+ }
+
+ /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+ impl<'de, 'a, T, F> Deserialize<'de> for VarZeroVec<'a, T, F>
+ where
+     T: VarULE + ?Sized,
+     Box<T>: Deserialize<'de>,
+     F: VarZeroVecFormat,
+     'de: 'a,
+ {
+     // Human-readable formats are read as a sequence of elements; all other
+     // formats are read as a single byte buffer.
+     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+     where
+         D: Deserializer<'de>,
+     {
+         let visitor = VarZeroVecVisitor::<T, F>::default();
+         if !deserializer.is_human_readable() {
+             return deserializer.deserialize_bytes(visitor);
+         }
+         deserializer.deserialize_seq(visitor)
+     }
+ }
+
+ /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+ impl<'de, 'a, T, F> Deserialize<'de> for &'a VarZeroSlice<T, F>
+ where
+     T: VarULE + ?Sized,
+     Box<T>: Deserialize<'de>,
+     F: VarZeroVecFormat,
+     'de: 'a,
+ {
+     // A borrowed slice can only be produced without copying, so human-readable
+     // formats and any non-borrowed result are rejected with an error.
+     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+     where
+         D: Deserializer<'de>,
+     {
+         if deserializer.is_human_readable() {
+             return Err(de::Error::custom(
+                 "&VarZeroSlice cannot be deserialized from human-readable formats",
+             ));
+         }
+         match VarZeroVec::<'a, T, F>::deserialize(deserializer)? {
+             VarZeroVec::Borrowed(slice) => Ok(slice),
+             _ => Err(de::Error::custom(
+                 "&VarZeroSlice can only deserialize in zero-copy ways",
+             )),
+         }
+     }
+ }
+
+ /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+ impl<'de, T, F> Deserialize<'de> for Box<VarZeroSlice<T, F>>
+ where
+     T: VarULE + ?Sized,
+     Box<T>: Deserialize<'de>,
+     F: VarZeroVecFormat,
+ {
+     // Deserializes as a `VarZeroVec` first, then converts the result into a boxed slice.
+     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+     where
+         D: Deserializer<'de>,
+     {
+         VarZeroVec::<T, F>::deserialize(deserializer).map(|vzv| vzv.to_boxed())
+     }
+ }
+
+ /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+ #[cfg(feature = "serde")]
+ impl<T, F> Serialize for VarZeroVec<'_, T, F>
+ where
+     T: Serialize + VarULE + ?Sized,
+     F: VarZeroVecFormat,
+ {
+     // Non-human-readable formats receive the raw encoded bytes; human-readable
+     // formats receive a sequence with one entry per element.
+     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+     where
+         S: Serializer,
+     {
+         if !serializer.is_human_readable() {
+             return serializer.serialize_bytes(self.as_bytes());
+         }
+         let mut seq = serializer.serialize_seq(Some(self.len()))?;
+         self.iter()
+             .try_for_each(|value| seq.serialize_element(value))?;
+         seq.end()
+     }
+ }
+
+ /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+ #[cfg(feature = "serde")]
+ impl<T, F> Serialize for VarZeroSlice<T, F>
+ where
+     T: Serialize + VarULE + ?Sized,
+     F: VarZeroVecFormat,
+ {
+     // A slice serializes exactly like the `VarZeroVec` that borrows it.
+     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+     where
+         S: Serializer,
+     {
+         let borrowed = self.as_varzerovec();
+         borrowed.serialize(serializer)
+     }
+ }
+
+ #[cfg(test)]
+ #[allow(non_camel_case_types)]
+ mod test {
+ use crate::{VarZeroSlice, VarZeroVec};
+
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_VarZeroVec<'data> { // never constructed; only checks that the serde derives compile for this field type
+ #[serde(borrow)]
+ _data: VarZeroVec<'data, str>,
+ }
+
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_VarZeroSlice<'data> { // same derive check for a borrowed slice field
+ #[serde(borrow)]
+ _data: &'data VarZeroSlice<str>,
+ }
+
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_VarZeroVec_of_VarZeroSlice<'data> { // same derive check for a nested vec-of-slices field
+ #[serde(borrow)]
+ _data: VarZeroVec<'data, VarZeroSlice<str>>,
+ }
+
+ // Encoding of ["foo", "bar", "baz", "dolor", "quux", "lorem ipsum"]: a 4-byte little-endian count (6), six 2-byte little-endian offsets (0, 3, 6, 9, 14, 18), then the concatenated UTF-8 data.
+ const BYTES: &[u8] = &[
+ 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122,
+ 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115,
+ 117, 109,
+ ];
+ const JSON_STR: &str = "[\"foo\",\"bar\",\"baz\",\"dolor\",\"quux\",\"lorem ipsum\"]";
+ const BINCODE_BUF: &[u8] = &[ // the 45 bytes of BYTES preceded by an 8-byte little-endian length (45)
+ 45, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111,
+ 98, 97, 114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101,
+ 109, 32, 105, 112, 115, 117, 109,
+ ];
+
+ // ["w", "ω", "文", "𑄃"] (UTF-8 lengths 1, 2, 3 and 4 bytes, hence the offsets 0, 1, 3, 6 below)
+ const NONASCII_STR: &[&str] = &["w", "ω", "文", "𑄃"];
+ const NONASCII_BYTES: &[u8] = &[
+ 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131,
+ ];
+ #[test]
+ fn test_serde_json() {
+ let zerovec_orig: VarZeroVec<str> = VarZeroVec::parse_byte_slice(BYTES).expect("parse");
+ let json_str = serde_json::to_string(&zerovec_orig).expect("serialize");
+ assert_eq!(JSON_STR, json_str);
+ // VarZeroVec should deserialize from JSON to either Vec or VarZeroVec
+ let vec_new: Vec<Box<str>> =
+ serde_json::from_str(&json_str).expect("deserialize from buffer to Vec");
+ assert_eq!(zerovec_orig.to_vec(), vec_new);
+ let zerovec_new: VarZeroVec<str> =
+ serde_json::from_str(&json_str).expect("deserialize from buffer to VarZeroVec");
+ assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec());
+ assert!(zerovec_new.is_owned()); // the JSON (sequence) path is expected to produce an owned vector
+ }
+
+ #[test]
+ fn test_serde_bincode() {
+ let zerovec_orig: VarZeroVec<str> = VarZeroVec::parse_byte_slice(BYTES).expect("parse");
+ let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+ assert_eq!(BINCODE_BUF, bincode_buf);
+ let zerovec_new: VarZeroVec<str> =
+ bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroVec");
+ assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec());
+ assert!(!zerovec_new.is_owned()); // the bincode (bytes) path is expected to borrow from the input buffer
+ }
+
+ #[test]
+ fn test_vzv_borrowed() { // round-trips a borrowed `&VarZeroSlice<str>` through bincode
+ let zerovec_orig: &VarZeroSlice<str> =
+ VarZeroSlice::parse_byte_slice(BYTES).expect("parse");
+ let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+ assert_eq!(BINCODE_BUF, bincode_buf);
+ let zerovec_new: &VarZeroSlice<str> =
+ bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroSlice");
+ assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec());
+ }
+
+ #[test]
+ fn test_nonascii_bincode() {
+ let src_vec = NONASCII_STR
+ .iter()
+ .copied()
+ .map(Box::<str>::from)
+ .collect::<Vec<_>>();
+ let mut zerovec: VarZeroVec<str> =
+ VarZeroVec::parse_byte_slice(NONASCII_BYTES).expect("parse");
+ assert_eq!(zerovec.to_vec(), src_vec);
+ let bincode_buf = bincode::serialize(&zerovec).expect("serialize");
+ let zerovec_result =
+ bincode::deserialize::<VarZeroVec<str>>(&bincode_buf).expect("deserialize");
+ assert_eq!(zerovec_result.to_vec(), src_vec);
+
+ // try again with owned zerovec
+ zerovec.make_mut();
+ let bincode_buf = bincode::serialize(&zerovec).expect("serialize");
+ let zerovec_result =
+ bincode::deserialize::<VarZeroVec<str>>(&bincode_buf).expect("deserialize");
+ assert_eq!(zerovec_result.to_vec(), src_vec);
+ }
+ }
diff --git a/third_party/rust/zerovec/src/varzerovec/slice.rs b/third_party/rust/zerovec/src/varzerovec/slice.rs
new file mode 100644
index 0000000000..119f1d38f8
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/slice.rs
@@ -0,0 +1,573 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::components::VarZeroVecComponents;
+use super::*;
+use crate::ule::*;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::cmp::{Ord, Ordering, PartialOrd};
+use core::fmt;
+use core::marker::PhantomData;
+use core::mem;
+
+use core::ops::Index;
+use core::ops::Range;
+
+/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
+/// where `T` is not `Sized`.
+///
+/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
+/// owned data and as such is ideal for deserialization since most human readable
+/// serialization formats cannot unconditionally deserialize zero-copy.
+///
+/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
+/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
+/// using `VarZeroVec<ZeroSlice<T>>`.
+///
+/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
+/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
+///
+/// This type can be nested within itself to allow for multi-level nested `Vec`s.
+///
+/// # Examples
+///
+/// ## Nested Slices
+///
+/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
+///
+/// ```rust
+/// use zerovec::ule::*;
+/// use zerovec::{VarZeroSlice, VarZeroVec, ZeroVec};
+/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
+/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
+/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
+/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
+/// let strings_12 = vec![&*strings_1, &*strings_2];
+/// let strings_34 = vec![&*strings_3, &*strings_4];
+/// let all_strings = vec![strings_12, strings_34];
+///
+/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
+/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
+/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
+/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
+/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
+/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
+/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
+///
+/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
+/// .iter()
+/// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
+/// v.iter()
+/// .map(|x: &VarZeroSlice<_>| {
+/// x.as_varzerovec()
+/// .iter()
+/// .map(|s| s.to_owned())
+/// .collect::<Vec<String>>()
+/// })
+/// .collect::<Vec<_>>()
+/// })
+/// .collect::<Vec<_>>();
+/// assert_eq!(reconstructed, all_strings);
+///
+/// let bytes = vzv_all.as_bytes();
+/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
+/// VarZeroVec::parse_byte_slice(bytes).unwrap();
+/// assert_eq!(vzv_from_bytes, vzv_all);
+/// ```
+///
+/// ## Iterate over Windows
+///
+/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
+/// [core::slice::Windows], this behavior can be easily modeled using an iterator:
+///
+/// ```
+/// use zerovec::VarZeroVec;
+///
+/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
+/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
+///
+/// let mut it = vzv.iter().peekable();
+/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
+/// // Evaluate (x, y) here.
+/// # pairs.push((x, y));
+/// }
+/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
+/// ```
+//
+// safety invariant: The slice MUST be one which parses to
+// a valid VarZeroVecComponents<T>
+ #[repr(transparent)]
+ pub struct VarZeroSlice<T: ?Sized, F = Index16> {
+ // Zero-sized marker tying the slice to its element type `T` and index format `F`
+ marker: PhantomData<(F, T)>,
+ /// The original slice this was constructed from
+ entire_slice: [u8],
+ }
+
+impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
+ /// Construct a new empty VarZeroSlice
+ pub const fn new_empty() -> &'static Self {
+ // The empty VZV is special-cased to the empty slice
+ // SAFETY: `Self` is `#[repr(transparent)]` over `[u8]`, so `&[u8]` can be reinterpreted
+ // as `&Self`, and the empty byte slice is a valid encoding of an empty slice.
+ unsafe { mem::transmute(&[] as &[u8]) }
+ }
+
+ /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
+ #[inline]
+ pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
+     let bytes: &'a [u8] = &self.entire_slice;
+     // SAFETY: the type invariant of `VarZeroSlice` is that its bytes always parse
+     // to valid components, so the unchecked constructor may be used here.
+     unsafe { VarZeroVecComponents::from_bytes_unchecked(bytes) }
+ }
+
+ /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
+ ///
+ /// # Safety
+ ///
+ /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
+ pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
+ // self is really just a wrapper around a byte slice
+ // (`#[repr(transparent)]` over `[u8]`); no validation happens here, so the caller
+ // is responsible for `bytes` being a valid encoding.
+ mem::transmute(bytes)
+ }
+
+ /// Get the number of elements in this slice
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(vec.len(), 4);
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn len(&self) -> usize {
+     // The element count is stored in the parsed components
+     let components = self.as_components();
+     components.len()
+ }
+
+ /// Returns `true` if the slice contains no elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings: Vec<String> = vec![];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert!(vec.is_empty());
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn is_empty(&self) -> bool {
+     // Emptiness is answered by the parsed components
+     let components = self.as_components();
+     components.is_empty()
+ }
+
+ /// Obtain an iterator over this slice's elements
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// let mut iter_results: Vec<&str> = vec.iter().collect();
+ /// assert_eq!(iter_results[0], "foo");
+ /// assert_eq!(iter_results[1], "bar");
+ /// assert_eq!(iter_results[2], "baz");
+ /// assert_eq!(iter_results[3], "quux");
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> {
+     // Iteration is delegated to the parsed components
+     let components = self.as_components();
+     components.iter()
+ }
+
+ /// Get one of this slice's elements, returning `None` if the index is out of bounds
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// let mut iter_results: Vec<&str> = vec.iter().collect();
+ /// assert_eq!(vec.get(0), Some("foo"));
+ /// assert_eq!(vec.get(1), Some("bar"));
+ /// assert_eq!(vec.get(2), Some("baz"));
+ /// assert_eq!(vec.get(3), Some("quux"));
+ /// assert_eq!(vec.get(4), None);
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn get(&self, idx: usize) -> Option<&T> {
+     // Bounds-checked lookup is delegated to the parsed components
+     let components = self.as_components();
+     components.get(idx)
+ }
+
+ /// Get one of this slice's elements
+ ///
+ /// # Safety
+ ///
+ /// `index` must be in range
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// let mut iter_results: Vec<&str> = vec.iter().collect();
+ /// unsafe {
+ /// assert_eq!(vec.get_unchecked(0), "foo");
+ /// assert_eq!(vec.get_unchecked(1), "bar");
+ /// assert_eq!(vec.get_unchecked(2), "baz");
+ /// assert_eq!(vec.get_unchecked(3), "quux");
+ /// }
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
+ // SAFETY: the caller guarantees that `idx` is in range, as required by this
+ // function's own safety contract.
+ self.as_components().get_unchecked(idx)
+ }
+
+ /// Obtain an owned `Vec<Box<T>>` out of this
+ pub fn to_vec(&self) -> Vec<Box<T>> {
+     // Building the owned vector is delegated to the parsed components
+     let components = self.as_components();
+     components.to_vec()
+ }
+
+ /// Get a reference to the entire encoded backing buffer of this slice
+ ///
+ /// The bytes can be passed back to [`Self::parse_byte_slice()`].
+ ///
+ /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz"];
+ /// let vzv = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap());
+ ///
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ #[inline]
+ pub const fn as_bytes(&self) -> &[u8] {
+     // The encoded buffer is the only data-carrying field of the slice
+     let Self { entire_slice, .. } = self;
+     entire_slice
+ }
+
+ /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
+ ///
+ /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
+ /// it is more efficient to perform this conversion first
+ pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
+     // Wrap the reference in the borrowed variant; nothing is copied
+     let borrowed: &'a Self = self;
+     VarZeroVec::Borrowed(borrowed)
+ }
+
+ /// Parse a VarZeroSlice from a slice of the appropriate format
+ ///
+ /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
+ pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> {
+ // Fully qualified so that this calls the `VarULE` trait method rather than
+ // recursing into this inherent method of the same name.
+ <Self as VarULE>::parse_byte_slice(slice)
+ }
+
+ /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice`
+ ///
+ /// # Safety
+ /// - `bytes` must be a valid sequence of bytes for this VarZeroVec
+ pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self {
+ // self is really just a wrapper around a byte slice
+ // (`#[repr(transparent)]` over `[u8]`); the bytes are not validated here.
+ mem::transmute(bytes)
+ }
+
+ /// Returns the mutable bytes backing the element at `idx`.
+ ///
+ /// # Safety
+ /// - NOTE(review): presumably `idx` must be in range and the caller must leave the bytes a
+ /// valid encoding of `T`, since nothing here re-validates the slice; confirm against callers.
+ pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] {
+ // Byte range of the element within the backing buffer
+ let range = self.as_components().get_range(idx);
+ #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges
+ &mut self.entire_slice[range]
+ }
+}
+
+impl<T, F> VarZeroSlice<T, F>
+where
+ T: VarULE,
+ T: ?Sized,
+ T: Ord,
+ F: VarZeroVecFormat,
+{
+ /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
+ /// the standard library function [`binary_search`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["a", "b", "f", "g"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(vec.binary_search("f"), Ok(2));
+ /// assert_eq!(vec.binary_search("e"), Err(2));
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ ///
+ /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
+ #[inline]
+ pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
+     // The search runs on the parsed components
+     let components = self.as_components();
+     components.binary_search(x)
+ }
+
+ /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
+ ///
+ /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
+ /// to the behavior of the standard library function [`binary_search`].
+ ///
+ /// The index is returned relative to the start of the range.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// // Same behavior as binary_search when the range covers the whole slice:
+ /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
+ /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
+ ///
+ /// // Will not look outside of the range:
+ /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
+ /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
+ ///
+ /// // Will return indices relative to the start of the range:
+ /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
+ /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
+ ///
+ /// // Will return `None` if the range is out of bounds:
+ /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
+ /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ ///
+ /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
+ #[inline]
+ pub fn binary_search_in_range(
+     &self,
+     x: &T,
+     range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+     // The ranged search runs on the parsed components
+     let components = self.as_components();
+     components.binary_search_in_range(x, range)
+ }
+}
+
+impl<T, F> VarZeroSlice<T, F>
+where
+ T: VarULE,
+ T: ?Sized,
+ F: VarZeroVecFormat,
+{
+ /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
+ /// the standard library function [`binary_search_by`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["a", "b", "f", "g"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
+ /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ ///
+ /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
+ #[inline]
+ pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
+     // The predicate-driven search runs on the parsed components
+     let components = self.as_components();
+     components.binary_search_by(predicate)
+ }
+
+ /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
+ ///
+ /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
+ /// to the behavior of the standard library function [`binary_search`].
+ ///
+ /// The index is returned relative to the start of the range.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// // Same behavior as binary_search when the range covers the whole slice:
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
+ /// Some(Ok(3))
+ /// );
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
+ /// Some(Err(4))
+ /// );
+ ///
+ /// // Will not look outside of the range:
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
+ /// Some(Err(1))
+ /// );
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
+ /// Some(Err(0))
+ /// );
+ ///
+ /// // Will return indices relative to the start of the range:
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
+ /// Some(Ok(2))
+ /// );
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
+ /// Some(Err(3))
+ /// );
+ ///
+ /// // Will return `None` if the range is out of bounds:
+ /// assert_eq!(
+ /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
+ /// None
+ /// );
+ /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ ///
+ /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
+ pub fn binary_search_in_range_by(
+     &self,
+     predicate: impl FnMut(&T) -> Ordering,
+     range: Range<usize>,
+ ) -> Option<Result<usize, usize>> {
+     // The ranged, predicate-driven search runs on the parsed components
+     let components = self.as_components();
+     components.binary_search_in_range_by(predicate, range)
+ }
+}
+ // Safety (based on the safety checklist on the VarULE trait):
+ // 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
+ // `[u8]` slice which satisfies this invariant)
+ // 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
+ // `[u8]` slice which satisfies this invariant)
+ // 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+ // 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+ // 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+ // 6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data
+ // 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
+ unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ // Validation is done by parsing the components; the parsed value is discarded
+ // and only success or failure is reported.
+ let _: VarZeroVecComponents<T, F> = VarZeroVecComponents::parse_byte_slice(bytes)?;
+ Ok(())
+ }
+
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ // self is really just a wrapper around a byte slice
+ mem::transmute(bytes)
+ }
+
+ fn as_byte_slice(&self) -> &[u8] {
+ // The encoded buffer is returned as-is
+ &self.entire_slice
+ }
+ }
+
+ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
+     type Output = T;
+     /// Returns the element at `index`, panicking when `index` is out of bounds.
+     fn index(&self, index: usize) -> &Self::Output {
+         #[allow(clippy::panic)] // documented
+         let element = self.get(index).unwrap_or_else(|| {
+             panic!(
+                 "index out of bounds: the len is {} but the index is {index}",
+                 self.len()
+             )
+         });
+         element
+     }
+ }
+
+ impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
+ where
+     T: VarULE,
+     T: ?Sized,
+     T: PartialEq,
+     F: VarZeroVecFormat,
+ {
+     /// Compares two slices for equality.
+     ///
+     /// Two empty slices are always equal, whatever their bytes; non-empty slices are
+     /// compared by their encoded bytes.
+     #[inline]
+     fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
+         // An empty slice has more than one valid encoding (the empty buffer and a
+         // buffer holding only a zero length), so byte equality cannot be used when
+         // either side is empty. This matches `VarZeroVec`'s `PartialEq` and keeps
+         // `eq` in agreement with the element-wise `Ord` impl.
+         if self.is_empty() || other.is_empty() {
+             return self.is_empty() && other.is_empty();
+         }
+         // VarULE has an API guarantee that this is equivalent
+         // to `T::VarULE::eq()`
+         self.entire_slice.eq(&other.entire_slice)
+     }
+ }
+
+ // Marker impl: equality on `VarZeroSlice` is a full equivalence relation when `T: Eq`.
+ impl<T: VarULE + ?Sized + Eq, F: VarZeroVecFormat> Eq for VarZeroSlice<T, F> {}
+
+ impl<T, F> PartialOrd for VarZeroSlice<T, F>
+ where
+     T: VarULE + ?Sized + PartialOrd,
+     F: VarZeroVecFormat,
+ {
+     /// Lexicographic comparison over the decoded elements.
+     #[inline]
+     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+         Iterator::partial_cmp(self.iter(), other.iter())
+     }
+ }
+
+ impl<T, F> Ord for VarZeroSlice<T, F>
+ where
+     T: VarULE + ?Sized + Ord,
+     F: VarZeroVecFormat,
+ {
+     /// Lexicographic comparison over the decoded elements.
+     #[inline]
+     fn cmp(&self, other: &Self) -> Ordering {
+         Iterator::cmp(self.iter(), other.iter())
+     }
+ }
+
+ impl<T, F> fmt::Debug for VarZeroSlice<T, F>
+ where
+     T: VarULE + ?Sized + fmt::Debug,
+     F: VarZeroVecFormat,
+ {
+     /// Formats the slice as a list of its decoded elements.
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         let mut list = f.debug_list();
+         list.entries(self.iter());
+         list.finish()
+     }
+ }
+
+ impl<T, F> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
+ where
+     T: ?Sized,
+     F: VarZeroVecFormat,
+ {
+     /// Identity conversion, so that APIs taking `AsRef<VarZeroSlice<_>>` accept the slice itself.
+     fn as_ref(&self) -> &VarZeroSlice<T, F> {
+         self
+     }
+ }
diff --git a/third_party/rust/zerovec/src/varzerovec/vec.rs b/third_party/rust/zerovec/src/varzerovec/vec.rs
new file mode 100644
index 0000000000..64928509f8
--- /dev/null
+++ b/third_party/rust/zerovec/src/varzerovec/vec.rs
@@ -0,0 +1,531 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::ule::*;
+
+use alloc::vec::Vec;
+use core::cmp::{Ord, Ordering, PartialOrd};
+use core::fmt;
+use core::ops::Deref;
+
+use super::*;
+
+/// A zero-copy, byte-aligned vector for variable-width types.
+///
+/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
+/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
+/// where `T`'s data is variable-length (e.g. `String`)
+///
+/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
+/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
+/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
+/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
+///
+/// For example, here are some owned types and their zero-copy equivalents:
+///
+/// - `Vec<String>`: `VarZeroVec<'a, str>`
+ /// - `Vec<Vec<u8>>`: `VarZeroVec<'a, [u8]>`
+/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
+/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
+///
+/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
+///
+/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
+///
+/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
+/// owned data (and then mutated!) but can also borrow from some buffer.
+///
+/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
+/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
+///
+/// # Bytes and Equality
+///
+/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
+/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
+/// across major SemVer releases.
+///
+/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
+/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
+/// [`VarZeroVec<T>`] or vice-versa.
+///
+/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
+/// under consideration, with different options along the space-time spectrum. See
+/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
+///
+/// # Example
+///
+/// ```rust
+/// # use std::str::Utf8Error;
+/// # use zerovec::ule::ZeroVecError;
+/// use zerovec::VarZeroVec;
+///
+/// // The little-endian bytes correspond to the list of strings.
+/// let strings = vec!["w", "ω", "文", "𑄃"];
+///
+/// #[derive(serde::Serialize, serde::Deserialize)]
+/// struct Data<'a> {
+/// #[serde(borrow)]
+/// strings: VarZeroVec<'a, str>,
+/// }
+///
+/// let data = Data {
+/// strings: VarZeroVec::from(&strings),
+/// };
+///
+/// let bincode_bytes =
+/// bincode::serialize(&data).expect("Serialization should be successful");
+///
+/// // Will deserialize without allocations
+/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
+/// .expect("Deserialization should be successful");
+///
+/// assert_eq!(deserialized.strings.get(2), Some("文"));
+/// assert_eq!(deserialized.strings, &*strings);
+/// # Ok::<(), ZeroVecError>(())
+/// ```
+///
+/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
+///
+/// ```rust
+/// # use std::str::Utf8Error;
+/// # use zerovec::ule::ZeroVecError;
+/// use zerovec::ule::*;
+/// use zerovec::VarZeroVec;
+/// use zerovec::ZeroSlice;
+/// use zerovec::ZeroVec;
+///
+/// // The structured list correspond to the list of integers.
+/// let numbers: &[&[u32]] = &[
+/// &[12, 25, 38],
+/// &[39179, 100],
+/// &[42, 55555],
+/// &[12345, 54321, 9],
+/// ];
+///
+/// #[derive(serde::Serialize, serde::Deserialize)]
+/// struct Data<'a> {
+/// #[serde(borrow)]
+/// vecs: VarZeroVec<'a, ZeroSlice<u32>>,
+/// }
+///
+/// let data = Data {
+/// vecs: VarZeroVec::from(numbers),
+/// };
+///
+/// let bincode_bytes =
+/// bincode::serialize(&data).expect("Serialization should be successful");
+///
+/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
+/// .expect("Deserialization should be successful");
+///
+/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
+/// assert_eq!(deserialized.vecs[1], *numbers[1]);
+///
+/// # Ok::<(), ZeroVecError>(())
+/// ```
+///
+/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
+/// for more information.
+///
+/// # How it Works
+///
+/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
+/// serialize to a specially formatted list of bytes. The format is:
+///
+/// - 4 bytes for `length` (interpreted as a little-endian u32)
+ /// - `2 * length` bytes of `indices` with the default [`Index16`] format (interpreted as little-endian u16; the index width depends on the chosen [`VarZeroVecFormat`])
+/// - Remaining bytes for actual `data`
+///
+/// Each element in the `indices` array points to the starting index of its corresponding
+/// data part in the `data` list. The ending index can be calculated from the starting index
+/// of the next element (or the length of the slice if dealing with the last element).
+///
+/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
+///
+/// [`ule`]: crate::ule
+ #[non_exhaustive]
+ pub enum VarZeroVec<'a, T: ?Sized, F = Index16> {
+ /// An allocated VarZeroVec, allowing for mutations.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::VarZeroVec;
+ ///
+ /// let mut vzv = VarZeroVec::<str>::default();
+ /// vzv.make_mut().push("foo");
+ /// vzv.make_mut().push("bar");
+ /// assert!(matches!(vzv, VarZeroVec::Owned(_)));
+ /// ```
+ Owned(VarZeroVecOwned<T, F>),
+ /// A borrowed VarZeroVec, requiring no allocations.
+ ///
+ /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::VarZeroVec;
+ ///
+ /// // Layout: 4-byte length (4), four 2-byte indices (0, 1, 3, 6), then 10 bytes of UTF-8 data.
+ /// let bytes = &[
+ /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240,
+ /// 145, 132, 131,
+ /// ];
+ ///
+ /// let vzv: VarZeroVec<str> = VarZeroVec::parse_byte_slice(bytes).unwrap();
+ /// assert!(matches!(vzv, VarZeroVec::Borrowed(_)));
+ /// ```
+ Borrowed(&'a VarZeroSlice<T, F>),
+ }
+
+ impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
+     /// Clones the vector: owned data is cloned, borrowed data only copies the reference.
+     fn clone(&self) -> Self {
+         match self {
+             VarZeroVec::Owned(owned) => VarZeroVec::Owned(owned.clone()),
+             VarZeroVec::Borrowed(slice) => VarZeroVec::Borrowed(*slice),
+         }
+     }
+ }
+
+ impl<T, F> fmt::Debug for VarZeroVec<'_, T, F>
+ where
+     T: VarULE + ?Sized + fmt::Debug,
+     F: VarZeroVecFormat,
+ {
+     /// Formats exactly like the underlying [`VarZeroSlice`].
+     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+         fmt::Debug::fmt(self.as_slice(), f)
+     }
+ }
+
+ impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
+     /// Wraps an owned vector in the `Owned` variant.
+     #[inline]
+     fn from(other: VarZeroVecOwned<T, F>) -> Self {
+         Self::Owned(other)
+     }
+ }
+
+ impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
+     /// Wraps a slice reference in the `Borrowed` variant.
+     fn from(other: &'a VarZeroSlice<T, F>) -> Self {
+         Self::Borrowed(other)
+     }
+ }
+
+ impl<'a, T, F> From<VarZeroVec<'a, T, F>> for VarZeroVecOwned<T, F>
+ where
+     T: ?Sized + VarULE,
+     F: VarZeroVecFormat,
+ {
+     /// Unwraps an owned vector as-is, or converts a borrowed slice into an owned vector.
+     #[inline]
+     fn from(other: VarZeroVec<'a, T, F>) -> Self {
+         match other {
+             VarZeroVec::Owned(owned) => owned,
+             VarZeroVec::Borrowed(slice) => Self::from(slice),
+         }
+     }
+ }
+
+ impl<T> Default for VarZeroVec<'_, T>
+ where
+     T: VarULE + ?Sized,
+ {
+     /// The default value is the empty vector.
+     #[inline]
+     fn default() -> Self {
+         VarZeroVec::new()
+     }
+ }
+
+ impl<T, F> Deref for VarZeroVec<'_, T, F>
+ where
+     T: VarULE + ?Sized,
+     F: VarZeroVecFormat,
+ {
+     type Target = VarZeroSlice<T, F>;
+     /// Dereferences to the underlying slice for either variant.
+     fn deref(&self) -> &Self::Target {
+         self.as_slice()
+     }
+ }
+
+impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
+ /// Creates a new, empty `VarZeroVec<T>`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::VarZeroVec;
+ ///
+ /// let vzv: VarZeroVec<str> = VarZeroVec::new();
+ /// assert!(vzv.is_empty());
+ /// ```
+ #[inline]
+ pub const fn new() -> Self {
+     // An empty vector borrows the static empty slice, so nothing is allocated
+     VarZeroVec::Borrowed(VarZeroSlice::<T, F>::new_empty())
+ }
+
+ /// Parse a VarZeroVec from a slice of the appropriate format
+ ///
+ /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(&vec[0], "foo");
+ /// assert_eq!(&vec[1], "bar");
+ /// assert_eq!(&vec[2], "baz");
+ /// assert_eq!(&vec[3], "quux");
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> {
+     // Validate the bytes as a slice, then wrap the reference in the borrowed variant
+     VarZeroSlice::<T, F>::parse_byte_slice(slice).map(VarZeroVec::Borrowed)
+ }
+
+ /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
+ ///
+ /// # Safety
+ ///
+ /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
+ pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
+     // The slice type owns the byte reinterpretation; the caller upholds its safety contract
+     Self::Borrowed(VarZeroSlice::from_bytes_unchecked(bytes))
+ }
+
+ /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
+ ///
+ ///
+ /// # Example
+ ///
+ /// ```rust,ignore
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let mut vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(vec.len(), 4);
+ /// let mutvec = vec.make_mut();
+ /// mutvec.push("lorem ipsum".into());
+ /// mutvec[2] = "dolor sit".into();
+ /// assert_eq!(&vec[0], "foo");
+ /// assert_eq!(&vec[1], "bar");
+ /// assert_eq!(&vec[2], "dolor sit");
+ /// assert_eq!(&vec[3], "quux");
+ /// assert_eq!(&vec[4], "lorem ipsum");
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ //
+ // NOTE(review): this function was previously described here as crate-public, but it is
+ // declared `pub`, and the doc example is still marked `ignore`. Confirm the intended stability.
+ pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
+ match self {
+ VarZeroVec::Owned(ref mut vec) => vec,
+ VarZeroVec::Borrowed(slice) => {
+ // Build an owned vector from the borrowed slice and store it in place of the borrow
+ let new_self = VarZeroVecOwned::from_slice(slice);
+ *self = new_self.into();
+ // recursion is limited since we are guaranteed to hit the Owned branch
+ self.make_mut()
+ }
+ }
+ }
+
+ /// Converts this [`VarZeroVec`] into an owned one with a `'static` lifetime.
+ ///
+ /// An already-owned vector is returned unchanged; a borrowed one is turned into owned
+ /// storage with [`VarZeroVecOwned::from_slice`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz", "quux"];
+ /// let vec = VarZeroVec::<str>::from(&strings);
+ ///
+ /// assert_eq!(vec.len(), 4);
+ /// // has 'static lifetime
+ /// let owned = vec.into_owned();
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn into_owned(self) -> VarZeroVec<'static, T, F> {
+     // Both variants are handled directly, so no `unreachable!()` arm is needed
+     match self {
+         VarZeroVec::Owned(vec) => VarZeroVec::Owned(vec),
+         VarZeroVec::Borrowed(slice) => VarZeroVec::Owned(VarZeroVecOwned::from_slice(slice)),
+     }
+ }
+
+ /// Obtain this `VarZeroVec` as a [`VarZeroSlice`]
+ pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
+     match self {
+         // The owned vector is coerced to its slice view
+         VarZeroVec::Owned(owned) => owned,
+         VarZeroVec::Borrowed(slice) => *slice,
+     }
+ }
+
+ /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed,
+ /// this function allocates a byte vector and copies the borrowed bytes into it.
+ ///
+ /// The bytes can be passed back to [`Self::parse_byte_slice()`].
+ ///
+ /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::str::Utf8Error;
+ /// # use zerovec::ule::ZeroVecError;
+ /// # use zerovec::VarZeroVec;
+ ///
+ /// let strings = vec!["foo", "bar", "baz"];
+ /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
+ ///
+ /// let mut borrowed: VarZeroVec<str> = VarZeroVec::parse_byte_slice(&bytes)?;
+ /// assert_eq!(borrowed, &*strings);
+ ///
+ /// # Ok::<(), ZeroVecError>(())
+ /// ```
+ pub fn into_bytes(self) -> Vec<u8> {
+     match self {
+         // Owned data hands over its buffer
+         VarZeroVec::Owned(owned) => owned.into_bytes(),
+         // Borrowed data has to be copied into a fresh vector
+         VarZeroVec::Borrowed(slice) => Vec::from(slice.as_bytes()),
+     }
+ }
+
+ /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
+ /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
+ /// be used to force it into an owned type
+ pub fn is_owned(&self) -> bool {
+     // True only for the `Owned` variant
+     matches!(self, VarZeroVec::Owned(..))
+ }
+
+ #[cfg(feature = "bench")]
+ #[doc(hidden)]
+ pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
+     // Exposed for benchmarks only: forwards to the slice's component view
+     let slice: &'b VarZeroSlice<T, F> = self.as_slice();
+     slice.as_components()
+ }
+}
+
+ impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F>
+ where
+     T: VarULE + ?Sized,
+     A: EncodeAsVarULE<T>,
+     F: VarZeroVecFormat,
+ {
+     /// Encodes the elements of a `Vec` by forwarding to the `&[A]` conversion.
+     #[inline]
+     fn from(elements: &Vec<A>) -> Self {
+         Self::from(&elements[..])
+     }
+ }
+
+ impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
+ where
+     T: VarULE + ?Sized,
+     A: EncodeAsVarULE<T>,
+     F: VarZeroVecFormat,
+ {
+     /// Encodes a slice of elements; an empty input yields the borrowed empty vector.
+     #[inline]
+     fn from(elements: &[A]) -> Self {
+         if elements.is_empty() {
+             return VarZeroVec::Borrowed(VarZeroSlice::new_empty());
+         }
+         #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
+         let owned: VarZeroVecOwned<T, F> = VarZeroVecOwned::try_from_elements(elements).unwrap();
+         VarZeroVec::Owned(owned)
+     }
+ }
+
+ impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
+ where
+     T: VarULE + ?Sized,
+     A: EncodeAsVarULE<T>,
+     F: VarZeroVecFormat,
+ {
+     /// Encodes the elements of an array by forwarding to the `&[A]` conversion.
+     #[inline]
+     fn from(elements: &[A; N]) -> Self {
+         Self::from(&elements[..])
+     }
+ }
+
+ impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
+ where
+     T: VarULE + ?Sized + PartialEq,
+     F: VarZeroVecFormat,
+ {
+     /// Two vectors are equal when both are empty, or when both are non-empty with identical bytes.
+     #[inline]
+     fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
+         match (self.is_empty(), other.is_empty()) {
+             // VarULE has an API guarantee that byte equality is semantic equality.
+             // For non-empty VZVs, there's only a single metadata representation,
+             // so this guarantee extends to the whole VZV representation.
+             (false, false) => self.as_bytes().eq(other.as_bytes()),
+             // VZV::from_elements used to produce a non-canonical representation of the
+             // empty VZV, so we cannot use byte equality for empty vecs.
+             (lhs_empty, rhs_empty) => lhs_empty && rhs_empty,
+         }
+     }
+ }
+
+ // Marker impl: equality on `VarZeroVec` is a full equivalence relation when `T: Eq`.
+ impl<'a, T: VarULE + ?Sized + Eq, F: VarZeroVecFormat> Eq for VarZeroVec<'a, T, F> {}
+
+ impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
+ where
+     T: VarULE + ?Sized + PartialEq,
+     A: AsRef<T>,
+     F: VarZeroVecFormat,
+ {
+     /// Element-wise comparison against a slice of values viewable as `T`.
+     #[inline]
+     fn eq(&self, other: &&[A]) -> bool {
+         Iterator::eq(self.iter(), other.iter().map(|item| item.as_ref()))
+     }
+ }
+
+ impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
+ where
+     T: VarULE + ?Sized + PartialEq,
+     A: AsRef<T>,
+     F: VarZeroVecFormat,
+ {
+     /// Element-wise comparison against an array of values viewable as `T`.
+     #[inline]
+     fn eq(&self, other: &[A; N]) -> bool {
+         Iterator::eq(self.iter(), other.iter().map(|item| item.as_ref()))
+     }
+ }
+
+ impl<'a, T, F> PartialOrd for VarZeroVec<'a, T, F>
+ where
+     T: VarULE + ?Sized + PartialOrd,
+     F: VarZeroVecFormat,
+ {
+     /// Lexicographic comparison over the decoded elements.
+     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+         Iterator::partial_cmp(self.iter(), other.iter())
+     }
+ }
+
+ impl<'a, T, F> Ord for VarZeroVec<'a, T, F>
+ where
+     T: VarULE + ?Sized + Ord,
+     F: VarZeroVecFormat,
+ {
+     /// Lexicographic comparison over the decoded elements.
+     fn cmp(&self, other: &Self) -> Ordering {
+         Iterator::cmp(self.iter(), other.iter())
+     }
+ }
+
+ #[test]
+ fn assert_single_empty_representation() {
+ // `new()` and conversion from an empty slice must produce identical bytes.
+ assert_eq!(
+ VarZeroVec::<str>::new().as_bytes(),
+ VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
+ );
+ }
+
+ #[test]
+ fn weird_empty_representation_equality() {
+ // A buffer holding only a zero length and a completely empty buffer both parse as
+ // empty vectors and must compare equal even though their bytes differ.
+ assert_eq!(
+ VarZeroVec::<str>::parse_byte_slice(&[0, 0, 0, 0]).unwrap(),
+ VarZeroVec::<str>::parse_byte_slice(&[]).unwrap()
+ );
+ }
diff --git a/third_party/rust/zerovec/src/yoke_impls.rs b/third_party/rust/zerovec/src/yoke_impls.rs
new file mode 100644
index 0000000000..66f756dce5
--- /dev/null
+++ b/third_party/rust/zerovec/src/yoke_impls.rs
@@ -0,0 +1,551 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// Allow the lints below so that the Yokeable impls can be copy-pasted without per-impl adjustments
+#![allow(unknown_lints)] // forgetting_copy_types
+#![allow(renamed_and_removed_lints)] // forgetting_copy_types
+#![allow(forgetting_copy_types)]
+#![allow(clippy::forget_copy)]
+#![allow(clippy::forget_non_drop)]
+
+use crate::flexzerovec::FlexZeroVec;
+use crate::map::ZeroMapBorrowed;
+use crate::map::ZeroMapKV;
+use crate::map2d::ZeroMap2dBorrowed;
+use crate::ule::*;
+use crate::{VarZeroVec, ZeroMap, ZeroMap2d, ZeroVec};
+use core::{mem, ptr};
+use yoke::*;
+
+// This impl is similar to the impl on Cow and is safe for the same reasons; only the `'static` lifetime parameter is exchanged for `'a`
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+unsafe impl<'a, T: 'static + AsULE + ?Sized> Yokeable<'a> for ZeroVec<'static, T> {
+ type Output = ZeroVec<'a, T>; // the same type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ self // returned as-is, no cast is needed here
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ self // returned as-is, no cast is needed here
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+// This impl is similar to the impl on Cow and is safe for the same reasons; only the `'static` lifetime parameter is exchanged for `'a`
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+unsafe impl<'a, T: 'static + VarULE + ?Sized> Yokeable<'a> for VarZeroVec<'static, T> {
+ type Output = VarZeroVec<'a, T>; // the same type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ self // returned as-is, no cast is needed here
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ self // returned as-is, no cast is needed here
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+// This impl is similar to the impl on Cow and is safe for the same reasons; only the `'static` lifetime parameter is exchanged for `'a`
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+unsafe impl<'a> Yokeable<'a> for FlexZeroVec<'static> {
+ type Output = FlexZeroVec<'a>; // the same type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ self // returned as-is, no cast is needed here
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ self // returned as-is, no cast is needed here
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+#[allow(clippy::transmute_ptr_to_ptr)]
+unsafe impl<'a, K, V> Yokeable<'a> for ZeroMap<'static, K, V>
+where
+ K: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ V: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ <K as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>,
+ <V as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>,
+{
+ type Output = ZeroMap<'a, K, V>; // the same map type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ unsafe {
+ // Unfortunately, because K and V are generic, rustc is
+ // unaware that these are covariant types, and cannot perform this cast automatically.
+ // We transmute it instead, and enforce the lack of a lifetime with the `K, V: 'static` bound
+ mem::transmute::<&Self, &Self::Output>(self)
+ }
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>());
+ unsafe {
+ // Similar problem as transform(), but we need to use ptr::read since
+ // the compiler isn't sure of the sizes
+ let this = mem::ManuallyDrop::new(self); // keep `self`'s destructor from running; its contents are moved out below
+ let ptr: *const Self::Output = (&*this as *const Self).cast();
+ ptr::read(ptr)
+ }
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+#[allow(clippy::transmute_ptr_to_ptr)]
+unsafe impl<'a, K, V> Yokeable<'a> for ZeroMapBorrowed<'static, K, V>
+where
+ K: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ V: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ &'static <K as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>,
+ &'static <V as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>,
+{
+ type Output = ZeroMapBorrowed<'a, K, V>; // the same map type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ unsafe {
+ // Unfortunately, because K and V are generic, rustc is
+ // unaware that these are covariant types, and cannot perform this cast automatically.
+ // We transmute it instead, and enforce the lack of a lifetime with the `K, V: 'static` bound
+ mem::transmute::<&Self, &Self::Output>(self)
+ }
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>());
+ unsafe {
+ // Similar problem as transform(), but we need to use ptr::read since
+ // the compiler isn't sure of the sizes
+ let this = mem::ManuallyDrop::new(self); // keep `self`'s destructor from running; its contents are moved out below
+ let ptr: *const Self::Output = (&*this as *const Self).cast();
+ ptr::read(ptr)
+ }
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+#[allow(clippy::transmute_ptr_to_ptr)]
+unsafe impl<'a, K0, K1, V> Yokeable<'a> for ZeroMap2d<'static, K0, K1, V>
+where
+ K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ V: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ <K0 as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>,
+ <K1 as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>,
+ <V as ZeroMapKV<'static>>::Container: for<'b> Yokeable<'b>,
+{
+ type Output = ZeroMap2d<'a, K0, K1, V>; // the same map type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ unsafe {
+ // Unfortunately, because K0, K1 and V are generic, rustc is
+ // unaware that these are covariant types, and cannot perform this cast automatically.
+ // We transmute it instead, and enforce the lack of a lifetime with the `K0, K1, V: 'static` bound
+ mem::transmute::<&Self, &Self::Output>(self)
+ }
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>());
+ unsafe {
+ // Similar problem as transform(), but we need to use ptr::read since
+ // the compiler isn't sure of the sizes
+ let this = mem::ManuallyDrop::new(self); // keep `self`'s destructor from running; its contents are moved out below
+ let ptr: *const Self::Output = (&*this as *const Self).cast();
+ ptr::read(ptr)
+ }
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+/// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate
+#[allow(clippy::transmute_ptr_to_ptr)]
+unsafe impl<'a, K0, K1, V> Yokeable<'a> for ZeroMap2dBorrowed<'static, K0, K1, V>
+where
+ K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ V: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+ &'static <K0 as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>,
+ &'static <K1 as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>,
+ &'static <V as ZeroMapKV<'static>>::Slice: for<'b> Yokeable<'b>,
+{
+ type Output = ZeroMap2dBorrowed<'a, K0, K1, V>; // the same map type, borrowed for `'a` instead of `'static`
+ #[inline]
+ fn transform(&'a self) -> &'a Self::Output {
+ unsafe {
+ // Unfortunately, because K0, K1 and V are generic, rustc is
+ // unaware that these are covariant types, and cannot perform this cast automatically.
+ // We transmute it instead, and enforce the lack of a lifetime with the `K0, K1, V: 'static` bound
+ mem::transmute::<&Self, &Self::Output>(self)
+ }
+ }
+ #[inline]
+ fn transform_owned(self) -> Self::Output {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>());
+ unsafe {
+ // Similar problem as transform(), but we need to use ptr::read since
+ // the compiler isn't sure of the sizes
+ let this = mem::ManuallyDrop::new(self); // keep `self`'s destructor from running; its contents are moved out below
+ let ptr: *const Self::Output = (&*this as *const Self).cast();
+ ptr::read(ptr)
+ }
+ }
+ #[inline]
+ unsafe fn make(from: Self::Output) -> Self {
+ debug_assert!(mem::size_of::<Self::Output>() == mem::size_of::<Self>()); // the read below reinterprets one type as the other
+ let from = mem::ManuallyDrop::new(from); // keep `from`'s destructor from running; its contents are moved out by `ptr::read`
+ let ptr: *const Self = (&*from as *const Self::Output).cast(); // same address, viewed as the `'static` flavour
+ ptr::read(ptr) // bitwise move into the return value
+ }
+ #[inline]
+ fn transform_mut<F>(&'a mut self, f: F)
+ where
+ F: 'static + for<'b> FnOnce(&'b mut Self::Output),
+ {
+ unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } // hand `f` a mutable view typed as `Self::Output`
+ }
+}
+
+#[cfg(test)]
+#[allow(non_camel_case_types, non_snake_case)]
+mod test {
+ use super::*;
+ use crate::{vecs::FlexZeroSlice, VarZeroSlice, ZeroSlice};
+ use databake::*;
+
+ // Note: every struct below derives Yokeable and, when the matching Cargo feature is enabled, the Serde and databake
+ // traits as well. This may partially duplicate tests elsewhere in this crate, but they are here for completeness.
+
+ #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ struct DeriveTest_ZeroVec<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: ZeroVec<'data, u16>,
+ }
+
+ #[test]
+ #[ignore] // https://github.com/rust-lang/rust/issues/98906
+ fn bake_ZeroVec() {
+ test_bake!(
+ DeriveTest_ZeroVec<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroVec {
+ _data: crate::ZeroVec::new(),
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ struct DeriveTest_ZeroSlice<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: &'data ZeroSlice<u16>,
+ }
+
+ #[test]
+ fn bake_ZeroSlice() {
+ test_bake!(
+ DeriveTest_ZeroSlice<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroSlice {
+ _data: crate::ZeroSlice::new_empty(),
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ struct DeriveTest_FlexZeroVec<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: FlexZeroVec<'data>,
+ }
+
+ #[test]
+ fn bake_FlexZeroVec() {
+ test_bake!(
+ DeriveTest_FlexZeroVec<'static>,
+ crate::yoke_impls::test::DeriveTest_FlexZeroVec {
+ _data: crate::vecs::FlexZeroVec::new(),
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ struct DeriveTest_FlexZeroSlice<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: &'data FlexZeroSlice,
+ }
+
+ #[test]
+ fn bake_FlexZeroSlice() {
+ test_bake!(
+ DeriveTest_FlexZeroSlice<'static>,
+ crate::yoke_impls::test::DeriveTest_FlexZeroSlice {
+ _data: unsafe { crate::vecs::FlexZeroSlice::from_byte_slice_unchecked(b"\x01\0") },
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ struct DeriveTest_VarZeroVec<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: VarZeroVec<'data, str>,
+ }
+
+ #[test]
+ fn bake_VarZeroVec() {
+ test_bake!(
+ DeriveTest_VarZeroVec<'static>,
+ crate::yoke_impls::test::DeriveTest_VarZeroVec {
+ _data: crate::VarZeroVec::new(),
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ struct DeriveTest_VarZeroSlice<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: &'data VarZeroSlice<str>,
+ }
+
+ #[test]
+ fn bake_VarZeroSlice() {
+ test_bake!(
+ DeriveTest_VarZeroSlice<'static>,
+ crate::yoke_impls::test::DeriveTest_VarZeroSlice {
+ _data: crate::VarZeroSlice::new_empty()
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ #[yoke(prove_covariance_manually)]
+ struct DeriveTest_ZeroMap<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: ZeroMap<'data, [u8], str>,
+ }
+
+ #[test]
+ fn bake_ZeroMap() {
+ test_bake!(
+ DeriveTest_ZeroMap<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroMap {
+ _data: unsafe {
+ #[allow(unused_unsafe)]
+ crate::ZeroMap::from_parts_unchecked(
+ crate::VarZeroVec::new(),
+ crate::VarZeroVec::new(),
+ )
+ },
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ #[yoke(prove_covariance_manually)]
+ struct DeriveTest_ZeroMapBorrowed<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: ZeroMapBorrowed<'data, [u8], str>,
+ }
+
+ #[test]
+ fn bake_ZeroMapBorrowed() {
+ test_bake!(
+ DeriveTest_ZeroMapBorrowed<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroMapBorrowed {
+ _data: unsafe {
+ #[allow(unused_unsafe)]
+ crate::maps::ZeroMapBorrowed::from_parts_unchecked(
+ crate::VarZeroSlice::new_empty(),
+ crate::VarZeroSlice::new_empty(),
+ )
+ },
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ #[yoke(prove_covariance_manually)]
+ struct DeriveTest_ZeroMapWithULE<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: ZeroMap<'data, ZeroSlice<u32>, str>,
+ }
+
+ #[test]
+ fn bake_ZeroMapWithULE() {
+ test_bake!(
+ DeriveTest_ZeroMapWithULE<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroMapWithULE {
+ _data: unsafe {
+ #[allow(unused_unsafe)]
+ crate::ZeroMap::from_parts_unchecked(
+ crate::VarZeroVec::new(),
+ crate::VarZeroVec::new(),
+ )
+ },
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable, zerofrom::ZeroFrom)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ #[yoke(prove_covariance_manually)]
+ struct DeriveTest_ZeroMap2d<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: ZeroMap2d<'data, u16, u16, str>,
+ }
+
+ #[test]
+ fn bake_ZeroMap2d() {
+ test_bake!(
+ DeriveTest_ZeroMap2d<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroMap2d {
+ _data: unsafe {
+ #[allow(unused_unsafe)]
+ crate::ZeroMap2d::from_parts_unchecked(
+ crate::ZeroVec::new(),
+ crate::ZeroVec::new(),
+ crate::ZeroVec::new(),
+ crate::VarZeroVec::new(),
+ )
+ },
+ },
+ zerovec,
+ );
+ }
+
+ #[derive(yoke::Yokeable)]
+ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+ #[cfg_attr(feature = "databake", derive(databake::Bake), databake(path = zerovec::yoke_impls::test))]
+ #[yoke(prove_covariance_manually)]
+ struct DeriveTest_ZeroMap2dBorrowed<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ _data: ZeroMap2dBorrowed<'data, u16, u16, str>,
+ }
+
+ #[test]
+ fn bake_ZeroMap2dBorrowed() {
+ test_bake!(
+ DeriveTest_ZeroMap2dBorrowed<'static>,
+ crate::yoke_impls::test::DeriveTest_ZeroMap2dBorrowed {
+ _data: unsafe {
+ #[allow(unused_unsafe)]
+ crate::maps::ZeroMap2dBorrowed::from_parts_unchecked(
+ crate::ZeroSlice::new_empty(),
+ crate::ZeroSlice::new_empty(),
+ crate::ZeroSlice::new_empty(),
+ crate::VarZeroSlice::new_empty(),
+ )
+ },
+ },
+ zerovec,
+ );
+ }
+}
diff --git a/third_party/rust/zerovec/src/zerofrom_impls.rs b/third_party/rust/zerovec/src/zerofrom_impls.rs
new file mode 100644
index 0000000000..d17e432c4f
--- /dev/null
+++ b/third_party/rust/zerovec/src/zerofrom_impls.rs
@@ -0,0 +1,124 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::map::ZeroMapKV;
+use crate::ule::*;
+use crate::vecs::{FlexZeroSlice, FlexZeroVec};
+use crate::{VarZeroSlice, VarZeroVec, ZeroMap, ZeroMap2d, ZeroSlice, ZeroVec};
+use zerofrom::ZeroFrom;
+
+/// Builds a borrowed `ZeroVec` over the ULE slice of an existing `ZeroVec`.
+impl<'zf, T: 'static + AsULE + ?Sized> ZeroFrom<'zf, ZeroVec<'_, T>> for ZeroVec<'zf, T> {
+    #[inline]
+    fn zero_from(other: &'zf ZeroVec<'_, T>) -> Self {
+        let ule_slice = other.as_ule_slice();
+        Self::new_borrowed(ule_slice)
+    }
+}
+
+/// Builds a borrowed `ZeroVec` over the ULE slice of a `ZeroSlice`.
+impl<'zf, T: 'static + AsULE + ?Sized> ZeroFrom<'zf, ZeroSlice<T>> for ZeroVec<'zf, T> {
+    #[inline]
+    fn zero_from(other: &'zf ZeroSlice<T>) -> Self {
+        let ule_slice = other.as_ule_slice();
+        Self::new_borrowed(ule_slice)
+    }
+}
+
+/// A `ZeroSlice` reference is its own zero-copy view: the input reference is returned unchanged.
+impl<'zf, T: 'static + AsULE + ?Sized> ZeroFrom<'zf, ZeroSlice<T>> for &'zf ZeroSlice<T> {
+    #[inline]
+    fn zero_from(other: &'zf ZeroSlice<T>) -> &'zf ZeroSlice<T> {
+        other
+    }
+}
+
+/// Wraps a view of an existing `FlexZeroVec` in the `Borrowed` variant.
+impl<'zf, 's> ZeroFrom<'zf, FlexZeroVec<'s>> for FlexZeroVec<'zf> {
+    #[inline]
+    fn zero_from(other: &'zf FlexZeroVec<'s>) -> Self {
+        // Deref coercion turns the vector reference into a slice reference.
+        let slice: &'zf FlexZeroSlice = other;
+        Self::Borrowed(slice)
+    }
+}
+
+/// Wraps a `FlexZeroSlice` reference in the `Borrowed` variant.
+impl<'zf> ZeroFrom<'zf, FlexZeroSlice> for FlexZeroVec<'zf> {
+    #[inline]
+    fn zero_from(other: &'zf FlexZeroSlice) -> FlexZeroVec<'zf> {
+        Self::Borrowed(other)
+    }
+}
+
+/// A `FlexZeroSlice` reference is its own zero-copy view: the input reference is returned unchanged.
+impl<'zf> ZeroFrom<'zf, FlexZeroSlice> for &'zf FlexZeroSlice {
+    #[inline]
+    fn zero_from(other: &'zf FlexZeroSlice) -> &'zf FlexZeroSlice {
+        other
+    }
+}
+
+/// Converts a `VarZeroSlice` reference into a `VarZeroVec` through the existing `Into` conversion.
+impl<'zf, T: 'static + VarULE + ?Sized> ZeroFrom<'zf, VarZeroSlice<T>> for VarZeroVec<'zf, T> {
+    #[inline]
+    fn zero_from(other: &'zf VarZeroSlice<T>) -> Self {
+        Into::into(other)
+    }
+}
+
+/// Takes the slice view of an existing `VarZeroVec` and converts it into a new `VarZeroVec`.
+impl<'zf, T: 'static + VarULE + ?Sized> ZeroFrom<'zf, VarZeroVec<'_, T>> for VarZeroVec<'zf, T> {
+    #[inline]
+    fn zero_from(other: &'zf VarZeroVec<'_, T>) -> Self {
+        let slice = other.as_slice();
+        Into::into(slice)
+    }
+}
+
+/// A `VarZeroSlice` reference is its own zero-copy view: the input reference is returned unchanged.
+impl<'zf, T: 'static + VarULE + ?Sized> ZeroFrom<'zf, VarZeroSlice<T>> for &'zf VarZeroSlice<T> {
+    #[inline]
+    fn zero_from(other: &'zf VarZeroSlice<T>) -> &'zf VarZeroSlice<T> {
+        other
+    }
+}
+
+/// Builds the map by applying `ZeroFrom` to each of the source map's two containers.
+impl<'zf, 's, K, V> ZeroFrom<'zf, ZeroMap<'s, K, V>> for ZeroMap<'zf, K, V>
+where
+    K: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+    V: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+    <K as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <K as ZeroMapKV<'s>>::Container>,
+    <V as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <V as ZeroMapKV<'s>>::Container>,
+{
+    fn zero_from(other: &'zf ZeroMap<'s, K, V>) -> Self {
+        let keys = K::Container::zero_from(&other.keys);
+        let values = V::Container::zero_from(&other.values);
+        Self { keys, values }
+    }
+}
+
+/// Builds the map by applying `ZeroFrom` to each of the source map's four containers.
+impl<'zf, 's, K0, K1, V> ZeroFrom<'zf, ZeroMap2d<'s, K0, K1, V>> for ZeroMap2d<'zf, K0, K1, V>
+where
+    K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+    K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+    V: 'static + for<'b> ZeroMapKV<'b> + ?Sized,
+    <K0 as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <K0 as ZeroMapKV<'s>>::Container>,
+    <K1 as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <K1 as ZeroMapKV<'s>>::Container>,
+    <V as ZeroMapKV<'zf>>::Container: ZeroFrom<'zf, <V as ZeroMapKV<'s>>::Container>,
+{
+    fn zero_from(other: &'zf ZeroMap2d<'s, K0, K1, V>) -> Self {
+        let keys0 = K0::Container::zero_from(&other.keys0);
+        let joiner = ZeroVec::zero_from(&other.joiner);
+        let keys1 = K1::Container::zero_from(&other.keys1);
+        let values = V::Container::zero_from(&other.values);
+        Self {
+            keys0,
+            joiner,
+            keys1,
+            values,
+        }
+    }
+}
diff --git a/third_party/rust/zerovec/src/zerovec/databake.rs b/third_party/rust/zerovec/src/zerovec/databake.rs
new file mode 100644
index 0000000000..31f1675946
--- /dev/null
+++ b/third_party/rust/zerovec/src/zerovec/databake.rs
@@ -0,0 +1,69 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::ZeroVec;
+use crate::{ule::AsULE, ZeroSlice};
+use databake::*;
+
+/// Emits `ZeroVec::new()` for an empty vector and an unchecked byte-literal constructor otherwise.
+impl<T: AsULE + ?Sized + Bake> Bake for ZeroVec<'_, T> {
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        if self.is_empty() {
+            return quote! { zerovec::ZeroVec::new() };
+        }
+        let bytes = databake::Bake::bake(&self.as_bytes(), env);
+        quote! { unsafe { zerovec::ZeroVec::from_bytes_unchecked(#bytes) } }
+    }
+}
+
+/// Emits `ZeroSlice::new_empty()` for an empty slice and an unchecked byte-literal constructor otherwise.
+impl<T: AsULE + ?Sized> Bake for &ZeroSlice<T> {
+    fn bake(&self, env: &CrateEnv) -> TokenStream {
+        env.insert("zerovec");
+        if self.is_empty() {
+            return quote! { zerovec::ZeroSlice::new_empty() };
+        }
+        let bytes = databake::Bake::bake(&self.as_bytes(), env);
+        quote! { unsafe { zerovec::ZeroSlice::from_bytes_unchecked(#bytes) } }
+    }
+}
+
+#[test]
+fn test_baked_vec() {
+ test_bake!(
+ ZeroVec<u32>,
+ const: crate::ZeroVec::new(), // empty case
+ zerovec
+ );
+ test_bake!(
+ ZeroVec<u32>,
+ const: unsafe {
+ crate::ZeroVec::from_bytes_unchecked(b"\x02\x01\0\x16\0M\x01\\") // non-empty case: eight bytes, i.e. two u32 values
+ },
+ zerovec
+ );
+}
+
+#[test]
+fn test_baked_slice() {
+ test_bake!(
+ &ZeroSlice<u32>,
+ const: crate::ZeroSlice::new_empty(), // empty case
+ zerovec
+ );
+ test_bake!(
+ &ZeroSlice<u32>,
+ const: unsafe {
+ crate::ZeroSlice::from_bytes_unchecked(b"\x02\x01\0\x16\0M\x01\\") // non-empty case: eight bytes, i.e. two u32 values
+ },
+ zerovec
+ );
+}
diff --git a/third_party/rust/zerovec/src/zerovec/mod.rs b/third_party/rust/zerovec/src/zerovec/mod.rs
new file mode 100644
index 0000000000..e6186be0a2
--- /dev/null
+++ b/third_party/rust/zerovec/src/zerovec/mod.rs
@@ -0,0 +1,1137 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#[cfg(feature = "databake")]
+mod databake;
+
+#[cfg(feature = "serde")]
+mod serde;
+
+mod slice;
+
+pub use slice::ZeroSlice;
+
+use crate::ule::*;
+use alloc::borrow::Cow;
+use alloc::vec::Vec;
+use core::cmp::{Ord, Ordering, PartialOrd};
+use core::fmt;
+use core::iter::FromIterator;
+use core::marker::PhantomData;
+use core::mem;
+use core::num::NonZeroUsize;
+use core::ops::Deref;
+use core::ptr;
+
+/// A zero-copy, byte-aligned vector for fixed-width types.
+///
+/// `ZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
+/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization.
+///
+/// `T` must implement [`AsULE`], which is auto-implemented for a number of built-in types,
+/// including all fixed-width multibyte integers. For variable-width types like [`str`],
+/// see [`VarZeroVec`](crate::VarZeroVec). [`zerovec::make_ule`](crate::make_ule) may
+/// be used to automatically implement [`AsULE`] for a type and generate the underlying [`ULE`] type.
+///
+/// Typically, the zero-copy equivalent of a `Vec<T>` will simply be `ZeroVec<'a, T>`.
+///
+/// Most of the methods on `ZeroVec<'a, T>` come from its [`Deref`] implementation to [`ZeroSlice<T>`](ZeroSlice).
+///
+/// For creating zero-copy vectors of variable-size types, see [`VarZeroVec`](crate::VarZeroVec).
+///
+/// `ZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
+/// owned data (and then mutated!) but can also borrow from some buffer.
+///
+/// # Example
+///
+/// ```
+/// use zerovec::ZeroVec;
+///
+/// // The numbers that will be stored in the vector.
+/// let nums: &[u16] = &[211, 281, 421, 461];
+///
+/// #[derive(serde::Serialize, serde::Deserialize)]
+/// struct Data<'a> {
+/// #[serde(borrow)]
+/// nums: ZeroVec<'a, u16>,
+/// }
+///
+/// // The owned version will allocate
+/// let data = Data {
+/// nums: ZeroVec::alloc_from_slice(nums),
+/// };
+/// let bincode_bytes =
+/// bincode::serialize(&data).expect("Serialization should be successful");
+///
+/// // Will deserialize without allocations
+/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
+/// .expect("Deserialization should be successful");
+///
+/// // This deserializes without allocation!
+/// assert!(!deserialized.nums.is_owned());
+/// assert_eq!(deserialized.nums.get(2), Some(421));
+/// assert_eq!(deserialized.nums, nums);
+/// ```
+///
+/// [`ule`]: crate::ule
+///
+/// # How it Works
+///
+/// `ZeroVec<T>` represents a slice of `T` as a slice of `T::ULE`. The difference between `T` and
+/// `T::ULE` is that `T::ULE` must be encoded in little-endian with 1-byte alignment. When accessing
+/// items from `ZeroVec<T>`, we fetch the `T::ULE`, convert it on the fly to `T`, and return `T` by
+/// value.
+///
+/// Benchmarks can be found in the project repository, with some results found in the [crate-level documentation](crate).
+///
+/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
+pub struct ZeroVec<'a, T>
+where
+ T: AsULE,
+{
+ vector: EyepatchHackVector<T::ULE>, // lifetime-less storage; the lifetime `'a` is carried by `marker` below
+
+ /// Marker type, signalling variance and dropck behavior
+ /// by containing all potential types this type represents
+ #[allow(clippy::type_complexity)] // needed to get correct marker type behavior
+ marker: PhantomData<(Vec<T::ULE>, &'a [T::ULE])>,
+}
+
+// Send normally inherits as long as all fields are Send, but references are Send only
+// when their contents are Sync (this is the core purpose of Sync), so
+// we need a Send+Sync bound here, since this struct can logically be either a vector or a slice.
+unsafe impl<'a, T: AsULE> Send for ZeroVec<'a, T> where T::ULE: Send + Sync {}
+// Sync typically inherits as long as all fields are Sync; it is spelled out here because the raw pointer held by EyepatchHackVector is never Sync on its own
+unsafe impl<'a, T: AsULE> Sync for ZeroVec<'a, T> where T::ULE: Sync {}
+
+/// A `ZeroVec` dereferences to a `ZeroSlice` over its ULE data.
+impl<T> Deref for ZeroVec<'_, T>
+where
+    T: AsULE,
+{
+    type Target = ZeroSlice<T>;
+
+    #[inline]
+    fn deref(&self) -> &ZeroSlice<T> {
+        ZeroSlice::from_ule_slice(self.vector.as_slice())
+    }
+}
+
+// Represents an unsafe, potentially-owned vector/slice type without a lifetime,
+// which works around dropck limitations.
+//
+// Must either be constructed by deconstructing a Vec<U>, or from &[U] with capacity set to
+// zero. Must not outlive its source &[U] in the borrowed case; this type does not in
+// and of itself uphold this guarantee, but the .as_slice() method assumes it.
+//
+// After https://github.com/rust-lang/rust/issues/34761 stabilizes,
+// we should remove this type and use #[may_dangle]
+struct EyepatchHackVector<U> {
+ /// Pointer to data
+ /// This pointer is *always* valid, the reason it is represented as a raw pointer
+ /// is that it may logically represent an `&[T::ULE]` or the ptr,len of a `Vec<T::ULE>`
+ buf: *mut [U],
+ /// Zero if the data is borrowed; otherwise the capacity of the owned buffer above
+ capacity: usize,
+}
+
+impl<U> EyepatchHackVector<U> {
+ // Return a slice to the inner data for an arbitrary caller-specified lifetime; unsafe because nothing ties `'a` to the data
+ #[inline]
+ unsafe fn as_arbitrary_slice<'a>(&self) -> &'a [U] {
+ &*self.buf
+ }
+ // Return a slice to the inner data, borrowed for as long as `self` is borrowed
+ #[inline]
+ const fn as_slice<'a>(&'a self) -> &'a [U] {
+ unsafe { &*(self.buf as *const [U]) } // relies on the invariant that `buf` is always valid
+ }
+
+ /// Return this type as a vector
+ ///
+ /// Data MUST be known to be owned beforehand
+ ///
+ /// Because this borrows self, this is effectively creating two owners to the same
+ /// data, make sure that `self` is cleaned up after this
+ ///
+ /// (this does not simply take `self` since then it wouldn't be usable from the Drop impl)
+ unsafe fn get_vec(&self) -> Vec<U> {
+ debug_assert!(self.capacity != 0); // a capacity of zero marks borrowed data
+ let slice: &[U] = self.as_slice();
+ let len = slice.len();
+ // Safety: we are assuming owned, and in owned cases
+ // this always represents a valid vector
+ Vec::from_raw_parts(self.buf as *mut U, len, self.capacity)
+ }
+}
+
+impl<U> Drop for EyepatchHackVector<U> {
+ #[inline]
+ fn drop(&mut self) {
+ if self.capacity != 0 { // only owned data (non-zero capacity) is released here
+ unsafe {
+ // we don't need to clean up self here since we're already in a Drop impl
+ let _ = self.get_vec(); // the rebuilt Vec is dropped right away
+ }
+ }
+ }
+}
+
+/// Cloning an owned `ZeroVec` copies its elements into a new `Vec`; cloning a borrowed one copies only the pointer.
+impl<T> Clone for ZeroVec<'_, T>
+where
+    T: AsULE,
+{
+    fn clone(&self) -> Self {
+        if !self.is_owned() {
+            // Borrowed: point at the same buffer and keep the "borrowed" marker (capacity 0).
+            let vector = EyepatchHackVector {
+                buf: self.vector.buf,
+                capacity: 0,
+            };
+            return Self {
+                vector,
+                marker: PhantomData,
+            };
+        }
+        // Owned: copy the ULE elements into a new vector.
+        let copied: Vec<T::ULE> = self.as_ule_slice().into();
+        Self::new_owned(copied)
+    }
+}
+
+/// Exposes the same `ZeroSlice` view that `Deref` provides.
+impl<T> AsRef<ZeroSlice<T>> for ZeroVec<'_, T>
+where
+    T: AsULE,
+{
+    fn as_ref(&self) -> &ZeroSlice<T> {
+        Deref::deref(self)
+    }
+}
+
+/// Formats as `ZeroVec([...])`, printing the elements collected by `to_vec()`.
+impl<'a, T: AsULE + fmt::Debug> fmt::Debug for ZeroVec<'a, T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let elements = self.to_vec();
+        write!(f, "ZeroVec({:?})", elements)
+    }
+}
+
+/// Marker impl: comparing two `ZeroVec`s is a full equivalence relation whenever `T: Eq`.
+impl<'a, T: AsULE + Eq + ?Sized> Eq for ZeroVec<'a, T> {}
+
+/// Two `ZeroVec`s are equal when they yield equal elements in the same order.
+impl<'a, 'b, T: AsULE + PartialEq + ?Sized> PartialEq<ZeroVec<'b, T>> for ZeroVec<'a, T> {
+    #[inline]
+    fn eq(&self, other: &ZeroVec<'b, T>) -> bool {
+        // The comparison goes through `T` because `T::ULE` is not required to implement PartialEq.
+        Iterator::eq(self.iter(), other.iter())
+    }
+}
+
+/// Element-by-element comparison against a plain slice of `T`.
+impl<'a, 'b, T: AsULE + PartialEq + ?Sized> PartialEq<&'b [T]> for ZeroVec<'a, T> {
+    #[inline]
+    fn eq(&self, other: &&[T]) -> bool {
+        let rhs = other.iter().copied();
+        self.iter().eq(rhs)
+    }
+}
+
+/// Element-by-element comparison against a fixed-size array of `T`.
+impl<'a, T: AsULE + PartialEq + ?Sized, const N: usize> PartialEq<[T; N]> for ZeroVec<'a, T> {
+    #[inline]
+    fn eq(&self, other: &[T; N]) -> bool {
+        let rhs = other.iter().copied();
+        self.iter().eq(rhs)
+    }
+}
+
+/// The default `ZeroVec` is whatever `ZeroVec::new()` returns.
+impl<T> Default for ZeroVec<'_, T>
+where
+    T: AsULE,
+{
+    #[inline]
+    fn default() -> Self {
+        ZeroVec::new()
+    }
+}
+
+/// Ordering is decided by comparing the elements of both vectors in iteration order.
+impl<T> PartialOrd for ZeroVec<'_, T>
+where
+    T: AsULE + PartialOrd,
+{
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Iterator::partial_cmp(self.iter(), other.iter())
+    }
+}
+
+/// Total ordering obtained by comparing the elements of both vectors in iteration order.
+impl<T> Ord for ZeroVec<'_, T>
+where
+    T: AsULE + Ord,
+{
+    fn cmp(&self, other: &Self) -> Ordering {
+        Iterator::cmp(self.iter(), other.iter())
+    }
+}
+
+/// Exposes the underlying ULE slice.
+impl<T> AsRef<[T::ULE]> for ZeroVec<'_, T>
+where
+    T: AsULE,
+{
+    fn as_ref(&self) -> &[T::ULE] {
+        let ule_slice: &[T::ULE] = self.as_ule_slice();
+        ule_slice
+    }
+}
+
+/// Wraps a borrowed ULE slice without copying it; see `ZeroVec::new_borrowed`.
+impl<'a, T> From<&'a [T::ULE]> for ZeroVec<'a, T>
+where
+    T: AsULE,
+{
+    fn from(other: &'a [T::ULE]) -> Self {
+        Self::new_borrowed(other)
+    }
+}
+
+/// Takes ownership of an existing vector of ULE values; see `ZeroVec::new_owned`.
+impl<T> From<Vec<T::ULE>> for ZeroVec<'_, T>
+where
+    T: AsULE,
+{
+    fn from(other: Vec<T::ULE>) -> Self {
+        Self::new_owned(other)
+    }
+}
+
+impl<'a, T> ZeroVec<'a, T>
+where
+ T: AsULE + ?Sized,
+{
+ /// Creates a new, borrowed, empty `ZeroVec<T>`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let zv: ZeroVec<u16> = ZeroVec::new();
+ /// assert!(zv.is_empty());
+ /// ```
+ #[inline]
+    pub const fn new() -> Self {
+        // Borrowing an empty slice requires no allocation.
+        ZeroVec::new_borrowed(&[])
+    }
+
+ /// Same as `ZeroSlice::len`, which is available through `Deref` and not `const`.
+    pub const fn const_len(&self) -> usize {
+        let ule_slice = self.vector.as_slice();
+        ule_slice.len()
+    }
+
+ /// Creates a new owned `ZeroVec` using an existing
+ /// allocated backing buffer
+ ///
+ /// If you have a slice of `&[T]`s, prefer using
+ /// [`Self::alloc_from_slice()`].
+ #[inline]
+    pub fn new_owned(vec: Vec<T::ULE>) -> Self {
+        // Take the vector apart without running its destructor. This is the only
+        // place that goes from `Vec` to `ZeroVec`; every other such conversion
+        // should be routed through this function.
+        let mut parts = mem::ManuallyDrop::new(vec);
+        let capacity = parts.capacity();
+        let buf = ptr::slice_from_raw_parts_mut(parts.as_mut_ptr(), parts.len());
+        Self {
+            vector: EyepatchHackVector { buf, capacity },
+            marker: PhantomData,
+        }
+    }
+
+ /// Creates a new borrowed `ZeroVec` using an existing
+ /// backing buffer
+ #[inline]
+    pub const fn new_borrowed(slice: &'a [T::ULE]) -> Self {
+        // The buffer is stored as a raw pointer; `capacity: 0` is what marks the
+        // vector as borrowed (see `is_owned`).
+        let buf = slice as *const [T::ULE] as *mut [T::ULE];
+        Self {
+            vector: EyepatchHackVector { buf, capacity: 0 },
+            marker: PhantomData,
+        }
+    }
+
+ /// Creates a new, owned, empty `ZeroVec<T>`, with a certain capacity pre-allocated.
+    pub fn with_capacity(capacity: usize) -> Self {
+        // NOTE(review): ownership is tracked through a non-zero capacity, so a
+        // `capacity` of 0 presumably yields a vector that `is_owned()` reports as
+        // not owned - confirm this is intended.
+        let buffer: Vec<T::ULE> = Vec::with_capacity(capacity);
+        Self::new_owned(buffer)
+    }
+
+ /// Parses a `&[u8]` buffer into a `ZeroVec<T>`.
+ ///
+ /// This function is infallible for built-in integer types, but fallible for other types,
+ /// such as `char`. For more information, see [`ULE::parse_byte_slice`].
+ ///
+ /// The bytes within the byte buffer must remain constant for the life of the ZeroVec.
+ ///
+ /// # Endianness
+ ///
+ /// The byte buffer must be encoded in little-endian, even if running in a big-endian
+ /// environment. This ensures a consistent representation of data across platforms.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert!(!zerovec.is_owned());
+ /// assert_eq!(zerovec.get(2), Some(421));
+ /// ```
+    pub fn parse_byte_slice(bytes: &'a [u8]) -> Result<Self, ZeroVecError> {
+        // Parsing is delegated to the ULE type; the result keeps borrowing `bytes`.
+        let ule_slice = T::ULE::parse_byte_slice(bytes)?;
+        Ok(ZeroVec::new_borrowed(ule_slice))
+    }
+
+ /// Uses a `&[u8]` buffer as a `ZeroVec<T>` without any verification.
+ ///
+ /// The resulting element count is `bytes.len() / size_of::<T::ULE>()`; the
+ /// division truncates, so trailing bytes that do not form a whole element
+ /// are not part of the result.
+ ///
+ /// # Safety
+ ///
+ /// `bytes` needs to be an output from [`ZeroSlice::as_bytes()`].
+ pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
+ // &[u8] and &[T::ULE] are the same slice with different length metadata.
+ // NOTE(review): no validation happens here; soundness relies entirely on the
+ // caller upholding the `# Safety` contract above.
+ Self::new_borrowed(core::slice::from_raw_parts(
+ bytes.as_ptr() as *const T::ULE,
+ bytes.len() / core::mem::size_of::<T::ULE>(),
+ ))
+ }
+
+ /// Converts a `ZeroVec<T>` into a `ZeroVec<u8>`, retaining the current ownership model.
+ ///
+ /// Note that the length of the ZeroVec may change.
+ ///
+ /// # Examples
+ ///
+ /// Convert a borrowed `ZeroVec`:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// let zv_bytes = zerovec.into_bytes();
+ ///
+ /// assert!(!zv_bytes.is_owned());
+ /// assert_eq!(zv_bytes.get(0), Some(0xD3));
+ /// ```
+ ///
+ /// Convert an owned `ZeroVec`:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let nums: &[u16] = &[211, 281, 421, 461];
+ /// let zerovec = ZeroVec::alloc_from_slice(nums);
+ /// let zv_bytes = zerovec.into_bytes();
+ ///
+ /// assert!(zv_bytes.is_owned());
+ /// assert_eq!(zv_bytes.get(0), Some(0xD3));
+ /// ```
+    pub fn into_bytes(self) -> ZeroVec<'a, u8> {
+        match self.into_cow() {
+            Cow::Owned(ule_vec) => {
+                // The owned case copies the byte view into a new `Vec<u8>`.
+                let byte_vec = T::ULE::as_byte_slice(&ule_vec).to_vec();
+                ZeroVec::new_owned(byte_vec)
+            }
+            // The borrowed case only reinterprets the existing slice as bytes.
+            Cow::Borrowed(ule_slice) => ZeroVec::new_borrowed(T::ULE::as_byte_slice(ule_slice)),
+        }
+    }
+
+ /// Casts a `ZeroVec<T>` to a compatible `ZeroVec<P>`.
+ ///
+ /// `T` and `P` are compatible if they have the same `ULE` representation.
+ ///
+ /// If the `ULE`s of `T` and `P` are different types but have the same size,
+ /// use [`Self::try_into_converted()`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ ///
+ /// let zerovec_u16: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// assert_eq!(zerovec_u16.get(3), Some(32973));
+ ///
+ /// let zerovec_i16: ZeroVec<i16> = zerovec_u16.cast();
+ /// assert_eq!(zerovec_i16.get(3), Some(-32563));
+ /// ```
+    pub fn cast<P>(self) -> ZeroVec<'a, P>
+    where
+        P: AsULE<ULE = T::ULE>,
+    {
+        // Both sides share one ULE type, so the buffer is passed along unchanged.
+        match self.into_cow() {
+            Cow::Borrowed(ule_slice) => ZeroVec::new_borrowed(ule_slice),
+            Cow::Owned(ule_vec) => ZeroVec::new_owned(ule_vec),
+        }
+    }
+
+ /// Converts a `ZeroVec<T>` into a `ZeroVec<P>`, retaining the current ownership model.
+ ///
+ /// If `T` and `P` have the exact same `ULE`, use [`Self::cast()`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if `T::ULE` and `P::ULE` are not the same size.
+ ///
+ /// # Examples
+ ///
+ /// Convert a borrowed `ZeroVec`:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01];
+ /// let zv_char: ZeroVec<char> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("valid code points");
+ /// let zv_u8_3: ZeroVec<[u8; 3]> =
+ /// zv_char.try_into_converted().expect("infallible conversion");
+ ///
+ /// assert!(!zv_u8_3.is_owned());
+ /// assert_eq!(zv_u8_3.get(0), Some([0x7F, 0xF3, 0x01]));
+ /// ```
+ ///
+ /// Convert an owned `ZeroVec`:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let chars: &[char] = &['🍿', '🙉'];
+ /// let zv_char = ZeroVec::alloc_from_slice(chars);
+ /// let zv_u8_3: ZeroVec<[u8; 3]> =
+ /// zv_char.try_into_converted().expect("length is divisible");
+ ///
+ /// assert!(zv_u8_3.is_owned());
+ /// assert_eq!(zv_u8_3.get(0), Some([0x7F, 0xF3, 0x01]));
+ /// ```
+ ///
+ /// If the types are not the same size, we refuse to convert:
+ ///
+ /// ```should_panic
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01];
+ /// let zv_char: ZeroVec<char> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("valid code points");
+ ///
+ /// // Panics! mem::size_of::<char::ULE> != mem::size_of::<u16::ULE>
+ /// zv_char.try_into_converted::<u16>();
+ /// ```
+ ///
+ /// Instead, convert to bytes and then parse:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01];
+ /// let zv_char: ZeroVec<char> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("valid code points");
+ /// let zv_u16: ZeroVec<u16> =
+ /// zv_char.into_bytes().try_into_parsed().expect("infallible");
+ ///
+ /// assert!(!zv_u16.is_owned());
+ /// assert_eq!(zv_u16.get(0), Some(0xF37F));
+ /// ```
+ pub fn try_into_converted<P: AsULE>(self) -> Result<ZeroVec<'a, P>, ZeroVecError> {
+ // A size mismatch is a panic, not an `Err`: only validation failures of the
+ // bytes as `P::ULE` are reported through the `Result`.
+ assert_eq!(
+ core::mem::size_of::<<T as AsULE>::ULE>(),
+ core::mem::size_of::<<P as AsULE>::ULE>()
+ );
+ match self.into_cow() {
+ Cow::Borrowed(old_slice) => {
+ // Re-parse the same bytes as `P::ULE`; the result keeps borrowing for `'a`.
+ let bytes: &'a [u8] = T::ULE::as_byte_slice(old_slice);
+ let new_slice = P::ULE::parse_byte_slice(bytes)?;
+ Ok(ZeroVec::new_borrowed(new_slice))
+ }
+ Cow::Owned(old_vec) => {
+ // Validate first; only on success is the allocation reinterpreted in place.
+ let bytes: &[u8] = T::ULE::as_byte_slice(&old_vec);
+ P::ULE::validate_byte_slice(bytes)?;
+ // Feature "vec_into_raw_parts" is not yet stable (#65816). Polyfill:
+ let (ptr, len, cap) = {
+ // Take ownership of the pointer
+ let mut v = mem::ManuallyDrop::new(old_vec);
+ // Fetch the pointer, length, and capacity
+ (v.as_mut_ptr(), v.len(), v.capacity())
+ };
+ // Safety checklist for Vec::from_raw_parts:
+ // 1. ptr came from a Vec<T>
+ // 2. P and T are asserted above to be the same size
+ // 3. length is what it was before
+ // 4. capacity is what it was before
+ // NOTE(review): `Vec::from_raw_parts` also requires the element types to have
+ // the same alignment; presumably guaranteed by the `ULE` contract - confirm.
+ let new_vec = unsafe {
+ let ptr = ptr as *mut P::ULE;
+ Vec::from_raw_parts(ptr, len, cap)
+ };
+ Ok(ZeroVec::new_owned(new_vec))
+ }
+ }
+ }
+
+ /// Check if this type is fully owned
+ #[inline]
+    pub fn is_owned(&self) -> bool {
+        // Borrowed vectors are always constructed with a capacity of zero.
+        self.vector.capacity > 0
+    }
+
+ /// If this is a borrowed ZeroVec, return it as a slice that covers
+ /// its lifetime parameter
+ #[inline]
+    pub fn as_maybe_borrowed(&self) -> Option<&'a ZeroSlice<T>> {
+        if self.is_owned() {
+            return None;
+        }
+        // We can extend the lifetime of the slice to 'a
+        // since we know it is borrowed
+        let ule_slice: &'a [T::ULE] = unsafe { self.vector.as_arbitrary_slice() };
+        Some(ZeroSlice::from_ule_slice(ule_slice))
+    }
+
+ /// If the ZeroVec is owned, returns the capacity of the vector.
+ ///
+ /// Otherwise, if the ZeroVec is borrowed, returns `None`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let mut zv = ZeroVec::<u8>::new_borrowed(&[0, 1, 2, 3]);
+ /// assert!(!zv.is_owned());
+ /// assert_eq!(zv.owned_capacity(), None);
+ ///
+ /// // Convert to owned without appending anything
+ /// zv.with_mut(|v| ());
+ /// assert!(zv.is_owned());
+ /// assert_eq!(zv.owned_capacity(), Some(4.try_into().unwrap()));
+ ///
+ /// // Double the size by appending
+ /// zv.with_mut(|v| v.push(0));
+ /// assert!(zv.is_owned());
+ /// assert_eq!(zv.owned_capacity(), Some(8.try_into().unwrap()));
+ /// ```
+ #[inline]
+    pub fn owned_capacity(&self) -> Option<NonZeroUsize> {
+        // A stored capacity of zero means "borrowed" and therefore maps to `None`.
+        NonZeroUsize::new(self.vector.capacity)
+    }
+}
+
+impl<'a> ZeroVec<'a, u8> {
+ /// Converts a `ZeroVec<u8>` into a `ZeroVec<T>`, retaining the current ownership model.
+ ///
+ /// Note that the length of the ZeroVec may change.
+ ///
+ /// # Examples
+ ///
+ /// Convert a borrowed `ZeroVec`:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zv_bytes = ZeroVec::new_borrowed(bytes);
+ /// let zerovec: ZeroVec<u16> = zv_bytes.try_into_parsed().expect("infallible");
+ ///
+ /// assert!(!zerovec.is_owned());
+ /// assert_eq!(zerovec.get(0), Some(211));
+ /// ```
+ ///
+ /// Convert an owned `ZeroVec`:
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: Vec<u8> = vec![0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zv_bytes = ZeroVec::new_owned(bytes);
+ /// let zerovec: ZeroVec<u16> = zv_bytes.try_into_parsed().expect("infallible");
+ ///
+ /// assert!(zerovec.is_owned());
+ /// assert_eq!(zerovec.get(0), Some(211));
+ /// ```
+    pub fn try_into_parsed<T: AsULE>(self) -> Result<ZeroVec<'a, T>, ZeroVecError> {
+        let parsed = match self.into_cow() {
+            Cow::Owned(byte_vec) => {
+                // Validate the owned bytes, then copy the parsed elements into a new vector.
+                let ule_vec = T::ULE::parse_byte_slice(&byte_vec)?.to_vec();
+                ZeroVec::new_owned(ule_vec)
+            }
+            // Borrowed bytes are parsed in place and stay borrowed.
+            Cow::Borrowed(bytes) => ZeroVec::new_borrowed(T::ULE::parse_byte_slice(bytes)?),
+        };
+        Ok(parsed)
+    }
+}
+
+impl<'a, T> ZeroVec<'a, T>
+where
+ T: AsULE,
+{
+ /// Creates a `ZeroVec<T>` from a `&[T]` by allocating memory.
+ ///
+ /// This function results in an `Owned` instance of `ZeroVec<T>`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// // The little-endian bytes correspond to the numbers on the following line.
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let nums: &[u16] = &[211, 281, 421, 461];
+ ///
+ /// let zerovec = ZeroVec::alloc_from_slice(nums);
+ ///
+ /// assert!(zerovec.is_owned());
+ /// assert_eq!(bytes, zerovec.as_bytes());
+ /// ```
+ #[inline]
+    pub fn alloc_from_slice(other: &[T]) -> Self {
+        // Convert every aligned value to its unaligned form, collecting into a new buffer.
+        let ule_vec: Vec<T::ULE> = other
+            .iter()
+            .map(|&aligned| aligned.to_unaligned())
+            .collect();
+        Self::new_owned(ule_vec)
+    }
+
+ /// Creates a `Vec<T>` from a `ZeroVec<T>`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let nums: &[u16] = &[211, 281, 421, 461];
+ /// let vec: Vec<u16> = ZeroVec::alloc_from_slice(nums).to_vec();
+ ///
+ /// assert_eq!(nums, vec.as_slice());
+ /// ```
+ #[inline]
+    pub fn to_vec(&self) -> Vec<T> {
+        self.iter().collect::<Vec<T>>()
+    }
+}
+
+impl<'a, T> ZeroVec<'a, T>
+where
+ T: EqULE,
+{
+ /// Attempts to create a `ZeroVec<'a, T>` from a `&'a [T]` by borrowing the argument.
+ ///
+ /// If this is not possible, such as on a big-endian platform, `None` is returned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// // The little-endian bytes correspond to the numbers on the following line.
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let nums: &[u16] = &[211, 281, 421, 461];
+ ///
+ /// if let Some(zerovec) = ZeroVec::try_from_slice(nums) {
+ /// assert!(!zerovec.is_owned());
+ /// assert_eq!(bytes, zerovec.as_bytes());
+ /// }
+ /// ```
+ #[inline]
+    pub fn try_from_slice(slice: &'a [T]) -> Option<Self> {
+        // `None` from `slice_to_unaligned` is passed straight through to the caller.
+        let ule_slice = T::slice_to_unaligned(slice)?;
+        Some(Self::new_borrowed(ule_slice))
+    }
+
+ /// Creates a `ZeroVec<'a, T>` from a `&'a [T]`, either by borrowing the argument or by
+ /// allocating a new vector.
+ ///
+ /// This is a cheap operation on little-endian platforms, falling back to a more expensive
+ /// operation on big-endian platforms.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// // The little-endian bytes correspond to the numbers on the following line.
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let nums: &[u16] = &[211, 281, 421, 461];
+ ///
+ /// let zerovec = ZeroVec::from_slice_or_alloc(nums);
+ ///
+ /// // Note: zerovec could be either borrowed or owned.
+ /// assert_eq!(bytes, zerovec.as_bytes());
+ /// ```
+ #[inline]
+    pub fn from_slice_or_alloc(slice: &'a [T]) -> Self {
+        match Self::try_from_slice(slice) {
+            Some(borrowed) => borrowed,
+            // Borrowing was not possible: fall back to an allocating conversion.
+            None => Self::alloc_from_slice(slice),
+        }
+    }
+}
+
+impl<'a, T> ZeroVec<'a, T>
+where
+ T: AsULE,
+{
+ /// Mutates each element according to a given function, meant to be
+ /// a more convenient version of calling `.iter_mut()` with
+ /// [`ZeroVec::with_mut()`] which serves fewer use cases.
+ ///
+ /// This will convert the ZeroVec into an owned ZeroVec if not already the case.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ule::AsULE;
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let mut zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// zerovec.for_each_mut(|item| *item += 1);
+ ///
+ /// assert_eq!(zerovec.to_vec(), &[212, 282, 422, 462]);
+ /// assert!(zerovec.is_owned());
+ /// ```
+ #[inline]
+    pub fn for_each_mut(&mut self, mut f: impl FnMut(&mut T)) {
+        for ule in self.to_mut_slice() {
+            // Decode, let the callback edit the aligned value, then store it back.
+            let mut aligned = T::from_unaligned(*ule);
+            f(&mut aligned);
+            *ule = aligned.to_unaligned();
+        }
+    }
+
+ /// Same as [`ZeroVec::for_each_mut()`], but bubbles up errors.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ule::AsULE;
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let mut zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// zerovec.try_for_each_mut(|item| {
+ /// *item = item.checked_add(1).ok_or(())?;
+ /// Ok(())
+ /// })?;
+ ///
+ /// assert_eq!(zerovec.to_vec(), &[212, 282, 422, 462]);
+ /// assert!(zerovec.is_owned());
+ /// # Ok::<(), ()>(())
+ /// ```
+ #[inline]
+    pub fn try_for_each_mut<E>(
+        &mut self,
+        mut f: impl FnMut(&mut T) -> Result<(), E>,
+    ) -> Result<(), E> {
+        for ule in self.to_mut_slice() {
+            let mut aligned = T::from_unaligned(*ule);
+            // Stop at the first error; the failing element is not written back.
+            f(&mut aligned)?;
+            *ule = aligned.to_unaligned();
+        }
+        Ok(())
+    }
+
+ /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// assert!(!zerovec.is_owned());
+ ///
+ /// let owned = zerovec.into_owned();
+ /// assert!(owned.is_owned());
+ /// ```
+    pub fn into_owned(self) -> ZeroVec<'static, T> {
+        // An owned buffer is reused as-is; a borrowed one is copied into a new `Vec`.
+        let ule_vec: Vec<T::ULE> = self.into_cow().into_owned();
+        ZeroVec::new_owned(ule_vec)
+    }
+
+ /// Allows the ZeroVec to be mutated by converting it to an owned variant, and producing
+ /// a mutable vector of ULEs. If you only need a mutable slice, consider using [`Self::to_mut_slice()`]
+ /// instead.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use crate::zerovec::ule::AsULE;
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let mut zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// assert!(!zerovec.is_owned());
+ ///
+ /// zerovec.with_mut(|v| v.push(12_u16.to_unaligned()));
+ /// assert!(zerovec.is_owned());
+ /// ```
+    pub fn with_mut<R>(&mut self, f: impl FnOnce(&mut Vec<T::ULE>) -> R) -> R {
+        // `f` may panic while the vector is moved out of `self`, so an empty
+        // default value is left in place until the result is written back.
+        let mut ule_vec: Vec<T::ULE> = mem::take(self).into_cow().into_owned();
+        let result = f(&mut ule_vec);
+        *self = Self::new_owned(ule_vec);
+        result
+    }
+
+ /// Allows the ZeroVec to be mutated by converting it to an owned variant (if necessary)
+ /// and returning a slice to its backing buffer. [`Self::with_mut()`] allows for mutation
+ /// of the vector itself.
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use crate::zerovec::ule::AsULE;
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
+ /// let mut zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// assert!(!zerovec.is_owned());
+ ///
+ /// zerovec.to_mut_slice()[1] = 5u16.to_unaligned();
+ /// assert!(zerovec.is_owned());
+ /// ```
+ pub fn to_mut_slice(&mut self) -> &mut [T::ULE] {
+ // A borrowed buffer must not be written through, so copy it into an owned
+ // vector first.
+ if !self.is_owned() {
+ // `buf` is either a valid vector or slice of `T::ULE`s, either
+ // way it's always valid
+ let slice = self.vector.as_slice();
+ *self = ZeroVec::new_owned(slice.into());
+ }
+ // NOTE(review): at this point `buf` was produced by `new_owned` (either just
+ // above or earlier), and `&mut self` gives exclusive access for the returned
+ // lifetime - presumably that is what makes this dereference sound; confirm.
+ unsafe { &mut *self.vector.buf }
+ }
+
+    /// Remove all elements from this ZeroVec and reset it to an empty borrowed state.
+    pub fn clear(&mut self) {
+        *self = Self::new();
+    }
+
+ /// Converts the type into a `Cow<'a, [T::ULE]>`, which is
+ /// the logical equivalent of this type's internal representation
+ #[inline]
+ pub fn into_cow(self) -> Cow<'a, [T::ULE]> {
+ // Wrapping in `ManuallyDrop` keeps the destructor of `self` from running, so
+ // the buffer can be handed over to the returned `Cow`.
+ let this = mem::ManuallyDrop::new(self);
+ if this.is_owned() {
+ // NOTE(review): `{ this }` presumably moves `this` into a temporary so it
+ // cannot be touched again after the buffer is extracted - confirm.
+ let vec = unsafe {
+ // safe to call: we know it's owned,
+ // and `self`/`this` are thenceforth no longer used or dropped
+ { this }.vector.get_vec()
+ };
+ Cow::Owned(vec)
+ } else {
+ // We can extend the lifetime of the slice to 'a
+ // since we know it is borrowed
+ let slice = unsafe { { this }.vector.as_arbitrary_slice() };
+ Cow::Borrowed(slice)
+ }
+ }
+}
+
+impl<T: AsULE> FromIterator<T> for ZeroVec<'_, T> {
+    /// Creates an owned [`ZeroVec`] from an iterator of values.
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        // Each aligned value is converted to its unaligned form while collecting.
+        let ule_vec: Vec<T::ULE> = iter.into_iter().map(T::to_unaligned).collect();
+        ZeroVec::new_owned(ule_vec)
+    }
+}
+
+/// Convenience wrapper for [`ZeroSlice::from_ule_slice`]. The value will be created at compile-time,
+/// meaning that all arguments must also be constant.
+///
+/// # Arguments
+///
+/// * `$aligned` - The type of an element in its canonical, aligned form, e.g., `char`.
+/// * `$convert` - A const function that converts an `$aligned` into its unaligned equivalent, e.g.,
+/// `const fn from_aligned(a: CanonicalType) -> CanonicalType::ULE`.
+/// * `$x` - The elements that the `ZeroSlice` will hold.
+///
+/// # Examples
+///
+/// Using array-conversion functions provided by this crate:
+///
+/// ```
+/// use zerovec::{ZeroSlice, zeroslice, ule::AsULE};
+/// use zerovec::ule::UnvalidatedChar;
+///
+/// const SIGNATURE: &ZeroSlice<char> = zeroslice!(char; <char as AsULE>::ULE::from_aligned; ['b', 'y', 'e', '✌']);
+/// const EMPTY: &ZeroSlice<u32> = zeroslice![];
+/// const UC: &ZeroSlice<UnvalidatedChar> =
+/// zeroslice!(
+/// UnvalidatedChar;
+/// <UnvalidatedChar as AsULE>::ULE::from_unvalidated_char;
+/// [UnvalidatedChar::from_char('a')]
+/// );
+/// let empty: &ZeroSlice<u32> = zeroslice![];
+/// let nums = zeroslice!(u32; <u32 as AsULE>::ULE::from_unsigned; [1, 2, 3, 4, 5]);
+/// assert_eq!(nums.last().unwrap(), 5);
+/// ```
+///
+/// Using a custom array-conversion function:
+///
+/// ```
+/// use zerovec::{ule::AsULE, ule::RawBytesULE, zeroslice, ZeroSlice};
+///
+/// const fn be_convert(num: i16) -> <i16 as AsULE>::ULE {
+/// RawBytesULE(num.to_be_bytes())
+/// }
+///
+/// const NUMBERS_BE: &ZeroSlice<i16> =
+/// zeroslice!(i16; be_convert; [1, -2, 3, -4, 5]);
+/// ```
+#[macro_export]
+macro_rules! zeroslice {
+ // No arguments: expands to the empty slice.
+ () => (
+ $crate::ZeroSlice::new_empty()
+ );
+ // `type; converter; [elements]`: every element is passed through `$convert`
+ // inside a `const` item, so the conversion is evaluated at compile time.
+ ($aligned:ty; $convert:expr; [$($x:expr),+ $(,)?]) => (
+ $crate::ZeroSlice::<$aligned>::from_ule_slice(
+ {const X: &[<$aligned as $crate::ule::AsULE>::ULE] = &[
+ $($convert($x)),*
+ ]; X}
+ )
+ );
+}
+
+/// Creates a borrowed `ZeroVec`. Convenience wrapper for `zeroslice!(...).as_zerovec()`. The value
+/// will be created at compile-time, meaning that all arguments must also be constant.
+///
+/// See [`zeroslice!`](crate::zeroslice) for more information.
+///
+/// # Examples
+///
+/// ```
+/// use zerovec::{ZeroVec, zerovec, ule::AsULE};
+///
+/// const SIGNATURE: ZeroVec<char> = zerovec!(char; <char as AsULE>::ULE::from_aligned; ['a', 'y', 'e', '✌']);
+/// assert!(!SIGNATURE.is_owned());
+///
+/// const EMPTY: ZeroVec<u32> = zerovec![];
+/// assert!(!EMPTY.is_owned());
+/// ```
+#[macro_export]
+macro_rules! zerovec {
+ // No arguments: expands to `ZeroVec::new()`.
+ () => (
+ $crate::ZeroVec::new()
+ );
+ // Forwards to `zeroslice!` and wraps the resulting slice with `as_zerovec()`.
+ ($aligned:ty; $convert:expr; [$($x:expr),+ $(,)?]) => (
+ $crate::zeroslice![$aligned; $convert; [$($x),+]].as_zerovec()
+ );
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::samples::*;
+
+    /// `get` returns the same values whether the vector was built from the
+    /// aligned sample slice or parsed from the little-endian sample buffer.
+    #[test]
+    fn test_get() {
+        let from_slice = ZeroVec::from_slice_or_alloc(TEST_SLICE);
+        let from_bytes = ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE).unwrap();
+        for zerovec in [&from_slice, &from_bytes] {
+            for index in 0..3 {
+                assert_eq!(zerovec.get(index), Some(TEST_SLICE[index]));
+            }
+        }
+    }
+
+    /// `binary_search` reports both a hit and an insertion point, for both
+    /// ways of constructing the vector.
+    #[test]
+    fn test_binary_search() {
+        let from_slice = ZeroVec::from_slice_or_alloc(TEST_SLICE);
+        let from_bytes = ZeroVec::<u32>::parse_byte_slice(TEST_BUFFER_LE).unwrap();
+        for zerovec in [&from_slice, &from_bytes] {
+            assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c));
+            assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c));
+        }
+    }
+
+    /// Parsing must work at any byte offset into the sample buffer.
+    #[test]
+    fn test_odd_alignment() {
+        /// Parses `bytes` as a `ZeroVec<u32>` and reads the element at `index`.
+        fn element_at(bytes: &[u8], index: usize) -> Option<u32> {
+            ZeroVec::<u32>::parse_byte_slice(bytes).unwrap().get(index)
+        }
+
+        assert_eq!(Some(0x020100), element_at(TEST_BUFFER_LE, 0));
+        assert_eq!(Some(0x04000201), element_at(&TEST_BUFFER_LE[1..77], 0));
+        assert_eq!(Some(0x05040002), element_at(&TEST_BUFFER_LE[2..78], 0));
+        assert_eq!(Some(0x06050400), element_at(&TEST_BUFFER_LE[3..79], 0));
+        assert_eq!(Some(0x060504), element_at(&TEST_BUFFER_LE[4..], 0));
+        assert_eq!(Some(0x4e4d4c00), element_at(&TEST_BUFFER_LE[75..79], 0));
+        assert_eq!(Some(0x4e4d4c00), element_at(&TEST_BUFFER_LE[3..79], 18));
+        assert_eq!(Some(0x4e4d4c), element_at(&TEST_BUFFER_LE[76..], 0));
+        assert_eq!(Some(0x4e4d4c), element_at(TEST_BUFFER_LE, 19));
+        // TODO(#1144): Check for correct slice length in RawBytesULE
+        // assert_eq!(
+        //     None,
+        //     ZeroVec::<u32>::parse_byte_slice(&TEST_BUFFER_LE[77..])
+        //         .unwrap()
+        //         .get(0)
+        // );
+        assert_eq!(None, element_at(TEST_BUFFER_LE, 20));
+        assert_eq!(None, element_at(&TEST_BUFFER_LE[3..79], 19));
+    }
+}
diff --git a/third_party/rust/zerovec/src/zerovec/serde.rs b/third_party/rust/zerovec/src/zerovec/serde.rs
new file mode 100644
index 0000000000..bb180d5a19
--- /dev/null
+++ b/third_party/rust/zerovec/src/zerovec/serde.rs
@@ -0,0 +1,221 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::{ZeroSlice, ZeroVec};
+use crate::ule::*;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::fmt;
+use core::marker::PhantomData;
+use core::mem;
+use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor};
+#[cfg(feature = "serde")]
+use serde::ser::{Serialize, SerializeSeq, Serializer};
+
+/// Serde visitor that produces a `ZeroVec<T>`.
+// `PhantomData<fn() -> T>` ties the visitor to `T` without storing a value of `T`.
+struct ZeroVecVisitor<T> {
+ marker: PhantomData<fn() -> T>,
+}
+
+impl<T> Default for ZeroVecVisitor<T> {
+    /// Creates the visitor; its only field is the type marker.
+    fn default() -> Self {
+        ZeroVecVisitor { marker: PhantomData }
+    }
+}
+
+impl<'de, T> Visitor<'de> for ZeroVecVisitor<T>
+where
+    T: 'de + Deserialize<'de> + AsULE,
+{
+    type Value = ZeroVec<'de, T>;
+
+    /// Describes the accepted input for serde error messages.
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a sequence or borrowed buffer of fixed-width elements")
+    }
+
+    /// Parses a byte buffer borrowed from the input into a `ZeroVec`.
+    ///
+    /// # Errors
+    ///
+    /// Parse failures are converted into the deserializer's error type.
+    fn visit_borrowed_bytes<E>(self, bytes: &'de [u8]) -> Result<Self::Value, E>
+    where
+        E: de::Error,
+    {
+        ZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom)
+    }
+
+    /// Reads a sequence of `T` values and stores them in an owned `ZeroVec`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error reported by the sequence accessor.
+    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
+    where
+        A: SeqAccess<'de>,
+    {
+        // The size hint is supplied by the input and cannot be trusted, so the
+        // up-front allocation is capped; the vector still grows on demand when
+        // the sequence really is longer.
+        const MAX_PREALLOC_BYTES: usize = 1024 * 1024;
+        let max_prealloc = MAX_PREALLOC_BYTES / mem::size_of::<T::ULE>().max(1);
+        let capacity = seq.size_hint().map_or(0, |hint| hint.min(max_prealloc));
+
+        let mut vec: Vec<T::ULE> = Vec::with_capacity(capacity);
+        while let Some(value) = seq.next_element::<T>()? {
+            vec.push(T::to_unaligned(value));
+        }
+        Ok(ZeroVec::new_owned(vec))
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, 'a, T> Deserialize<'de> for ZeroVec<'a, T>
+where
+    T: 'de + Deserialize<'de> + AsULE,
+    'de: 'a,
+{
+    /// Requests a byte buffer from binary formats and a sequence from
+    /// human-readable ones.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let visitor = ZeroVecVisitor::default();
+        if !deserializer.is_human_readable() {
+            return deserializer.deserialize_bytes(visitor);
+        }
+        deserializer.deserialize_seq(visitor)
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<T> Serialize for ZeroVec<'_, T>
+where
+    T: Serialize + AsULE,
+{
+    /// Writes the raw bytes for binary formats and a sequence of `T` values for
+    /// human-readable ones.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if !serializer.is_human_readable() {
+            return serializer.serialize_bytes(self.as_bytes());
+        }
+        let mut seq = serializer.serialize_seq(Some(self.len()))?;
+        self.iter()
+            .try_for_each(|value| seq.serialize_element(&value))?;
+        seq.end()
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, T> Deserialize<'de> for Box<ZeroSlice<T>>
+where
+    T: Deserialize<'de> + AsULE + 'static,
+{
+    /// Deserializes a `ZeroVec<T>` and moves its elements into a boxed slice.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let mut zerovec = ZeroVec::<T>::deserialize(deserializer)?;
+        // `with_mut` converts to an owned vector if needed; `mem::take` then
+        // moves that vector out, leaving an empty one behind.
+        let boxed_ules: Box<[T::ULE]> = zerovec.with_mut(mem::take).into_boxed_slice();
+        Ok(ZeroSlice::from_boxed_slice(boxed_ules))
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<'de, 'a, T> Deserialize<'de> for &'a ZeroSlice<T>
+where
+    T: Deserialize<'de> + AsULE + 'static,
+    'de: 'a,
+{
+    /// Deserializes a borrowed slice; fails for human-readable formats and
+    /// whenever the intermediate `ZeroVec` turns out not to be borrowed.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            return Err(de::Error::custom(
+                "&ZeroSlice cannot be deserialized from human-readable formats",
+            ));
+        }
+        let deserialized: ZeroVec<'a, T> = ZeroVec::deserialize(deserializer)?;
+        deserialized
+            .as_maybe_borrowed()
+            .ok_or_else(|| de::Error::custom("&ZeroSlice can only deserialize in zero-copy ways"))
+    }
+}
+
+/// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate
+impl<T> Serialize for ZeroSlice<T>
+where
+    T: Serialize + AsULE,
+{
+    /// Serializes through a borrowed `ZeroVec` view of this slice.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let borrowed = self.as_zerovec();
+        borrowed.serialize(serializer)
+    }
+}
+
+#[cfg(test)]
+#[allow(non_camel_case_types)]
+mod test {
+ use crate::samples::*;
+ use crate::ZeroVec;
+
+ // Never constructed by the tests below: it only has to compile, which checks that
+ // the derives accept a `ZeroVec` field marked `#[serde(borrow)]`.
+ #[derive(serde::Serialize, serde::Deserialize)]
+ struct DeriveTest_ZeroVec<'data> {
+ #[serde(borrow)]
+ _data: ZeroVec<'data, u16>,
+ }
+
+ // JSON (human-readable) round trip: the result must be an owned ZeroVec.
+ #[test]
+ fn test_serde_json() {
+ let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE);
+ let json_str = serde_json::to_string(&zerovec_orig).expect("serialize");
+ assert_eq!(JSON_STR, json_str);
+ // ZeroVec should deserialize from JSON to either Vec or ZeroVec
+ let vec_new: Vec<u32> =
+ serde_json::from_str(&json_str).expect("deserialize from buffer to Vec");
+ assert_eq!(
+ zerovec_orig,
+ ZeroVec::<u32>::from_slice_or_alloc(vec_new.as_slice())
+ );
+ let zerovec_new: ZeroVec<u32> =
+ serde_json::from_str(&json_str).expect("deserialize from buffer to ZeroVec");
+ assert_eq!(zerovec_orig, zerovec_new);
+ assert!(zerovec_new.is_owned());
+ }
+
+ // Bincode (binary) round trip: the result must borrow from the input buffer.
+ #[test]
+ fn test_serde_bincode() {
+ let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE);
+ let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+ assert_eq!(BINCODE_BUF, bincode_buf);
+ // ZeroVec should deserialize from Bincode to ZeroVec but not Vec
+ bincode::deserialize::<Vec<u32>>(&bincode_buf).expect_err("deserialize from buffer to Vec");
+ let zerovec_new: ZeroVec<u32> =
+ bincode::deserialize(&bincode_buf).expect("deserialize from buffer to ZeroVec");
+ assert_eq!(zerovec_orig, zerovec_new);
+
+ assert!(!zerovec_new.is_owned());
+ }
+
+ // Valid `char` data survives a binary round trip and stays borrowed.
+ #[test]
+ fn test_chars_valid() {
+ // 1-byte, 2-byte, 3-byte, and 4-byte character in UTF-8 (not as relevant in UTF-32)
+ let zerovec_orig = ZeroVec::alloc_from_slice(&['w', 'ω', '文', '𑄃']);
+ let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+ let zerovec_new: ZeroVec<char> =
+ bincode::deserialize(&bincode_buf).expect("deserialize from buffer to ZeroVec");
+ assert_eq!(zerovec_orig, zerovec_new);
+
+ assert!(!zerovec_new.is_owned());
+ }
+
+ // Bytes that are valid as `u32` but not as `char` must be rejected on deserialize.
+ #[test]
+ fn test_chars_invalid() {
+ // 119 and 120 are valid, but not 0xD800 (high surrogate)
+ let zerovec_orig: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&[119, 0xD800, 120]);
+ let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize");
+ let zerovec_result = bincode::deserialize::<ZeroVec<char>>(&bincode_buf);
+ assert!(zerovec_result.is_err());
+ }
+}
diff --git a/third_party/rust/zerovec/src/zerovec/slice.rs b/third_party/rust/zerovec/src/zerovec/slice.rs
new file mode 100644
index 0000000000..12d88deff8
--- /dev/null
+++ b/third_party/rust/zerovec/src/zerovec/slice.rs
@@ -0,0 +1,596 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use alloc::boxed::Box;
+use core::cmp::Ordering;
+use core::ops::Range;
+
+/// A zero-copy "slice", i.e. the zero-copy version of `[T]`. This behaves
+/// similarly to [`ZeroVec<T>`], however [`ZeroVec<T>`] is allowed to contain
+/// owned data and as such is ideal for deserialization since most human readable
+/// serialization formats cannot unconditionally deserialize zero-copy.
+///
+/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
+/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
+/// using `VarZeroVec<ZeroSlice<T>>`. See the [`VarZeroVec`](crate::VarZeroVec) docs for an example.
+///
+/// # Examples
+///
+/// Const-construct a ZeroSlice of u16:
+///
+/// ```
+/// use zerovec::ule::AsULE;
+/// use zerovec::ZeroSlice;
+///
+/// const DATA: &ZeroSlice<u16> =
+/// ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([
+/// 211, 281, 421, 32973,
+/// ]));
+///
+/// assert_eq!(DATA.get(1), Some(281));
+/// ```
+#[repr(transparent)]
+pub struct ZeroSlice<T: AsULE>([T::ULE]);
+
+impl<T> ZeroSlice<T>
+where
+ T: AsULE,
+{
+ /// Returns an empty slice.
+ pub const fn new_empty() -> &'static Self {
+ Self::from_ule_slice(&[])
+ }
+
+ /// Get this [`ZeroSlice`] as a borrowed [`ZeroVec`]
+ ///
+ /// [`ZeroSlice`] does not have most of the methods that [`ZeroVec`] does,
+ /// so it is recommended to convert it to a [`ZeroVec`] before doing anything.
+ #[inline]
+ pub const fn as_zerovec(&self) -> ZeroVec<'_, T> {
+ ZeroVec::new_borrowed(&self.0)
+ }
+
+ /// Attempt to construct a `&ZeroSlice<T>` from a byte slice, returning an error
+ /// if it's not a valid byte sequence
+ pub fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
+ T::ULE::parse_byte_slice(bytes).map(Self::from_ule_slice)
+ }
+
+ /// Uses a `&[u8]` buffer as a `&ZeroSlice<T>` without any verification.
+ ///
+ /// # Safety
+ ///
+ /// `bytes` needs to be an output from [`ZeroSlice::as_bytes()`].
+ pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
+ // Reinterpret the bytes as ULEs: same pointer, length rescaled from bytes to elements.
+ Self::from_ule_slice(core::slice::from_raw_parts(
+ bytes.as_ptr() as *const T::ULE,
+ bytes.len() / core::mem::size_of::<T::ULE>(),
+ ))
+ }
+
+ /// Construct a `&ZeroSlice<T>` from a slice of ULEs.
+ ///
+ /// This function can be used for constructing `ZeroSlice`s in a const context, avoiding
+ /// parsing checks.
+ ///
+ /// See [`ZeroSlice`] for an example.
+ #[inline]
+ pub const fn from_ule_slice(slice: &[T::ULE]) -> &Self {
+ // SAFETY: ZeroSlice is `#[repr(transparent)]` over [T::ULE],
+ // so &ZeroSlice<T> can be safely cast from &[T::ULE]
+ unsafe { &*(slice as *const _ as *const Self) }
+ }
+
+ /// Construct a `Box<ZeroSlice<T>>` from a boxed slice of ULEs.
+ #[inline]
+ pub fn from_boxed_slice(slice: Box<[T::ULE]>) -> Box<Self> {
+ // SAFETY: ZeroSlice is `#[repr(transparent)]` over [T::ULE],
+ // so Box<ZeroSlice<T>> can be safely cast from Box<[T::ULE]>
+ unsafe { Box::from_raw(Box::into_raw(slice) as *mut Self) }
+ }
+
+ /// Returns this slice as its underlying `&[u8]` byte buffer representation.
+ ///
+ /// Useful for serialization.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// // The little-endian bytes correspond to the numbers on the following line.
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let nums: &[u16] = &[211, 281, 421, 32973];
+ ///
+ /// let zerovec = ZeroVec::alloc_from_slice(nums);
+ ///
+ /// assert_eq!(bytes, zerovec.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ T::ULE::as_byte_slice(self.as_ule_slice())
+ }
+
+ /// Dereferences this slice as `&[T::ULE]`.
+ #[inline]
+ pub const fn as_ule_slice(&self) -> &[T::ULE] {
+ &self.0
+ }
+
+ /// Returns the number of elements in this slice.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ule::AsULE;
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(4, zerovec.len());
+ /// assert_eq!(
+ /// bytes.len(),
+ /// zerovec.len() * std::mem::size_of::<<u16 as AsULE>::ULE>()
+ /// );
+ /// ```
+ #[inline]
+ pub const fn len(&self) -> usize {
+ self.as_ule_slice().len()
+ }
+
+ /// Returns whether this slice is empty.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// assert!(!zerovec.is_empty());
+ ///
+ /// let emptyvec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(&[]).expect("infallible");
+ /// assert!(emptyvec.is_empty());
+ /// ```
+ #[inline]
+ pub const fn is_empty(&self) -> bool {
+ self.as_ule_slice().is_empty()
+ }
+}
+
+impl<T> ZeroSlice<T>
+where
+ T: AsULE,
+{
+ /// Gets the element at the specified index. Returns `None` if out of range.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(zerovec.get(2), Some(421));
+ /// assert_eq!(zerovec.get(4), None);
+ /// ```
+ #[inline]
+ pub fn get(&self, index: usize) -> Option<T> {
+ self.as_ule_slice()
+ .get(index)
+ .copied()
+ .map(T::from_unaligned)
+ }
+
+ /// Gets the entire slice as an array of length `N`. Returns `None` if the slice
+ /// does not have exactly `N` elements.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// let array: [u16; 4] =
+ /// zerovec.get_as_array().expect("should be 4 items in array");
+ ///
+ /// assert_eq!(array[2], 421);
+ /// ```
+ pub fn get_as_array<const N: usize>(&self) -> Option<[T; N]> {
+ let ule_array = <&[T::ULE; N]>::try_from(self.as_ule_slice()).ok()?;
+ Some(ule_array.map(|u| T::from_unaligned(u)))
+ }
+
+ /// Gets a subslice of elements within a certain range. Returns `None` if the range
+ /// is out of bounds of this `ZeroSlice`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(
+ /// zerovec.get_subslice(1..3),
+ /// Some(&*ZeroVec::from_slice_or_alloc(&[0x0119, 0x01A5]))
+ /// );
+ /// assert_eq!(zerovec.get_subslice(3..5), None);
+ /// ```
+ #[inline]
+ pub fn get_subslice(&self, range: Range<usize>) -> Option<&ZeroSlice<T>> {
+ self.0.get(range).map(ZeroSlice::from_ule_slice)
+ }
+
+ /// Gets a borrowed reference to the underlying ULE at the specified index, or `None` if out of range.
+ ///
+ /// Prefer [`Self::get()`] over this method where possible since working
+ /// directly with `ULE` types is less ergonomic.
+ pub fn get_ule_ref(&self, index: usize) -> Option<&T::ULE> {
+ self.as_ule_slice().get(index)
+ }
+
+ /// Casts a `ZeroSlice<T>` to a compatible `ZeroSlice<P>`.
+ ///
+ /// `T` and `P` are compatible if they have the same `ULE` representation.
+ ///
+ /// If the `ULE`s of `T` and `P` are different, use [`Self::try_as_converted()`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroSlice;
+ ///
+ /// const BYTES: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// const ZS_U16: &ZeroSlice<u16> = {
+ /// match ZeroSlice::<u16>::try_from_bytes(BYTES) {
+ /// Ok(s) => s,
+ /// Err(_) => unreachable!(),
+ /// }
+ /// };
+ ///
+ /// let zs_i16: &ZeroSlice<i16> = ZS_U16.cast();
+ ///
+ /// assert_eq!(ZS_U16.get(3), Some(32973));
+ /// assert_eq!(zs_i16.get(3), Some(-32563));
+ /// ```
+ #[inline]
+ pub const fn cast<P>(&self) -> &ZeroSlice<P>
+ where
+ P: AsULE<ULE = T::ULE>,
+ {
+ ZeroSlice::<P>::from_ule_slice(self.as_ule_slice())
+ }
+
+ /// Converts a `&ZeroSlice<T>` into a `&ZeroSlice<P>`.
+ ///
+ /// The resulting slice will have the same length as the original slice
+ /// if and only if `T::ULE` and `P::ULE` are the same size.
+ ///
+ /// If `T` and `P` have the exact same `ULE`, use [`Self::cast()`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerovec::ZeroSlice;
+ ///
+ /// const BYTES: &[u8] = &[0x7F, 0xF3, 0x01, 0x00, 0x49, 0xF6, 0x01, 0x00];
+ /// const ZS_U32: &ZeroSlice<u32> = {
+ /// match ZeroSlice::<u32>::try_from_bytes(BYTES) {
+ /// Ok(s) => s,
+ /// Err(_) => unreachable!(),
+ /// }
+ /// };
+ ///
+ /// let zs_u8_4: &ZeroSlice<[u8; 4]> =
+ /// ZS_U32.try_as_converted().expect("valid code points");
+ ///
+ /// assert_eq!(ZS_U32.get(0), Some(127871));
+ /// assert_eq!(zs_u8_4.get(0), Some([0x7F, 0xF3, 0x01, 0x00]));
+ /// ```
+ #[inline]
+ pub fn try_as_converted<P: AsULE>(&self) -> Result<&ZeroSlice<P>, ZeroVecError> {
+ let new_slice = P::ULE::parse_byte_slice(self.as_bytes())?;
+ Ok(ZeroSlice::from_ule_slice(new_slice))
+ }
+
+ /// Gets the first element. Returns `None` if empty.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(zerovec.first(), Some(211));
+ /// ```
+ #[inline]
+ pub fn first(&self) -> Option<T> {
+ self.as_ule_slice().first().copied().map(T::from_unaligned)
+ }
+
+ /// Gets the last element. Returns `None` if empty.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(zerovec.last(), Some(32973));
+ /// ```
+ #[inline]
+ pub fn last(&self) -> Option<T> {
+ self.as_ule_slice().last().copied().map(T::from_unaligned)
+ }
+
+ /// Gets an iterator over the elements.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ /// let mut it = zerovec.iter();
+ ///
+ /// assert_eq!(it.next(), Some(211));
+ /// assert_eq!(it.next(), Some(281));
+ /// assert_eq!(it.next(), Some(421));
+ /// assert_eq!(it.next(), Some(32973));
+ /// assert_eq!(it.next(), None);
+ /// ```
+ #[inline]
+ pub fn iter(&self) -> impl DoubleEndedIterator<Item = T> + ExactSizeIterator<Item = T> + '_ {
+ self.as_ule_slice().iter().copied().map(T::from_unaligned)
+ }
+
+ /// Returns the first element and a subslice of the remaining elements, or `None` if empty.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ule::AsULE;
+ /// use zerovec::ZeroSlice;
+ ///
+ /// const DATA: &ZeroSlice<u16> =
+ /// ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([
+ /// 211, 281, 421, 32973,
+ /// ]));
+ /// const EXPECTED_VALUE: (u16, &ZeroSlice<u16>) = (
+ /// 211,
+ /// ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([
+ /// 281, 421, 32973,
+ /// ])),
+ /// );
+ /// assert_eq!(EXPECTED_VALUE, DATA.split_first().unwrap());
+ /// ```
+ #[inline]
+ pub fn split_first(&self) -> Option<(T, &ZeroSlice<T>)> {
+ if let Some(first) = self.first() {
+ return Some((
+ first,
+ // `unwrap()` must succeed: `first()` returned `Some`, so `1..len` is in bounds.
+ #[allow(clippy::unwrap_used)]
+ self.get_subslice(1..self.len()).unwrap(),
+ ));
+ }
+ None
+ }
+}
+
+impl<T> ZeroSlice<T>
+where
+ T: AsULE + Ord,
+{
+ /// Binary searches a sorted `ZeroSlice<T>` for the given element. For more information, see
+ /// the primitive function [`binary_search`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(zerovec.binary_search(&281), Ok(1));
+ /// assert_eq!(zerovec.binary_search(&282), Err(2));
+ /// ```
+ ///
+ /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
+ #[inline]
+ pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
+ self.as_ule_slice()
+ .binary_search_by(|probe| T::from_unaligned(*probe).cmp(x))
+ }
+}
+
+impl<T> ZeroSlice<T>
+where
+ T: AsULE,
+{
+ /// Binary searches a sorted `ZeroSlice<T>` based on a given predicate. For more information, see
+ /// the primitive function [`binary_search_by`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use zerovec::ZeroVec;
+ ///
+ /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
+ /// let zerovec: ZeroVec<u16> =
+ /// ZeroVec::parse_byte_slice(bytes).expect("infallible");
+ ///
+ /// assert_eq!(zerovec.binary_search_by(|x| x.cmp(&281)), Ok(1));
+ /// assert_eq!(zerovec.binary_search_by(|x| x.cmp(&282)), Err(2));
+ /// ```
+ ///
+ /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
+ #[inline]
+ pub fn binary_search_by(
+ &self,
+ mut predicate: impl FnMut(T) -> Ordering,
+ ) -> Result<usize, usize> {
+ self.as_ule_slice()
+ .binary_search_by(|probe| predicate(T::from_unaligned(*probe)))
+ }
+}
+
+// Safety (based on the safety checklist on the VarULE trait):
+// (`ZeroSlice<T>` is a transparent wrapper around [T::ULE])
+// 1. [T::ULE] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type)
+// 2. [T::ULE] is aligned to 1 byte (achieved by being a slice of a ULE type)
+// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
+// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
+// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
+// 6. `as_byte_slice()` and `parse_byte_slice()` are defaulted
+// 7. `[T::ULE]` byte equality is semantic equality (relying on the guideline of the underlying `ULE` type)
+unsafe impl<T: AsULE + 'static> VarULE for ZeroSlice<T> {
+ #[inline]
+ fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
+ T::ULE::validate_byte_slice(bytes)
+ }
+
+ #[inline]
+ unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
+ Self::from_ule_slice(T::ULE::from_byte_slice_unchecked(bytes))
+ }
+}
+
+impl<T> Eq for ZeroSlice<T> where T: AsULE + Eq {}
+
+impl<T> PartialEq<ZeroSlice<T>> for ZeroSlice<T>
+where
+ T: AsULE + PartialEq,
+{
+ #[inline]
+ fn eq(&self, other: &ZeroSlice<T>) -> bool {
+ self.as_zerovec().eq(&other.as_zerovec())
+ }
+}
+
+impl<T> PartialEq<[T]> for ZeroSlice<T>
+where
+ T: AsULE + PartialEq,
+{
+ #[inline]
+ fn eq(&self, other: &[T]) -> bool {
+ self.iter().eq(other.iter().copied())
+ }
+}
+
+impl<'a, T> PartialEq<ZeroVec<'a, T>> for ZeroSlice<T>
+where
+ T: AsULE + PartialEq,
+{
+ #[inline]
+ fn eq(&self, other: &ZeroVec<'a, T>) -> bool {
+ self.as_zerovec().eq(other)
+ }
+}
+
+impl<'a, T> PartialEq<ZeroSlice<T>> for ZeroVec<'a, T>
+where
+ T: AsULE + PartialEq,
+{
+ #[inline]
+ fn eq(&self, other: &ZeroSlice<T>) -> bool {
+ self.eq(&other.as_zerovec())
+ }
+}
+
+impl<T> fmt::Debug for ZeroSlice<T>
+where
+ T: AsULE + fmt::Debug,
+{
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.as_zerovec().fmt(f)
+ }
+}
+
+impl<T: AsULE + PartialOrd> PartialOrd for ZeroSlice<T> {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.iter().partial_cmp(other.iter())
+ }
+}
+
+impl<T: AsULE + Ord> Ord for ZeroSlice<T> {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.iter().cmp(other.iter())
+ }
+}
+
+impl<T: AsULE> AsRef<ZeroSlice<T>> for Vec<T::ULE> {
+ fn as_ref(&self) -> &ZeroSlice<T> {
+ ZeroSlice::<T>::from_ule_slice(self)
+ }
+}
+
+impl<T: AsULE> AsRef<ZeroSlice<T>> for &[T::ULE] {
+ fn as_ref(&self) -> &ZeroSlice<T> {
+ ZeroSlice::<T>::from_ule_slice(self)
+ }
+}
+
+impl<T> Default for &ZeroSlice<T>
+where
+ T: AsULE,
+{
+ fn default() -> Self {
+ ZeroSlice::from_ule_slice(&[])
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::zeroslice;
+
+ #[test]
+ fn test_split_first() {
+ {
+ // empty slice.
+ assert_eq!(None, ZeroSlice::<u16>::new_empty().split_first());
+ }
+ {
+ // single element slice
+ const DATA: &ZeroSlice<u16> =
+ zeroslice!(u16; <u16 as AsULE>::ULE::from_unsigned; [211]);
+ assert_eq!((211, zeroslice![]), DATA.split_first().unwrap());
+ }
+ {
+ // slice with many elements.
+ const DATA: &ZeroSlice<u16> =
+ zeroslice!(u16; <u16 as AsULE>::ULE::from_unsigned; [211, 281, 421, 32973]);
+ const EXPECTED_VALUE: (u16, &ZeroSlice<u16>) = (
+ 211,
+ zeroslice!(u16; <u16 as AsULE>::ULE::from_unsigned; [281, 421, 32973]),
+ );
+
+ assert_eq!(EXPECTED_VALUE, DATA.split_first().unwrap());
+ }
+ }
+}