summaryrefslogtreecommitdiffstats
path: root/third_party/rust/zerovec/benches
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/zerovec/benches')
-rw-r--r--third_party/rust/zerovec/benches/vzv.rs212
-rw-r--r--third_party/rust/zerovec/benches/zeromap.rs396
-rw-r--r--third_party/rust/zerovec/benches/zerovec.rs165
-rw-r--r--third_party/rust/zerovec/benches/zerovec_iai.rs65
-rw-r--r--third_party/rust/zerovec/benches/zerovec_serde.rs145
5 files changed, 983 insertions, 0 deletions
diff --git a/third_party/rust/zerovec/benches/vzv.rs b/third_party/rust/zerovec/benches/vzv.rs
new file mode 100644
index 0000000000..94b6621a96
--- /dev/null
+++ b/third_party/rust/zerovec/benches/vzv.rs
@@ -0,0 +1,212 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::SeedableRng;
+use rand_distr::{Alphanumeric, Distribution, Uniform};
+use rand_pcg::Lcg64Xsh32;
+use std::ops::RangeInclusive;
+
+use zerovec::VarZeroVec;
+
+#[repr(align(8))]
+#[derive(Default)]
+struct AlignedBuffer(Vec<u8>);
+
+/// Generates an array of random alphanumeric strings.
+///
+/// - length = range of lengths for the strings (chosen uniformly at random)
+/// - count = number of strings to generate
+/// - seed = seed for the PRNG
+///
+/// Returns a tuple including the vector and a u64 that can be used to seed the next PRNG.
+fn random_alphanums(lengths: RangeInclusive<usize>, count: usize, seed: u64) -> (Vec<String>, u64) {
+ // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
+ let mut rng1 = Lcg64Xsh32::seed_from_u64(seed);
+ let mut rng2 = Lcg64Xsh32::seed_from_u64(rand::Rng::gen(&mut rng1));
+ let alpha_dist = Alphanumeric;
+ let len_dist = Uniform::from(lengths);
+ let string_vec = len_dist
+ .sample_iter(&mut rng1)
+ .take(count)
+ .map(|len| {
+ (&alpha_dist)
+ .sample_iter(&mut rng2)
+ .take(len)
+ .map(char::from)
+ .collect::<String>()
+ })
+ .collect();
+ (string_vec, rand::Rng::gen(&mut rng1))
+}
+
+fn overview_bench(c: &mut Criterion) {
+ // Same as vzv/char_count/vzv but with different inputs
+ let seed = 42;
+ let (string_vec, _) = random_alphanums(2..=10, 100, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+
+ c.bench_function("vzv/overview", |b| {
+ b.iter(|| {
+ black_box(&vzv)
+ .iter()
+ .fold(0, |sum, string| sum + string.chars().count())
+ });
+ });
+
+ #[cfg(feature = "bench")]
+ {
+ char_count_benches(c);
+ binary_search_benches(c);
+ vzv_precompute_bench(c);
+ }
+
+ #[cfg(all(feature = "bench", feature = "serde"))]
+ {
+ serde_benches(c);
+ }
+}
+
+#[cfg(feature = "bench")]
+fn char_count_benches(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, _) = random_alphanums(2..=20, 100, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+
+ // *** Count chars in vec of 100 strings ***
+ c.bench_function("vzv/char_count/slice", |b| {
+ b.iter(|| {
+ black_box(&string_vec)
+ .iter()
+ .fold(0, |sum, string| sum + string.chars().count())
+ });
+ });
+
+ // *** Count chars in vec of 100 strings ***
+ c.bench_function("vzv/char_count/vzv", |b| {
+ b.iter(|| {
+ black_box(&vzv)
+ .iter()
+ .fold(0, |sum, string| sum + string.chars().count())
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn binary_search_benches(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
+ let (needles, _) = random_alphanums(2..=20, 10, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+ let single_needle = "lmnop".to_owned();
+
+ // *** Binary search vec of 500 strings 10 times ***
+ c.bench_function("vzv/binary_search/slice", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&string_vec).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ // *** Binary search vec of 500 strings 10 times ***
+ c.bench_function("vzv/binary_search/vzv", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&vzv).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ c.bench_function("vzv/binary_search/single/slice", |b| {
+ b.iter(|| black_box(&string_vec).binary_search(black_box(&single_needle)));
+ });
+
+ c.bench_function("vzv/binary_search/single/vzv", |b| {
+ b.iter(|| black_box(&vzv).binary_search(black_box(&single_needle)));
+ });
+}
+
+#[cfg(all(feature = "bench", feature = "serde"))]
+fn serde_benches(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, _) = random_alphanums(2..=20, 100, seed);
+ let bincode_vec = bincode::serialize(&string_vec).unwrap();
+ let vzv: VarZeroVec<str> = VarZeroVec::from(&*string_vec);
+ let bincode_vzv = bincode::serialize(&vzv).unwrap();
+
+ // *** Deserialize vec of 100 strings ***
+ c.bench_function("vzv/deserialize/string/vec_owned", |b| {
+ b.iter(|| bincode::deserialize::<Vec<String>>(black_box(&bincode_vec)));
+ });
+
+ // *** Deserialize vec of 100 strings ***
+ c.bench_function("vzv/deserialize/string/vec_borrowed", |b| {
+ b.iter(|| bincode::deserialize::<Vec<&str>>(black_box(&bincode_vec)));
+ });
+
+ // *** Deserialize vec of 100 strings ***
+ c.bench_function("vzv/deserialize/string/vzv", |b| {
+ b.iter(|| bincode::deserialize::<VarZeroVec<str>>(black_box(&bincode_vzv)));
+ });
+}
+
+#[cfg(feature = "bench")]
+// Testing differences between operating on slices with precomputed/non-precomputed indexing info
+fn vzv_precompute_bench(c: &mut Criterion) {
+ let seed = 2021;
+ let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
+ let (needles, _) = random_alphanums(2..=20, 10, seed);
+ let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
+ let vzv = VarZeroVec::<str>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
+ let borrowed = vzv.as_components();
+ let slice = vzv.as_slice();
+ let single_needle = "lmnop";
+
+ c.bench_function("vzv_precompute/get/precomputed", |b| {
+ b.iter(|| black_box(&borrowed).get(100));
+ });
+
+ c.bench_function("vzv_precompute/get/slice", |b| {
+ b.iter(|| black_box(&slice).get(100));
+ });
+
+ c.bench_function("vzv_precompute/search/precomputed", |b| {
+ b.iter(|| black_box(&borrowed).binary_search(single_needle));
+ });
+
+ c.bench_function("vzv_precompute/search/slice", |b| {
+ b.iter(|| black_box(&slice).binary_search(single_needle));
+ });
+
+ c.bench_function("vzv_precompute/search_multi/precomputed", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&borrowed).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ c.bench_function("vzv_precompute/search_multi/slice", |b| {
+ b.iter(|| {
+ black_box(&needles)
+ .iter()
+ .map(|needle| black_box(&slice).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+}
+
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/zerovec/benches/zeromap.rs b/third_party/rust/zerovec/benches/zeromap.rs
new file mode 100644
index 0000000000..5f3e87b8c0
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zeromap.rs
@@ -0,0 +1,396 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use std::collections::HashMap;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use zerovec::maps::ZeroMapKV;
+use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec};
+use zerovec::{ZeroHashMap, ZeroMap};
+
+const DATA: [(&str, &str); 16] = [
+ ("ar", "Arabic"),
+ ("bn", "Bangla"),
+ ("ccp", "Chakma"),
+ ("chr", "Cherokee"),
+ ("el", "Greek"),
+ ("en", "English"),
+ ("eo", "Esperanto"),
+ ("es", "Spanish"),
+ ("fr", "French"),
+ ("iu", "Inuktitut"),
+ ("ja", "Japanese"),
+ ("ru", "Russian"),
+ ("sr", "Serbian"),
+ ("th", "Thai"),
+ ("tr", "Turkish"),
+ ("zh", "Chinese"),
+];
+
+const POSTCARD: [u8; 282] = [
+ 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14,
+ 0, 0, 0, 16, 0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0,
+ 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101,
+ 110, 101, 111, 101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114,
+ 122, 104, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0,
+ 31, 0, 0, 0, 38, 0, 0, 0, 47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84,
+ 0, 0, 0, 91, 0, 0, 0, 95, 0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103,
+ 108, 97, 67, 104, 97, 107, 109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101,
+ 107, 69, 110, 103, 108, 105, 115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97,
+ 110, 105, 115, 104, 70, 114, 101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74,
+ 97, 112, 97, 110, 101, 115, 101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97,
+ 110, 84, 104, 97, 105, 84, 117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101,
+];
+
+const POSTCARD_HASHMAP: [u8; 176] = [
+ 16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 107, 109,
+ 97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114,
+ 107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101,
+ 115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101,
+ 114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83,
+ 101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9,
+ 73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98,
+ 110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65,
+ 114, 97, 98, 105, 99,
+];
+
+const POSTCARD_ZEROHASHMAP: [u8; 412] = [
+ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
+ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,
+ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 11, 0,
+ 0, 0, 13, 0, 0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0,
+ 0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 101, 110, 102, 114, 106, 97, 101, 108, 99, 104, 114,
+ 98, 110, 115, 114, 105, 117, 101, 111, 116, 114, 99, 99, 112, 122, 104, 114, 117, 101, 115,
+ 116, 104, 97, 114, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 13, 0, 0, 0, 21, 0, 0, 0, 26,
+ 0, 0, 0, 34, 0, 0, 0, 40, 0, 0, 0, 47, 0, 0, 0, 56, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 78, 0,
+ 0, 0, 85, 0, 0, 0, 92, 0, 0, 0, 99, 0, 0, 0, 103, 0, 0, 0, 69, 110, 103, 108, 105, 115, 104,
+ 70, 114, 101, 110, 99, 104, 74, 97, 112, 97, 110, 101, 115, 101, 71, 114, 101, 101, 107, 67,
+ 104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 83, 101, 114, 98, 105, 97, 110,
+ 73, 110, 117, 107, 116, 105, 116, 117, 116, 69, 115, 112, 101, 114, 97, 110, 116, 111, 84, 117,
+ 114, 107, 105, 115, 104, 67, 104, 97, 107, 109, 97, 67, 104, 105, 110, 101, 115, 101, 82, 117,
+ 115, 115, 105, 97, 110, 83, 112, 97, 110, 105, 115, 104, 84, 104, 97, 105, 65, 114, 97, 98,
+ 105, 99,
+];
+
+/// Run this function to print new data to the console.
+/// Requires the optional `serde` Cargo feature.
+#[allow(dead_code)]
+fn generate_zeromap() {
+ let map = build_zeromap(false);
+ let buf = postcard::to_stdvec(&map).unwrap();
+ println!("{buf:?}");
+}
+
+/// Run this function to print new data to the console.
+/// Requires the optional `serde` Cargo feature.
+#[allow(dead_code)]
+fn generate_hashmap() {
+ let map = build_hashmap(false);
+ let buf = postcard::to_stdvec(&map).unwrap();
+ println!("{buf:?}");
+}
+
+/// Run this function to print new data to the console.
+/// Requires the optional `serde` Cargo feature.
+#[allow(dead_code)]
+fn generate_zerohashmap() {
+ let map = build_zerohashmap(false);
+ let buf = postcard::to_stdvec(&map).unwrap();
+ println!("{buf:?}");
+}
+
+fn overview_bench(c: &mut Criterion) {
+ bench_zeromap(c);
+ bench_hashmap(c);
+ bench_zerohashmap(c);
+}
+
+fn bench_zeromap(c: &mut Criterion) {
+ // Uncomment the following line to re-generate the const data.
+ // generate_hashmap();
+
+ bench_deserialize(c);
+ #[cfg(feature = "bench")]
+ bench_deserialize_large(c);
+ bench_lookup(c);
+ #[cfg(feature = "bench")]
+ bench_lookup_large(c);
+}
+
+fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> {
+ // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes
+ // *minutes*, whereas this code runs in milliseconds
+ let mut keys = Vec::new();
+ let mut values = Vec::new();
+ let mut data = DATA.to_vec();
+ data.sort();
+ for &(key, value) in data.iter() {
+ if large {
+ for n in 0..8192 {
+ keys.push(format!("{key}{n:04}"));
+ values.push(indexify(value));
+ }
+ } else {
+ keys.push(key.to_owned());
+ values.push(indexify(value));
+ }
+ }
+
+ let keys = keys.iter().map(|s| indexify(s)).collect::<Vec<_>>();
+ // keys are sorted by construction
+ unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) }
+}
+
+fn bench_deserialize(c: &mut Criterion) {
+ c.bench_function("zeromap/deserialize/small", |b| {
+ b.iter(|| {
+ let map: ZeroMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&POSTCARD)).unwrap();
+ assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_deserialize_large(c: &mut Criterion) {
+ let buf = large_zeromap_postcard_bytes();
+ c.bench_function("zeromap/deserialize/large", |b| {
+ b.iter(|| {
+ let map: ZeroMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&buf)).unwrap();
+ assert_eq!(map.get(indexify("iu3333")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+fn bench_lookup(c: &mut Criterion) {
+ let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(black_box(&POSTCARD)).unwrap();
+ c.bench_function("zeromap/lookup/small", |b| {
+ b.iter(|| {
+ assert_eq!(
+ map.get(black_box(indexify("iu"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_lookup_large(c: &mut Criterion) {
+ let buf = large_zeromap_postcard_bytes();
+ let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
+ c.bench_function("zeromap/lookup/large", |b| {
+ b.iter(|| {
+ assert_eq!(
+ map.get(black_box(indexify("iu3333"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn large_zeromap_postcard_bytes() -> Vec<u8> {
+ postcard::to_stdvec(&build_zeromap(true)).unwrap()
+}
+
+fn bench_hashmap(c: &mut Criterion) {
+ // Uncomment the following line to re-generate the const data.
+ // generate_hashmap();
+
+ bench_deserialize_hashmap(c);
+ #[cfg(feature = "bench")]
+ bench_deserialize_large_hashmap(c);
+ bench_lookup_hashmap(c);
+ #[cfg(feature = "bench")]
+ bench_lookup_large_hashmap(c);
+}
+
+fn build_hashmap(large: bool) -> HashMap<String, String> {
+ let mut map: HashMap<String, String> = HashMap::new();
+ for &(key, value) in DATA.iter() {
+ if large {
+ for n in 0..8192 {
+ map.insert(format!("{key}{n}"), value.to_owned());
+ }
+ } else {
+ map.insert(key.to_owned(), value.to_owned());
+ }
+ }
+ map
+}
+
+fn bench_deserialize_hashmap(c: &mut Criterion) {
+ c.bench_function("zeromap/deserialize/small/hashmap", |b| {
+ b.iter(|| {
+ let map: HashMap<String, String> =
+ postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
+ assert_eq!(map.get("iu"), Some(&"Inuktitut".to_owned()));
+ })
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_deserialize_large_hashmap(c: &mut Criterion) {
+ let buf = large_hashmap_postcard_bytes();
+ c.bench_function("zeromap/deserialize/large/hashmap", |b| {
+ b.iter(|| {
+ let map: HashMap<String, String> = postcard::from_bytes(black_box(&buf)).unwrap();
+ assert_eq!(map.get("iu3333"), Some(&"Inuktitut".to_owned()));
+ })
+ });
+}
+
+fn bench_lookup_hashmap(c: &mut Criterion) {
+ let map: HashMap<String, String> = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
+ c.bench_function("zeromap/lookup/small/hashmap", |b| {
+ b.iter(|| {
+ assert_eq!(map.get(black_box("iu")), Some(&"Inuktitut".to_owned()));
+ assert_eq!(map.get(black_box("zz")), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_lookup_large_hashmap(c: &mut Criterion) {
+ let buf = large_hashmap_postcard_bytes();
+ let map: HashMap<String, String> = postcard::from_bytes(&buf).unwrap();
+ c.bench_function("zeromap/lookup/large/hashmap", |b| {
+ b.iter(|| {
+ assert_eq!(map.get(black_box("iu3333")), Some(&"Inuktitut".to_owned()));
+ assert_eq!(map.get(black_box("zz")), None);
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn large_hashmap_postcard_bytes() -> Vec<u8> {
+ postcard::to_stdvec(&build_hashmap(true)).unwrap()
+}
+
+fn bench_zerohashmap(c: &mut Criterion) {
+ // Uncomment the following line to re-generate the const data.
+ // generate_zerohashmap();
+
+ bench_deserialize_zerohashmap(c);
+ #[cfg(feature = "bench")]
+ bench_deserialize_large_zerohashmap(c);
+ bench_zerohashmap_lookup(c);
+ #[cfg(feature = "bench")]
+ bench_zerohashmap_lookup_large(c);
+}
+
+fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> {
+ let mut kv = Vec::new();
+
+ for (key, value) in DATA.iter() {
+ if large {
+ for n in 0..512 {
+ kv.push((format!("{key}{n}"), indexify(value)));
+ }
+ } else {
+ kv.push((key.to_string(), indexify(value)));
+ }
+ }
+
+ ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1)))
+}
+
+fn bench_deserialize_zerohashmap(c: &mut Criterion) {
+ c.bench_function("zerohashmap/deserialize/small", |b| {
+ b.iter(|| {
+ let map: ZeroHashMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
+ assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+fn bench_deserialize_large_zerohashmap(c: &mut Criterion) {
+ let buf = large_zerohashmap_postcard_bytes();
+ c.bench_function("zerohashmap/deserialize/large", |b| {
+ b.iter(|| {
+ let map: ZeroHashMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&buf)).unwrap();
+ assert_eq!(map.get(indexify("iu333")).map(|x| &x.0), Some("Inuktitut"));
+ })
+ });
+}
+
+fn bench_zerohashmap_lookup(c: &mut Criterion) {
+ let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> =
+ postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
+
+ c.bench_function("zerohashmap/lookup/small", |b| {
+ b.iter(|| {
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("iu"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
+ None
+ );
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn bench_zerohashmap_lookup_large(c: &mut Criterion) {
+ let buf = large_zerohashmap_postcard_bytes();
+ let zero_hashmap: ZeroHashMap<Index32Str, Index32Str> = postcard::from_bytes(&buf).unwrap();
+
+ c.bench_function("zerohashmap/lookup/large", |b| {
+ b.iter(|| {
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("iu333"))).map(|x| &x.0),
+ Some("Inuktitut")
+ );
+ assert_eq!(
+ zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0),
+ None
+ );
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn large_zerohashmap_postcard_bytes() -> Vec<u8> {
+ postcard::to_stdvec(&build_zerohashmap(true)).unwrap()
+}
+
+criterion_group!(benches, overview_bench);
+criterion_main!(benches);
+
+/// This type lets us use a u32-index-format VarZeroVec with the ZeroMap.
+///
+/// Eventually we will have a FormatSelector type that lets us do `ZeroMap<FormatSelector<K, Index32>, V>`
+/// (https://github.com/unicode-org/icu4x/issues/2312)
+///
+/// , isn't actually important; it's just more convenient to use make_varule to get the
+/// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually
+/// define a Serialize implementation, and that would be gnarly)
+/// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE)
+#[zerovec::make_varule(Index32Str)]
+#[zerovec::skip_derive(ZeroMapKV)]
+#[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
+#[zerovec::derive(Serialize, Deserialize, Hash)]
+pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str);
+
+impl<'a> ZeroMapKV<'a> for Index32Str {
+ type Container = VarZeroVec<'a, Index32Str, Index32>;
+ type Slice = VarZeroSlice<Index32Str, Index32>;
+ type GetType = Index32Str;
+ type OwnedType = Box<Index32Str>;
+}
+
+#[inline]
+fn indexify(s: &str) -> &Index32Str {
+ unsafe { &*(s as *const str as *const Index32Str) }
+}
diff --git a/third_party/rust/zerovec/benches/zerovec.rs b/third_party/rust/zerovec/benches/zerovec.rs
new file mode 100644
index 0000000000..5ed9421603
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zerovec.rs
@@ -0,0 +1,165 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::SeedableRng;
+use rand_distr::{Distribution, LogNormal};
+use rand_pcg::Lcg64Xsh32;
+use std::fmt;
+
+#[path = "../src/samples.rs"]
+mod samples;
+use samples::*;
+
+use zerovec::ule::*;
+use zerovec::ZeroVec;
+
+#[repr(align(8))]
+#[derive(Default)]
+struct AlignedBuffer(Vec<u8>);
+
+/// Generate a large list of u32s for stress testing.
+#[allow(dead_code)]
+fn get_needles_and_haystack() -> (Vec<u32>, Vec<u32>) {
+ // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
+ // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
+ // spirit, correlates with Unicode properties (many low values and a long tail of high values)
+ let mut rng = Lcg64Xsh32::seed_from_u64(2021);
+ let dist = LogNormal::new(10.0, 1.0).unwrap();
+ let haystack = {
+ let mut unsorted: Vec<u32> = (&dist)
+ .sample_iter(&mut rng)
+ .take(1000)
+ .map(|f| f as u32)
+ .collect();
+ unsorted.sort_unstable();
+ unsorted
+ };
+ let needles: Vec<u32> = (&dist)
+ .sample_iter(&mut rng)
+ .take(100)
+ .map(|f| f as u32)
+ .collect();
+ (needles, haystack)
+}
+
+#[allow(dead_code, clippy::ptr_arg)]
+fn vec_to_unaligned_uvec<'a, T>(vec: &Vec<T>, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T>
+where
+ T: EqULE + Copy + PartialEq + fmt::Debug,
+{
+ // Pad with zero to ensure it is not aligned
+ buffer.0.push(0);
+ buffer
+ .0
+ .extend(ZeroVec::from_slice_or_alloc(vec.as_slice()).as_bytes());
+ ZeroVec::<T>::parse_byte_slice(&buffer.0[1..]).unwrap()
+}
+
+fn overview_bench(c: &mut Criterion) {
+ c.bench_function("zerovec/overview", |b| {
+ b.iter(|| {
+ ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
+ .unwrap()
+ .iter()
+ .sum::<u32>()
+ });
+ });
+
+ #[cfg(feature = "bench")]
+ {
+ sum_benches(c);
+ binary_search_benches(c);
+ }
+}
+
+#[cfg(feature = "bench")]
+fn sum_benches(c: &mut Criterion) {
+ let normal_slice = &TEST_SLICE[0..19];
+ let aligned_ule_slice = <u32 as AsULE>::ULE::parse_byte_slice(&TEST_BUFFER_LE[0..76]).unwrap();
+ let unalign_ule_slice = <u32 as AsULE>::ULE::parse_byte_slice(&TEST_BUFFER_LE[1..77]).unwrap();
+
+ assert_eq!(normal_slice.len(), aligned_ule_slice.len());
+ assert_eq!(normal_slice.len(), unalign_ule_slice.len());
+
+ c.bench_function("zerovec/sum/sample/slice", |b| {
+ b.iter(|| {
+ black_box(normal_slice)
+ .iter()
+ .copied()
+ .fold(0u32, |sum, val| sum.wrapping_add(val))
+ })
+ });
+
+ c.bench_function("zerovec/sum/sample/zerovec_aligned", |b| {
+ b.iter(|| {
+ ZeroVec::<u32>::new_borrowed(black_box(aligned_ule_slice))
+ .iter()
+ .fold(0u32, |sum, val| sum.wrapping_add(val))
+ });
+ });
+
+ c.bench_function("zerovec/sum/sample/zerovec_unaligned", |b| {
+ b.iter(|| {
+ ZeroVec::<u32>::new_borrowed(black_box(unalign_ule_slice))
+ .iter()
+ .fold(0u32, |sum, val| sum.wrapping_add(val))
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn binary_search_benches(c: &mut Criterion) {
+ c.bench_function("zerovec/binary_search/sample/slice", |b| {
+ b.iter(|| black_box(&TEST_SLICE).binary_search(&0x0c0d0c));
+ });
+
+ c.bench_function("zerovec/binary_search/sample/zerovec", |b| {
+ let zerovec = ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap();
+ b.iter(|| zerovec.binary_search(&0x0c0d0c));
+ });
+
+ let (needles_100, haystack) = get_needles_and_haystack();
+ // Only search for 50 needles to put all figures in nanoseconds
+ let needles_50 = &needles_100[0..50];
+
+ // *** Binary search vec of 1000 `u32` 50 times ***
+ c.bench_function("zerovec/binary_search/log_normal/slice", |b| {
+ b.iter(|| {
+ black_box(&needles_50)
+ .iter()
+ .map(|needle| black_box(&haystack).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ let mut buffer = AlignedBuffer::default();
+ let zerovec = vec_to_unaligned_uvec(black_box(&haystack), &mut buffer);
+ assert_eq!(zerovec, haystack.as_slice());
+
+ // *** Binary search vec of 1000 `u32` 50 times ***
+ c.bench_function("zerovec/binary_search/log_normal/zerovec", |b| {
+ b.iter(|| {
+ black_box(&needles_50)
+ .iter()
+ .map(|needle| black_box(&zerovec).binary_search(needle))
+ .filter(|r| r.is_ok())
+ .count()
+ });
+ });
+
+ let single_needle = 36315;
+
+ c.bench_function("zerovec/binary_search/log_normal/single/slice", |b| {
+ b.iter(|| black_box(&haystack).binary_search(&single_needle));
+ });
+
+ c.bench_function("zerovec/binary_search/log_normal/single/zerovec", |b| {
+ b.iter(|| black_box(&zerovec).binary_search(&single_needle));
+ });
+}
+
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);
diff --git a/third_party/rust/zerovec/benches/zerovec_iai.rs b/third_party/rust/zerovec/benches/zerovec_iai.rs
new file mode 100644
index 0000000000..c34a6aebc0
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zerovec_iai.rs
@@ -0,0 +1,65 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use iai::black_box;
+
+#[path = "../src/samples.rs"]
+mod samples;
+use samples::*;
+
+use zerovec::ule::VarULE;
+use zerovec::VarZeroSlice;
+use zerovec::ZeroVec;
+
+fn sum_slice() -> u32 {
+ black_box(TEST_SLICE).iter().sum::<u32>()
+}
+
+fn sum_zerovec() -> u32 {
+ ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
+ .unwrap()
+ .iter()
+ .sum::<u32>()
+}
+
+fn binarysearch_slice() -> Result<usize, usize> {
+ black_box(TEST_SLICE).binary_search(&0x0c0d0c)
+}
+
+fn binarysearch_zerovec() -> Result<usize, usize> {
+ ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
+ .unwrap()
+ .binary_search(&0x0c0d0c)
+}
+
+fn varzeroslice_parse_get() -> Option<&'static str> {
+ let slice: &'static VarZeroSlice<str> =
+ VarZeroSlice::parse_byte_slice(black_box(TEST_VARZEROSLICE_BYTES)).unwrap();
+ slice.get(black_box(1))
+}
+
+fn varzeroslice_get() -> Option<&'static str> {
+ // Safety: The bytes are valid.
+ let slice: &'static VarZeroSlice<str> =
+ unsafe { VarZeroSlice::from_byte_slice_unchecked(black_box(TEST_VARZEROSLICE_BYTES)) };
+ slice.get(black_box(1))
+}
+
+fn varzeroslice_get_unchecked() -> &'static str {
+ // Safety: The bytes are valid.
+ let slice: &'static VarZeroSlice<str> =
+ unsafe { VarZeroSlice::from_byte_slice_unchecked(black_box(TEST_VARZEROSLICE_BYTES)) };
+ // Safety: The VarZeroVec has length 4.
+ unsafe { slice.get_unchecked(black_box(1)) }
+}
+
+iai::main!(
+ sum_slice,
+ sum_zerovec,
+ binarysearch_slice,
+ binarysearch_zerovec,
+ varzeroslice_parse_get,
+ varzeroslice_get,
+ varzeroslice_get_unchecked,
+);
diff --git a/third_party/rust/zerovec/benches/zerovec_serde.rs b/third_party/rust/zerovec/benches/zerovec_serde.rs
new file mode 100644
index 0000000000..3a9bb051a2
--- /dev/null
+++ b/third_party/rust/zerovec/benches/zerovec_serde.rs
@@ -0,0 +1,145 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::SeedableRng;
+use rand_distr::{Distribution, LogNormal};
+use rand_pcg::Lcg64Xsh32;
+
+#[path = "../src/samples.rs"]
+mod samples;
+use samples::*;
+
+use zerovec::ZeroVec;
+
+/// Generate a large list of u32s for stress testing.
+#[allow(dead_code)]
+fn random_numbers(count: usize) -> Vec<u32> {
+ // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
+ // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
+ // spirit, correlates with Unicode properties (many low values and a long tail of high values)
+ let mut rng = Lcg64Xsh32::seed_from_u64(2021);
+ let dist = LogNormal::new(10.0, 1.0).unwrap();
+ (&dist)
+ .sample_iter(&mut rng)
+ .take(count)
+ .map(|f| f as u32)
+ .collect()
+}
+
+fn overview_bench(c: &mut Criterion) {
+ c.bench_function("zerovec_serde/overview", |b| {
+ // Same as "zerovec_serde/deserialize_sum/u32/zerovec"
+ let buffer = bincode::serialize(
+ &ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
+ )
+ .unwrap();
+ b.iter(|| {
+ bincode::deserialize::<ZeroVec<u32>>(&buffer)
+ .unwrap()
+ .iter()
+ .sum::<u32>()
+ });
+ });
+
+ #[cfg(feature = "bench")]
+ {
+ u32_benches(c);
+ char_benches(c);
+ stress_benches(c);
+ }
+}
+
+#[cfg(feature = "bench")]
+fn u32_benches(c: &mut Criterion) {
+ c.bench_function("zerovec_serde/serialize/u32/slice", |b| {
+ b.iter(|| bincode::serialize(&Vec::from(black_box(TEST_SLICE))));
+ });
+
+ c.bench_function("zerovec_serde/deserialize_sum/u32/slice", |b| {
+ let buffer = bincode::serialize(&Vec::from(black_box(TEST_SLICE))).unwrap();
+ b.iter(|| {
+ bincode::deserialize::<Vec<u32>>(&buffer)
+ .unwrap()
+ .iter()
+ .sum::<u32>()
+ });
+ });
+
+ c.bench_function("zerovec_serde/serialize/u32/zerovec", |b| {
+ b.iter(|| bincode::serialize(&ZeroVec::from_slice_or_alloc(black_box(TEST_SLICE))));
+ });
+
+ c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| {
+ let buffer = bincode::serialize(
+ &ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
+ )
+ .unwrap();
+ b.iter(|| {
+ bincode::deserialize::<ZeroVec<u32>>(&buffer)
+ .unwrap()
+ .iter()
+ .sum::<u32>()
+ });
+ });
+}
+
+#[cfg(feature = "bench")]
+fn char_benches(c: &mut Criterion) {
+ const ORIGINAL_CHARS: &[char] = &[
+ 'ⶢ', '⺇', 'Ⱜ', '◁', '◩', '⌂', '⼅', '⏻', '⢜', '◊', 'ⲫ', '⏷', '◢', '⟉', '℞',
+ ];
+
+ let char_zero_vec = &ZeroVec::alloc_from_slice(ORIGINAL_CHARS);
+
+ c.bench_function("zerovec_serde/serialize/char/slice", |b| {
+ b.iter(|| bincode::serialize(black_box(&Vec::from(ORIGINAL_CHARS))));
+ });
+
+ c.bench_function("zerovec_serde/deserialize/char/slice", |b| {
+ let buffer = bincode::serialize(black_box(&Vec::from(ORIGINAL_CHARS))).unwrap();
+ b.iter(|| bincode::deserialize::<Vec<char>>(&buffer));
+ });
+
+ c.bench_function("zerovec_serde/serialize/char/zerovec", |b| {
+ b.iter(|| bincode::serialize(black_box(char_zero_vec)));
+ });
+
+ c.bench_function("zerovec_serde/deserialize/char/zerovec", |b| {
+ let buffer = bincode::serialize(black_box(char_zero_vec)).unwrap();
+ b.iter(|| bincode::deserialize::<ZeroVec<char>>(&buffer));
+ });
+}
+
+#[cfg(feature = "bench")]
+fn stress_benches(c: &mut Criterion) {
+ let number_vec = random_numbers(100);
+ let bincode_vec = bincode::serialize(&number_vec).unwrap();
+ let zerovec_aligned = ZeroVec::from_slice_or_alloc(number_vec.as_slice());
+ let bincode_zerovec = bincode::serialize(&zerovec_aligned).unwrap();
+
+ // *** Deserialize vec of 100 `u32` ***
+ c.bench_function("zerovec_serde/deserialize/stress/vec", |b| {
+ b.iter(|| bincode::deserialize::<Vec<u32>>(&bincode_vec));
+ });
+
+ // *** Deserialize vec of 100 `u32` ***
+ c.bench_function("zerovec_serde/deserialize/stress/zerovec", |b| {
+ b.iter(|| bincode::deserialize::<ZeroVec<u32>>(&bincode_zerovec));
+ });
+
+ // *** Compute sum of vec of 100 `u32` ***
+ c.bench_function("zerovec_serde/sum/stress/vec", |b| {
+ b.iter(|| black_box(&number_vec).iter().sum::<u32>());
+ });
+
+ // *** Compute sum of vec of 100 `u32` ***
+ let zerovec = ZeroVec::<u32>::parse_byte_slice(zerovec_aligned.as_bytes()).unwrap();
+ c.bench_function("zerovec_serde/sum/stress/zerovec", |b| {
+ b.iter(|| black_box(&zerovec).iter().sum::<u32>());
+ });
+}
+
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);