// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use std::collections::HashMap; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use zerovec::maps::ZeroMapKV; use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec}; use zerovec::{ZeroHashMap, ZeroMap}; const DATA: [(&str, &str); 16] = [ ("ar", "Arabic"), ("bn", "Bangla"), ("ccp", "Chakma"), ("chr", "Cherokee"), ("el", "Greek"), ("en", "English"), ("eo", "Esperanto"), ("es", "Spanish"), ("fr", "French"), ("iu", "Inuktitut"), ("ja", "Japanese"), ("ru", "Russian"), ("sr", "Serbian"), ("th", "Thai"), ("tr", "Turkish"), ("zh", "Chinese"), ]; const POSTCARD: [u8; 282] = [ 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14, 0, 0, 0, 16, 0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101, 110, 101, 111, 101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114, 122, 104, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0, 31, 0, 0, 0, 38, 0, 0, 0, 47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84, 0, 0, 0, 91, 0, 0, 0, 95, 0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103, 108, 97, 67, 104, 97, 107, 109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101, 107, 69, 110, 103, 108, 105, 115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97, 110, 105, 115, 104, 70, 114, 101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74, 97, 112, 97, 110, 101, 115, 101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97, 110, 84, 104, 97, 105, 84, 117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101, ]; const POSTCARD_HASHMAP: [u8; 176] = [ 16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 107, 109, 97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114, 107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101, 115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101, 114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83, 101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9, 73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98, 110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65, 114, 97, 98, 105, 99, ]; const POSTCARD_ZEROHASHMAP: [u8; 412] = [ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 102, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 11, 0, 0, 0, 13, 0, 0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 101, 110, 102, 114, 106, 97, 101, 108, 99, 104, 114, 98, 110, 115, 114, 105, 117, 101, 111, 116, 114, 99, 99, 112, 122, 104, 114, 117, 101, 115, 116, 104, 97, 114, 177, 1, 16, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 13, 0, 0, 0, 21, 0, 0, 0, 26, 0, 0, 0, 34, 0, 0, 0, 40, 0, 0, 0, 47, 0, 0, 0, 56, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 78, 0, 0, 0, 85, 0, 0, 0, 92, 0, 0, 0, 99, 0, 0, 0, 103, 0, 0, 0, 69, 110, 103, 108, 105, 115, 104, 70, 114, 101, 110, 99, 104, 74, 97, 112, 97, 110, 101, 115, 101, 71, 114, 101, 101, 107, 67, 104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 83, 101, 114, 98, 105, 97, 110, 73, 110, 117, 107, 116, 105, 116, 117, 116, 69, 115, 112, 101, 114, 97, 110, 116, 111, 84, 117, 114, 107, 105, 115, 104, 67, 104, 97, 107, 109, 97, 67, 104, 105, 110, 101, 115, 101, 82, 117, 115, 115, 105, 97, 110, 83, 112, 97, 110, 105, 115, 104, 84, 104, 97, 105, 65, 114, 97, 98, 105, 99, ]; /// Run this function to print new data to the console. /// Requires the optional `serde` Cargo feature. #[allow(dead_code)] fn generate_zeromap() { let map = build_zeromap(false); let buf = postcard::to_stdvec(&map).unwrap(); println!("{buf:?}"); } /// Run this function to print new data to the console. /// Requires the optional `serde` Cargo feature. #[allow(dead_code)] fn generate_hashmap() { let map = build_hashmap(false); let buf = postcard::to_stdvec(&map).unwrap(); println!("{buf:?}"); } /// Run this function to print new data to the console. /// Requires the optional `serde` Cargo feature. #[allow(dead_code)] fn generate_zerohashmap() { let map = build_zerohashmap(false); let buf = postcard::to_stdvec(&map).unwrap(); println!("{buf:?}"); } fn overview_bench(c: &mut Criterion) { bench_zeromap(c); bench_hashmap(c); bench_zerohashmap(c); } fn bench_zeromap(c: &mut Criterion) { // Uncomment the following line to re-generate the const data. // generate_hashmap(); bench_deserialize(c); #[cfg(feature = "bench")] bench_deserialize_large(c); bench_lookup(c); #[cfg(feature = "bench")] bench_lookup_large(c); } fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> { // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes // *minutes*, whereas this code runs in milliseconds let mut keys = Vec::new(); let mut values = Vec::new(); let mut data = DATA.to_vec(); data.sort(); for &(key, value) in data.iter() { if large { for n in 0..8192 { keys.push(format!("{key}{n:04}")); values.push(indexify(value)); } } else { keys.push(key.to_owned()); values.push(indexify(value)); } } let keys = keys.iter().map(|s| indexify(s)).collect::>(); // keys are sorted by construction unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) } } fn bench_deserialize(c: &mut Criterion) { c.bench_function("zeromap/deserialize/small", |b| { b.iter(|| { let map: ZeroMap = postcard::from_bytes(black_box(&POSTCARD)).unwrap(); assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut")); }) }); } #[cfg(feature = "bench")] fn bench_deserialize_large(c: &mut Criterion) { let buf = large_zeromap_postcard_bytes(); c.bench_function("zeromap/deserialize/large", |b| { b.iter(|| { let map: ZeroMap = postcard::from_bytes(black_box(&buf)).unwrap(); assert_eq!(map.get(indexify("iu3333")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_lookup(c: &mut Criterion) { let map: ZeroMap = postcard::from_bytes(black_box(&POSTCARD)).unwrap(); c.bench_function("zeromap/lookup/small", |b| { b.iter(|| { assert_eq!( map.get(black_box(indexify("iu"))).map(|x| &x.0), Some("Inuktitut") ); assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None); }); }); } #[cfg(feature = "bench")] fn bench_lookup_large(c: &mut Criterion) { let buf = large_zeromap_postcard_bytes(); let map: ZeroMap = postcard::from_bytes(&buf).unwrap(); c.bench_function("zeromap/lookup/large", |b| { b.iter(|| { assert_eq!( map.get(black_box(indexify("iu3333"))).map(|x| &x.0), Some("Inuktitut") ); assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None); }); }); } #[cfg(feature = "bench")] fn large_zeromap_postcard_bytes() -> Vec { postcard::to_stdvec(&build_zeromap(true)).unwrap() } fn bench_hashmap(c: &mut Criterion) { // Uncomment the following line to re-generate the const data. // generate_hashmap(); bench_deserialize_hashmap(c); #[cfg(feature = "bench")] bench_deserialize_large_hashmap(c); bench_lookup_hashmap(c); #[cfg(feature = "bench")] bench_lookup_large_hashmap(c); } fn build_hashmap(large: bool) -> HashMap { let mut map: HashMap = HashMap::new(); for &(key, value) in DATA.iter() { if large { for n in 0..8192 { map.insert(format!("{key}{n}"), value.to_owned()); } } else { map.insert(key.to_owned(), value.to_owned()); } } map } fn bench_deserialize_hashmap(c: &mut Criterion) { c.bench_function("zeromap/deserialize/small/hashmap", |b| { b.iter(|| { let map: HashMap = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap(); assert_eq!(map.get("iu"), Some(&"Inuktitut".to_owned())); }) }); } #[cfg(feature = "bench")] fn bench_deserialize_large_hashmap(c: &mut Criterion) { let buf = large_hashmap_postcard_bytes(); c.bench_function("zeromap/deserialize/large/hashmap", |b| { b.iter(|| { let map: HashMap = postcard::from_bytes(black_box(&buf)).unwrap(); assert_eq!(map.get("iu3333"), Some(&"Inuktitut".to_owned())); }) }); } fn bench_lookup_hashmap(c: &mut Criterion) { let map: HashMap = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap(); c.bench_function("zeromap/lookup/small/hashmap", |b| { b.iter(|| { assert_eq!(map.get(black_box("iu")), Some(&"Inuktitut".to_owned())); assert_eq!(map.get(black_box("zz")), None); }); }); } #[cfg(feature = "bench")] fn bench_lookup_large_hashmap(c: &mut Criterion) { let buf = large_hashmap_postcard_bytes(); let map: HashMap = postcard::from_bytes(&buf).unwrap(); c.bench_function("zeromap/lookup/large/hashmap", |b| { b.iter(|| { assert_eq!(map.get(black_box("iu3333")), Some(&"Inuktitut".to_owned())); assert_eq!(map.get(black_box("zz")), None); }); }); } #[cfg(feature = "bench")] fn large_hashmap_postcard_bytes() -> Vec { postcard::to_stdvec(&build_hashmap(true)).unwrap() } fn bench_zerohashmap(c: &mut Criterion) { // Uncomment the following line to re-generate the const data. // generate_zerohashmap(); bench_deserialize_zerohashmap(c); #[cfg(feature = "bench")] bench_deserialize_large_zerohashmap(c); bench_zerohashmap_lookup(c); #[cfg(feature = "bench")] bench_zerohashmap_lookup_large(c); } fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> { let mut kv = Vec::new(); for (key, value) in DATA.iter() { if large { for n in 0..512 { kv.push((format!("{key}{n}"), indexify(value))); } } else { kv.push((key.to_string(), indexify(value))); } } ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1))) } fn bench_deserialize_zerohashmap(c: &mut Criterion) { c.bench_function("zerohashmap/deserialize/small", |b| { b.iter(|| { let map: ZeroHashMap = postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap(); assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_deserialize_large_zerohashmap(c: &mut Criterion) { let buf = large_zerohashmap_postcard_bytes(); c.bench_function("zerohashmap/deserialize/large", |b| { b.iter(|| { let map: ZeroHashMap = postcard::from_bytes(black_box(&buf)).unwrap(); assert_eq!(map.get(indexify("iu333")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_zerohashmap_lookup(c: &mut Criterion) { let zero_hashmap: ZeroHashMap = postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap(); c.bench_function("zerohashmap/lookup/small", |b| { b.iter(|| { assert_eq!( zero_hashmap.get(black_box(indexify("iu"))).map(|x| &x.0), Some("Inuktitut") ); assert_eq!( zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0), None ); }); }); } #[cfg(feature = "bench")] fn bench_zerohashmap_lookup_large(c: &mut Criterion) { let buf = large_zerohashmap_postcard_bytes(); let zero_hashmap: ZeroHashMap = postcard::from_bytes(&buf).unwrap(); c.bench_function("zerohashmap/lookup/large", |b| { b.iter(|| { assert_eq!( zero_hashmap.get(black_box(indexify("iu333"))).map(|x| &x.0), Some("Inuktitut") ); assert_eq!( zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0), None ); }); }); } #[cfg(feature = "bench")] fn large_zerohashmap_postcard_bytes() -> Vec { postcard::to_stdvec(&build_zerohashmap(true)).unwrap() } criterion_group!(benches, overview_bench); criterion_main!(benches); /// This type lets us use a u32-index-format VarZeroVec with the ZeroMap. /// /// Eventually we will have a FormatSelector type that lets us do `ZeroMap, V>` /// (https://github.com/unicode-org/icu4x/issues/2312) /// /// , isn't actually important; it's just more convenient to use make_varule to get the /// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually /// define a Serialize implementation, and that would be gnarly) /// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE) #[zerovec::make_varule(Index32Str)] #[zerovec::skip_derive(ZeroMapKV)] #[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] #[zerovec::derive(Serialize, Deserialize, Hash)] pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str); impl<'a> ZeroMapKV<'a> for Index32Str { type Container = VarZeroVec<'a, Index32Str, Index32>; type Slice = VarZeroSlice; type GetType = Index32Str; type OwnedType = Box; } #[inline] fn indexify(s: &str) -> &Index32Str { unsafe { &*(s as *const str as *const Index32Str) } }