// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! Zero-copy vector abstractions for arbitrary types, backed by byte slices. //! //! `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in //! zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with //! proc macros. //! //! Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing //! read-only data. //! //! This crate has four main types: //! //! - [`ZeroVec<'a, T>`] (and [`ZeroSlice`](ZeroSlice)) for fixed-width types like `u32` //! - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice`](VarZeroSlice)) for variable-width types like `str` //! - [`ZeroMap<'a, K, V>`] to map from `K` to `V` //! - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V` //! //! The first two are intended as close-to-drop-in replacements for `Vec` in Serde structs. The third and fourth are //! intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap). When used with Serde derives, **be sure to apply //! `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`]. //! //! [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like //! [`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization //! from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing //! from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from, //! avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#performance) for more information) //! on deserialization. //! //!
See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate //! works under the hood. //! //! # Cargo features //! //! This crate has several optional Cargo features: //! - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde) //! - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful //! in situations involving a lot of zero-copy deserialization. //! - `derive`: Makes it easier to use custom types in these collections by providing the [`#[make_ule]`](crate::make_ule) and //! [`#[make_varule]`](crate::make_varule) proc macros, which generate appropriate [`ULE`](crate::ule::ULE) and //! [`VarULE`](crate::ule::VarULE)-conformant types for a given "normal" type. //! - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`. //! //! [`ZeroVec<'a, T>`]: ZeroVec //! [`VarZeroVec<'a, T>`]: VarZeroVec //! [`ZeroMap<'a, K, V>`]: ZeroMap //! [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d //! [`Cow<'a, T>`]: alloc::borrow::Cow //! //! # Examples //! //! Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode: //! //! ``` //! # #[cfg(feature = "serde")] { //! use zerovec::{VarZeroVec, ZeroVec}; //! //! // This example requires the "serde" feature //! #[derive(serde::Serialize, serde::Deserialize)] //! pub struct DataStruct<'data> { //! #[serde(borrow)] //! nums: ZeroVec<'data, u32>, //! #[serde(borrow)] //! chars: ZeroVec<'data, char>, //! #[serde(borrow)] //! strs: VarZeroVec<'data, str>, //! } //! //! let data = DataStruct { //! nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]), //! chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']), //! strs: VarZeroVec::from(&["hello", "world"]), //! }; //! let bincode_bytes = //! bincode::serialize(&data).expect("Serialization should be successful"); //!
assert_eq!(bincode_bytes.len(), 67); //! //! let deserialized: DataStruct = bincode::deserialize(&bincode_bytes) //! .expect("Deserialization should be successful"); //! assert_eq!(deserialized.nums.first(), Some(211)); //! assert_eq!(deserialized.chars.get(1), Some('冇')); //! assert_eq!(deserialized.strs.get(1), Some("world")); //! // The deserialization will not have allocated anything //! assert!(!deserialized.nums.is_owned()); //! # } // feature = "serde" //! ``` //! //! Use custom types inside of ZeroVec: //! //! ```rust //! # #[cfg(all(feature = "serde", feature = "derive"))] { //! use zerovec::{ZeroVec, VarZeroVec, ZeroMap}; //! use std::borrow::Cow; //! use zerovec::ule::encode_varule_to_box; //! //! // custom fixed-size ULE type for ZeroVec //! #[zerovec::make_ule(DateULE)] //! #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] //! struct Date { //! y: u64, //! m: u8, //! d: u8 //! } //! //! // custom variable sized VarULE type for VarZeroVec //! #[zerovec::make_varule(PersonULE)] //! #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE //! #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] //! struct Person<'a> { //! birthday: Date, //! favorite_character: char, //! #[serde(borrow)] //! name: Cow<'a, str>, //! } //! //! #[derive(serde::Serialize, serde::Deserialize)] //! struct Data<'a> { //! #[serde(borrow)] //! important_dates: ZeroVec<'a, Date>, //! // note: VarZeroVec always must reference the ULE type directly //! #[serde(borrow)] //! important_people: VarZeroVec<'a, PersonULE>, //! #[serde(borrow)] //! birthdays_to_people: ZeroMap<'a, Date, PersonULE> //! } //! //! //! let person1 = Person { //! birthday: Date { y: 1990, m: 9, d: 7}, //! favorite_character: 'π', //! name: Cow::from("Kate") //! }; //! let person2 = Person { //! birthday: Date { y: 1960, m: 5, d: 25}, //! favorite_character: '冇', //! name: Cow::from("Jesse") //! }; //! //! 
let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]); //! let important_people = VarZeroVec::from(&[&person1, &person2]); //! let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new(); //! // `.insert_var_v()` is slightly more convenient over `.insert()` for custom ULE types //! birthdays_to_people.insert_var_v(&person1.birthday, &person1); //! birthdays_to_people.insert_var_v(&person2.birthday, &person2); //! //! let data = Data { important_dates, important_people, birthdays_to_people }; //! //! let bincode_bytes = bincode::serialize(&data) //! .expect("Serialization should be successful"); //! assert_eq!(bincode_bytes.len(), 168); //! //! let deserialized: Data = bincode::deserialize(&bincode_bytes) //! .expect("Deserialization should be successful"); //! //! assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943); //! assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); //! assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); //! assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate"); //! //! # } // feature = serde and derive //! ``` //! //! # Performance //! //! `zerovec` is designed for fast deserialization from byte buffers with zero memory allocations //! while minimizing performance regressions for common vector operations. //! //! Benchmark results on x86_64: //! //! | Operation | `Vec` | `zerovec` | //! |---|---|---| //! | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns | //! | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns | //! | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns | //! | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs | //! | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns | //! | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns | //! //!
\* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.* //! //! | Operation | `HashMap` | `LiteMap` | `ZeroMap` | //! |---|---|---|---| //! | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns | //! | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms | //! | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns | //! | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns | //! //! Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`. //! //! The benches used to generate the above table can be found in the `benches` directory in the project repository. //! `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type //! is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`. // https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations #![cfg_attr(not(any(test, feature = "std")), no_std)] #![cfg_attr( not(test), deny( clippy::indexing_slicing, clippy::unwrap_used, clippy::expect_used, clippy::panic, clippy::exhaustive_structs, clippy::exhaustive_enums, // TODO(#2266): enable missing_debug_implementations, ) )] // this crate does a lot of nuanced lifetime manipulation, being explicit // is better here.
#![allow(clippy::needless_lifetimes)]

extern crate alloc;

// Private modules; the items meant for users are re-exported further down.
mod error;
mod flexzerovec;
mod map;
mod map2d;
// Public, but only compiled for this crate's own test builds.
#[cfg(test)]
pub mod samples;
mod varzerovec;
mod zerovec;

// This must be after `mod zerovec` for some impls on `ZeroSlice`
// to show up in the right spot in the docs
pub mod ule;
// Only compiled when the `yoke` Cargo feature is enabled.
#[cfg(feature = "yoke")]
mod yoke_impls;
mod zerofrom_impls;

// Crate-root re-exports of the error type and the main collection types.
pub use crate::error::ZeroVecError;
pub use crate::map::map::ZeroMap;
pub use crate::map2d::map::ZeroMap2d;
pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec};
pub use crate::zerovec::{ZeroSlice, ZeroVec};

// Crate-internal shortcut to a helper that lives in `flexzerovec`.
pub(crate) use flexzerovec::chunk_to_usize;

// Hidden from rustdoc. Re-exports `zerofrom::ZeroFrom`, `alloc::boxed` and
// (with the `serde` feature) the `serde` crate under a single path.
// NOTE(review): presumably this is the path that code generated by the
// `zerovec_derive` proc macros refers to -- confirm before renaming or
// removing anything in here.
#[doc(hidden)]
pub mod __zerovec_internal_reexport {
    pub use zerofrom::ZeroFrom;

    pub use alloc::boxed;

    #[cfg(feature = "serde")]
    pub use serde;
}

pub mod maps {
    //! This module contains additional utility types and traits for working with
    //! [`ZeroMap`] and [`ZeroMap2d`]. See their docs for more details on the general purpose
    //! of these types.
    //!
    //! [`ZeroMapBorrowed`] and [`ZeroMap2dBorrowed`] are versions of [`ZeroMap`] and [`ZeroMap2d`]
    //! that can be used when you wish to guarantee that the map data is always borrowed, leading to
    //! relaxed lifetime constraints.
    //!
    //! The [`ZeroMapKV`] trait is required to be implemented on any type that needs to be used
    //! within a map type. [`ZeroVecLike`] and [`MutableZeroVecLike`] are traits used in the
    //! internal workings of the map types, and should typically not be used or implemented by
    //! users of this crate.

    // `ZeroMap` and `ZeroMap2d` are already exported at the crate root;
    // `#[doc(no_inline)]` makes rustdoc list them here as plain re-exports
    // instead of duplicating their documentation pages.
    #[doc(no_inline)]
    pub use crate::map::ZeroMap;
    pub use crate::map::ZeroMapBorrowed;

    #[doc(no_inline)]
    pub use crate::map2d::ZeroMap2d;
    pub use crate::map2d::ZeroMap2dBorrowed;

    pub use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike};

    pub use crate::map2d::ZeroMap2dCursor;
}

pub mod vecs {
    //! This module contains additional utility types for working with
    //! [`ZeroVec`] and [`VarZeroVec`]. See their docs for more details on the general purpose
    //! of these types.
    //!
    //! [`ZeroSlice`] and [`VarZeroSlice`] provide slice-like versions of the vector types
    //! for use behind references and in custom ULE types.
    //!
    //! [`VarZeroVecOwned`] is a special owned/mutable version of [`VarZeroVec`], allowing
    //! direct manipulation of the backing buffer.

    // The four main vector/slice types are already exported at the crate root;
    // `#[doc(no_inline)]` keeps rustdoc from duplicating their pages here.
    #[doc(no_inline)]
    pub use crate::zerovec::{ZeroSlice, ZeroVec};

    #[doc(no_inline)]
    pub use crate::varzerovec::{VarZeroSlice, VarZeroVec};

    // The items below are only reachable through this module (they are not
    // re-exported at the crate root above).
    pub use crate::varzerovec::{Index16, Index32, VarZeroVecFormat, VarZeroVecOwned};

    pub use crate::flexzerovec::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned};
}

// Proc macro reexports
//
// These exist so that our docs can use intra-doc links.
// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from
// a submodule

/// Generate a corresponding [`ULE`] type and the relevant [`AsULE`] implementations for this type
///
/// This can be attached to structs containing only [`AsULE`] types, or C-like enums that have `#[repr(u8)]`
/// and all explicit discriminants.
///
/// The type must be [`Copy`], [`PartialEq`], and [`Eq`].
///
/// `#[make_ule]` will automatically derive the following traits on the [`ULE`] type:
///
/// - [`Ord`] and [`PartialOrd`]
/// - [`ZeroMapKV`]
///
/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`.
/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`.
///
/// The following traits are available to derive, but not automatic:
///
/// - [`Debug`]
///
/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`.
///
/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist.
/// /// For enums, this attribute will generate a crate-public `fn new_from_u8(value: u8) -> Option<Self>` /// method on the main type that allows one to construct the value from a u8. If this method is desired /// to be more public, it should be wrapped. /// /// [`ULE`]: ule::ULE /// [`AsULE`]: ule::AsULE /// [`ZeroMapKV`]: maps::ZeroMapKV /// /// # Example /// /// ```rust /// use zerovec::ZeroVec; /// /// #[zerovec::make_ule(DateULE)] /// #[derive( /// Copy, /// Clone, /// PartialEq, /// Eq, /// Ord, /// PartialOrd, /// serde::Serialize, /// serde::Deserialize, /// )] /// struct Date { /// y: u64, /// m: u8, /// d: u8, /// } /// /// #[derive(serde::Serialize, serde::Deserialize)] /// struct Dates<'a> { /// #[serde(borrow)] /// dates: ZeroVec<'a, Date>, /// } /// /// let dates = Dates { /// dates: ZeroVec::alloc_from_slice(&[ /// Date { /// y: 1985, /// m: 9, /// d: 3, /// }, /// Date { /// y: 1970, /// m: 2, /// d: 20, /// }, /// Date { /// y: 1990, /// m: 6, /// d: 13, /// }, /// ]), /// }; /// /// let bincode_bytes = /// bincode::serialize(&dates).expect("Serialization should be successful"); /// /// // Will deserialize without allocations /// let deserialized: Dates = bincode::deserialize(&bincode_bytes) /// .expect("Deserialization should be successful"); /// /// assert_eq!(deserialized.dates.get(1).unwrap().y, 1970); /// assert_eq!(deserialized.dates.get(2).unwrap().d, 13); /// ``` #[cfg(feature = "derive")] pub use zerovec_derive::make_ule; /// Generate a corresponding [`VarULE`] type and the relevant [`EncodeAsVarULE`]/[`zerofrom::ZeroFrom`] /// implementations for this type /// /// This can be attached to structs containing only [`AsULE`] types with the last fields being /// [`Cow<'a, str>`](alloc::borrow::Cow), [`ZeroSlice`], or [`VarZeroSlice`]. If there is more than one such field, it will be represented /// using [`MultiFieldsULE`](crate::ule::MultiFieldsULE) and getters will be generated. /// /// The type must be [`PartialEq`] and [`Eq`].
/// /// [`EncodeAsVarULE`] and [`zerofrom::ZeroFrom`] are useful for avoiding the need to deal with /// the [`VarULE`] type directly. In particular, it is recommended to use [`zerofrom::ZeroFrom`] /// to convert the [`VarULE`] type back to this type in a cheap, zero-copy way (see the example below /// for more details). /// /// `#[make_varule]` will automatically derive the following traits on the [`VarULE`] type: /// /// - [`Ord`] and [`PartialOrd`] /// - [`ZeroMapKV`] /// /// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. /// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. /// /// The following traits are available to derive, but not automatic: /// /// - [`Debug`] /// - [`Serialize`](serde::Serialize) /// - [`Deserialize`](serde::Deserialize) /// /// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. /// /// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. /// /// This implementation will also by default autogenerate [`Ord`] and [`PartialOrd`] on the [`VarULE`] type based on /// the implementation on `Self`. You can opt out of this with `#[zerovec::skip_derive(Ord)]` /// /// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self` /// for convenience. This allows for a little more flexibility encoding slices. 
/// /// [`EncodeAsVarULE`]: ule::EncodeAsVarULE /// [`VarULE`]: ule::VarULE /// [`ULE`]: ule::ULE /// [`AsULE`]: ule::AsULE /// [`ZeroMapKV`]: maps::ZeroMapKV /// /// # Example /// /// ```rust /// use std::borrow::Cow; /// use zerofrom::ZeroFrom; /// use zerovec::ule::encode_varule_to_box; /// use zerovec::{VarZeroVec, ZeroMap, ZeroVec}; /// /// // custom fixed-size ULE type for ZeroVec /// #[zerovec::make_ule(DateULE)] /// #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] /// struct Date { /// y: u64, /// m: u8, /// d: u8, /// } /// /// // custom variable sized VarULE type for VarZeroVec /// #[zerovec::make_varule(PersonULE)] /// #[zerovec::derive(Serialize, Deserialize)] /// #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] /// struct Person<'a> { /// birthday: Date, /// favorite_character: char, /// #[serde(borrow)] /// name: Cow<'a, str>, /// } /// /// #[derive(serde::Serialize, serde::Deserialize)] /// struct Data<'a> { /// // note: VarZeroVec always must reference the ULE type directly /// #[serde(borrow)] /// important_people: VarZeroVec<'a, PersonULE>, /// } /// /// let person1 = Person { /// birthday: Date { /// y: 1990, /// m: 9, /// d: 7, /// }, /// favorite_character: 'π', /// name: Cow::from("Kate"), /// }; /// let person2 = Person { /// birthday: Date { /// y: 1960, /// m: 5, /// d: 25, /// }, /// favorite_character: '冇', /// name: Cow::from("Jesse"), /// }; /// /// let important_people = VarZeroVec::from(&[person1, person2]); /// let data = Data { important_people }; /// /// let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful"); /// /// // Will deserialize without allocations /// let deserialized: Data = /// bincode::deserialize(&bincode_bytes).expect("Deserialization should be successful"); /// /// assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); /// 
assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); /// /// // Since VarZeroVec produces PersonULE types, it's convenient to use ZeroFrom /// // to recoup Person values in a zero-copy way /// let person_converted: Person = /// ZeroFrom::zero_from(deserialized.important_people.get(1).unwrap()); /// assert_eq!(person_converted.name, "Jesse"); /// assert_eq!(person_converted.birthday.y, 1960); /// ``` #[cfg(feature = "derive")] pub use zerovec_derive::make_varule;