diff options
Diffstat (limited to '')
-rw-r--r-- | vendor/zerovec/src/lib.rs | 512 |
1 files changed, 512 insertions, 0 deletions
diff --git a/vendor/zerovec/src/lib.rs b/vendor/zerovec/src/lib.rs new file mode 100644 index 000000000..9a37c762e --- /dev/null +++ b/vendor/zerovec/src/lib.rs @@ -0,0 +1,512 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Zero-copy vector abstractions for arbitrary types, backed by byte slices. +//! +//! `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in +//! zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with +//! proc macros +//! +//! Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing +//! read-only data. +//! +//! This crate has four main types: +//! +//! - [`ZeroVec<'a, T>`] (and [`ZeroSlice<T>`](ZeroSlice)) for fixed-width types like `u32` +//! - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice<T>`](ZeroSlice)) for variable-width types like `str` +//! - [`ZeroMap<'a, K, V>`] to map from `K` to `V` +//! - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V` +//! +//! The first two are intended as close-to-drop-in replacements for `Vec<T>` in Serde structs. The third and fourth are +//! intended as a replacement for `HashMap` or [`LiteMap`](docs.rs/litemap). When used with Serde derives, **be sure to apply +//! `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`]. +//! +//! [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like +//! [`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization +//! from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing +//! from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from, +//! avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#Performance) for more information) +//! on deserialization. +//! +//! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate +//! works under the hood. +//! +//! # Cargo features +//! +//! This crate has five optional features: +//! - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde) +//! - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful +//! in situations involving a lot of zero-copy deserialization. +//! - `derive`: Makes it easier to use custom types in these collections by providing the [`#[make_ule]`](crate::make_ule) and +//! [`#[make_varule]`](crate::make_varule) proc macros, which generate appropriate [`ULE`](crate::ule::ULE) and +//! [`VarULE`](crate::ule::VarULE)-conformant types for a given "normal" type. +//! - `std`: Enabled `std::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`. +//! +//! [`ZeroVec<'a, T>`]: ZeroVec +//! [`VarZeroVec<'a, T>`]: VarZeroVec +//! [`ZeroMap<'a, K, V>`]: ZeroMap +//! [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d +//! [`Cow<'a, T>`]: alloc::borrow::Cow +//! +//! # Examples +//! +//! Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode: +//! +//! ``` +//! # #[cfg(feature = "serde")] { +//! use zerovec::{VarZeroVec, ZeroVec}; +//! +//! // This example requires the "serde" feature +//! #[derive(serde::Serialize, serde::Deserialize)] +//! pub struct DataStruct<'data> { +//! #[serde(borrow)] +//! nums: ZeroVec<'data, u32>, +//! #[serde(borrow)] +//! chars: ZeroVec<'data, char>, +//! #[serde(borrow)] +//! strs: VarZeroVec<'data, str>, +//! } +//! +//! let data = DataStruct { +//! nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]), +//! chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']), +//! strs: VarZeroVec::from(&["hello", "world"]), +//! }; +//! let bincode_bytes = +//! bincode::serialize(&data).expect("Serialization should be successful"); +//! assert_eq!(bincode_bytes.len(), 67); +//! +//! let deserialized: DataStruct = bincode::deserialize(&bincode_bytes) +//! .expect("Deserialization should be successful"); +//! assert_eq!(deserialized.nums.first(), Some(211)); +//! assert_eq!(deserialized.chars.get(1), Some('冇')); +//! assert_eq!(deserialized.strs.get(1), Some("world")); +//! // The deserialization will not have allocated anything +//! assert!(!deserialized.nums.is_owned()); +//! # } // feature = "serde" +//! ``` +//! +//! Use custom types inside of ZeroVec: +//! +//! ```rust +//! # #[cfg(all(feature = "serde", feature = "derive"))] { +//! use zerovec::{ZeroVec, VarZeroVec, ZeroMap}; +//! use std::borrow::Cow; +//! use zerovec::ule::encode_varule_to_box; +//! +//! // custom fixed-size ULE type for ZeroVec +//! #[zerovec::make_ule(DateULE)] +//! #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +//! struct Date { +//! y: u64, +//! m: u8, +//! d: u8 +//! } +//! +//! // custom variable sized VarULE type for VarZeroVec +//! #[zerovec::make_varule(PersonULE)] +//! #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE +//! #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +//! struct Person<'a> { +//! birthday: Date, +//! favorite_character: char, +//! #[serde(borrow)] +//! name: Cow<'a, str>, +//! } +//! +//! #[derive(serde::Serialize, serde::Deserialize)] +//! struct Data<'a> { +//! #[serde(borrow)] +//! important_dates: ZeroVec<'a, Date>, +//! // note: VarZeroVec always must reference the ULE type directly +//! #[serde(borrow)] +//! important_people: VarZeroVec<'a, PersonULE>, +//! #[serde(borrow)] +//! birthdays_to_people: ZeroMap<'a, Date, PersonULE> +//! } +//! +//! +//! let person1 = Person { +//! birthday: Date { y: 1990, m: 9, d: 7}, +//! favorite_character: 'π', +//! name: Cow::from("Kate") +//! }; +//! let person2 = Person { +//! birthday: Date { y: 1960, m: 5, d: 25}, +//! favorite_character: '冇', +//! name: Cow::from("Jesse") +//! }; +//! +//! let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]); +//! let important_people = VarZeroVec::from(&[&person1, &person2]); +//! let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new(); +//! // `.insert_var_v()` is slightly more convenient over `.insert()` for custom ULE types +//! birthdays_to_people.insert_var_v(&person1.birthday, &person1); +//! birthdays_to_people.insert_var_v(&person2.birthday, &person2); +//! +//! let data = Data { important_dates, important_people, birthdays_to_people }; +//! +//! let bincode_bytes = bincode::serialize(&data) +//! .expect("Serialization should be successful"); +//! assert_eq!(bincode_bytes.len(), 168); +//! +//! let deserialized: Data = bincode::deserialize(&bincode_bytes) +//! .expect("Deserialization should be successful"); +//! +//! assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943); +//! assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); +//! assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); +//! assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate"); +//! +//! } // feature = serde and derive +//! ``` +//! +//! # Performance +//! +//! `zerovec` is designed for fast deserialization from byte buffers with zero memory allocations +//! while minimizing performance regressions for common vector operations. +//! +//! Benchmark results on x86_64: +//! +//! | Operation | `Vec<T>` | `zerovec` | +//! |---|---|---| +//! | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns | +//! | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns | +//! | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns | +//! | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs | +//! | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns | +//! | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns | +//! +//! \* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.* +//! +//! | Operation | `HashMap<K,V>` | `LiteMap<K,V>` | `ZeroMap<K,V>` | +//! |---|---|---|---| +//! | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns | +//! | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms | +//! | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns | +//! | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns | +//! +//! Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`. +//! +//! The benches used to generate the above table can be found in the `benches` directory in the project repository. +//! `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type +//! is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`. + +// https://github.com/unicode-org/icu4x/blob/main/docs/process/boilerplate.md#library-annotations +#![cfg_attr(not(any(test, feature = "std")), no_std)] +#![cfg_attr( + not(test), + deny( + clippy::indexing_slicing, + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::exhaustive_structs, + clippy::exhaustive_enums, + // TODO(#2266): enable missing_debug_implementations, + ) +)] +// this crate does a lot of nuanced lifetime manipulation, being explicit +// is better here. +#![allow(clippy::needless_lifetimes)] + +extern crate alloc; + +mod error; +mod flexzerovec; +mod map; +mod map2d; +#[cfg(test)] +pub mod samples; +mod varzerovec; +mod zerovec; + +// This must be after `mod zerovec` for some impls on `ZeroSlice<RawBytesULE>` +// to show up in the right spot in the docs +pub mod ule; + +#[cfg(feature = "yoke")] +mod yoke_impls; +mod zerofrom_impls; + +pub use crate::error::ZeroVecError; +pub use crate::map::map::ZeroMap; +pub use crate::map2d::map::ZeroMap2d; +pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec}; +pub use crate::zerovec::{ZeroSlice, ZeroVec}; + +pub(crate) use flexzerovec::chunk_to_usize; + +#[doc(hidden)] +pub mod __zerovec_internal_reexport { + pub use zerofrom::ZeroFrom; + + pub use alloc::boxed; + + #[cfg(feature = "serde")] + pub use serde; +} + +pub mod maps { + //! This module contains additional utility types and traits for working with + //! [`ZeroMap`] and [`ZeroMap2d`]. See their docs for more details on the general purpose + //! of these types. + //! + //! [`ZeroMapBorrowed`] and [`ZeroMap2dBorrowed`] are versions of [`ZeroMap`] and [`ZeroMap2d`] + //! that can be used when you wish to guarantee that the map data is always borrowed, leading to + //! relaxed lifetime constraints. + //! + //! The [`ZeroMapKV`] trait is required to be implemented on any type that needs to be used + //! within a map type. [`ZeroVecLike`] and [`MutableZeroVecLike`] are traits used in the + //! internal workings of the map types, and should typically not be used or implemented by + //! users of this crate. + #[doc(no_inline)] + pub use crate::map::ZeroMap; + pub use crate::map::ZeroMapBorrowed; + + #[doc(no_inline)] + pub use crate::map2d::ZeroMap2d; + pub use crate::map2d::ZeroMap2dBorrowed; + + pub use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; +} + +pub mod vecs { + //! This module contains additional utility types for working with + //! [`ZeroVec`] and [`VarZeroVec`]. See their docs for more details on the general purpose + //! of these types. + //! + //! [`ZeroSlice`] and [`VarZeroSlice`] provide slice-like versions of the vector types + //! for use behind references and in custom ULE types. + //! + //! [`VarZeroVecOwned`] is a special owned/mutable version of [`VarZeroVec`], allowing + //! direct manipulation of the backing buffer. + + #[doc(no_inline)] + pub use crate::zerovec::{ZeroSlice, ZeroVec}; + + #[doc(no_inline)] + pub use crate::varzerovec::{VarZeroSlice, VarZeroVec}; + + pub use crate::varzerovec::{Index16, Index32, VarZeroVecFormat, VarZeroVecOwned}; + + pub use crate::flexzerovec::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned}; +} + +// Proc macro reexports +// +// These exist so that our docs can use intra-doc links. +// Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from +// a submodule + +/// Generate a corresponding [`ULE`] type and the relevant [`AsULE`] implementations for this type +/// +/// This can be attached to structs containing only [`AsULE`] types, or C-like enums that have `#[repr(u8)]` +/// and all explicit discriminants. +/// +/// The type must be [`Copy`], [`PartialEq`], and [`Eq`]. +/// +/// `#[make_ule]` will automatically derive the following traits on the [`ULE`] type: +/// +/// - [`Ord`] and [`PartialOrd`] +/// - [`ZeroMapKV`] +/// +/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. +/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. +/// +/// The following traits are available to derive, but not automatic: +/// +/// - [`Debug`] +/// +/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. +/// +/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. +/// +/// For enums, this attribute will generate a crate-public `fn new_from_u8(value: u8) -> Option<Self>` +/// method on the main type that allows one to construct the value from a u8. If this method is desired +/// to be more public, it should be wrapped. +/// +/// [`ULE`]: ule::ULE +/// [`AsULE`]: ule::AsULE +/// [`ZeroMapKV`]: maps::ZeroMapKV +/// +/// # Example +/// +/// ```rust +/// use zerovec::ZeroVec; +/// +/// #[zerovec::make_ule(DateULE)] +/// #[derive( +/// Copy, +/// Clone, +/// PartialEq, +/// Eq, +/// Ord, +/// PartialOrd, +/// serde::Serialize, +/// serde::Deserialize, +/// )] +/// struct Date { +/// y: u64, +/// m: u8, +/// d: u8, +/// } +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Dates<'a> { +/// #[serde(borrow)] +/// dates: ZeroVec<'a, Date>, +/// } +/// +/// let dates = Dates { +/// dates: ZeroVec::alloc_from_slice(&[ +/// Date { +/// y: 1985, +/// m: 9, +/// d: 3, +/// }, +/// Date { +/// y: 1970, +/// m: 2, +/// d: 20, +/// }, +/// Date { +/// y: 1990, +/// m: 6, +/// d: 13, +/// }, +/// ]), +/// }; +/// +/// let bincode_bytes = +/// bincode::serialize(&dates).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Dates = bincode::deserialize(&bincode_bytes) +/// .expect("Deserialization should be successful"); +/// +/// assert_eq!(deserialized.dates.get(1).unwrap().y, 1970); +/// assert_eq!(deserialized.dates.get(2).unwrap().d, 13); +/// ``` +#[cfg(feature = "derive")] +pub use zerovec_derive::make_ule; + +/// Generate a corresponding [`VarULE`] type and the relevant [`EncodeAsVarULE`]/[`zerofrom::ZeroFrom`] +/// implementations for this type +/// +/// This can be attached to structs containing only [`AsULE`] types with the last field being [`Cow<'a, str>`](alloc::borrow::Cow), +/// [`Cow<'a, str>`](alloc::borrow::Cow), [`ZeroSlice`], or [`VarZeroSlice`]. +/// +/// The type must be [`PartialEq`] and [`Eq`]. +/// +/// [`EncodeAsVarULE`] and [`zerofrom::ZeroFrom`] are useful for avoiding the need to deal with +/// the [`VarULE`] type directly. In particular, it is recommended to use [`zerofrom::ZeroFrom`] +/// to convert the [`VarULE`] type back to this type in a cheap, zero-copy way (see the example below +/// for more details). +/// +/// `#[make_varule]` will automatically derive the following traits on the [`VarULE`] type: +/// +/// - [`Ord`] and [`PartialOrd`] +/// - [`ZeroMapKV`] +/// +/// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. +/// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. +/// +/// The following traits are available to derive, but not automatic: +/// +/// - [`Debug`] +/// - [`Serialize`](serde::Serialize) +/// - [`Deserialize`](serde::Deserialize) +/// +/// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. +/// +/// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. +/// +/// This implementation will also by default autogenerate [`Ord`] and [`PartialOrd`] on the [`VarULE`] type based on +/// the implementation on `Self`. You can opt out of this with `#[zerovec::skip_derive(Ord)]` +/// +/// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self` +/// for convenience. This allows for a little more flexibility encoding slices. +/// +/// [`EncodeAsVarULE`]: ule::EncodeAsVarULE +/// [`VarULE`]: ule::VarULE +/// [`ULE`]: ule::ULE +/// [`AsULE`]: ule::AsULE +/// [`ZeroMapKV`]: maps::ZeroMapKV +/// +/// # Example +/// +/// ```rust +/// use std::borrow::Cow; +/// use zerofrom::ZeroFrom; +/// use zerovec::ule::encode_varule_to_box; +/// use zerovec::{VarZeroVec, ZeroMap, ZeroVec}; +/// +/// // custom fixed-size ULE type for ZeroVec +/// #[zerovec::make_ule(DateULE)] +/// #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +/// struct Date { +/// y: u64, +/// m: u8, +/// d: u8, +/// } +/// +/// // custom variable sized VarULE type for VarZeroVec +/// #[zerovec::make_varule(PersonULE)] +/// #[zerovec::derive(Serialize, Deserialize)] +/// #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] +/// struct Person<'a> { +/// birthday: Date, +/// favorite_character: char, +/// #[serde(borrow)] +/// name: Cow<'a, str>, +/// } +/// +/// #[derive(serde::Serialize, serde::Deserialize)] +/// struct Data<'a> { +/// // note: VarZeroVec always must reference the ULE type directly +/// #[serde(borrow)] +/// important_people: VarZeroVec<'a, PersonULE>, +/// } +/// +/// let person1 = Person { +/// birthday: Date { +/// y: 1990, +/// m: 9, +/// d: 7, +/// }, +/// favorite_character: 'π', +/// name: Cow::from("Kate"), +/// }; +/// let person2 = Person { +/// birthday: Date { +/// y: 1960, +/// m: 5, +/// d: 25, +/// }, +/// favorite_character: '冇', +/// name: Cow::from("Jesse"), +/// }; +/// +/// let important_people = VarZeroVec::from(&[person1, person2]); +/// let data = Data { important_people }; +/// +/// let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful"); +/// +/// // Will deserialize without allocations +/// let deserialized: Data = +/// bincode::deserialize(&bincode_bytes).expect("Deserialization should be successful"); +/// +/// assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); +/// assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); +/// +/// // Since VarZeroVec produces PersonULE types, it's convenient to use ZeroFrom +/// // to recoup Person values in a zero-copy way +/// let person_converted: Person = +/// ZeroFrom::zero_from(deserialized.important_people.get(1).unwrap()); +/// assert_eq!(person_converted.name, "Jesse"); +/// assert_eq!(person_converted.birthday.y, 1960); +/// ``` +#[cfg(feature = "derive")] +pub use zerovec_derive::make_varule; |