diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/zerovec-derive | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/zerovec-derive')
-rw-r--r-- | third_party/rust/zerovec-derive/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/Cargo.lock | 102 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/Cargo.toml | 64 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/LICENSE | 44 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/README.md | 11 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/examples/derives.rs | 157 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/examples/make.rs | 116 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/examples/make_var.rs | 235 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/src/lib.rs | 43 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/src/make_ule.rs | 346 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/src/make_varule.rs | 798 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/src/ule.rs | 107 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/src/utils.rs | 317 | ||||
-rw-r--r-- | third_party/rust/zerovec-derive/src/varule.rs | 130 |
14 files changed, 2471 insertions, 0 deletions
diff --git a/third_party/rust/zerovec-derive/.cargo-checksum.json b/third_party/rust/zerovec-derive/.cargo-checksum.json new file mode 100644 index 0000000000..69962f1572 --- /dev/null +++ b/third_party/rust/zerovec-derive/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"812481e1743cbc759af76c28a9dd74791c24c64c7d57f387f32ba3e175176401","Cargo.toml":"14a7ec653dd56600dbd75f4ff26cf56ebe46443038e4ee7e794555045fb8bab3","LICENSE":"853f87c96f3d249f200fec6db1114427bc8bdf4afddc93c576956d78152ce978","README.md":"bacbc17b2cb457c169257f5e22c78a45d1c4a399eadae383f7732af909616ebc","examples/derives.rs":"2541311f2b532301ab8600927fb2d12a842166ce98d57879997df12dcb928dd2","examples/make.rs":"709829d9aa1fa1d023437dfd8169183690629b71e65cc1316b1f4bf767588cc6","examples/make_var.rs":"933c0ecb44f69f9081e7866a1a782d8501ad947d87b9e4cab6947640c28c3998","src/lib.rs":"029d2b251b8279e7c85d9cc8efb6cad5188da9464b0a27e9de3dd119f4ffef2e","src/make_ule.rs":"5e1abcae7f8ffda8c472f19e4ce2543dc9b4841e644750d2ecb85fcbe0c836f8","src/make_varule.rs":"756384f6f8ae5a40e2a1b02f47bc804f110a907413ea6470152cca13ea912202","src/ule.rs":"1f0a46ff39e43bb19deb9e9a06289350096d4e6ca3f33cb76fec3c84c4a439d8","src/utils.rs":"48cb8041cd0302c6e37f7cfd0704bc8030631f7144a6f1a4019e43c0f1ee2737","src/varule.rs":"b0642df70023b2f04aca692010f6b2a81fd3f3db8612dbae072a200eb04f0913"},"package":"7b4e5997cbf58990550ef1f0e5124a05e47e1ebd33a84af25739be6031a62c20"}
\ No newline at end of file diff --git a/third_party/rust/zerovec-derive/Cargo.lock b/third_party/rust/zerovec-derive/Cargo.lock new file mode 100644 index 0000000000..2f4949cea3 --- /dev/null +++ b/third_party/rust/zerovec-derive/Cargo.lock @@ -0,0 +1,102 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "proc-macro2" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" 
+version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "zerovec-derive" +version = "0.10.1" +dependencies = [ + "bincode", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn", +] diff --git a/third_party/rust/zerovec-derive/Cargo.toml b/third_party/rust/zerovec-derive/Cargo.toml new file mode 100644 index 0000000000..62fe1035ad --- /dev/null +++ b/third_party/rust/zerovec-derive/Cargo.toml @@ -0,0 +1,64 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "zerovec-derive" +version = "0.10.1" +authors = ["Manish Goregaokar <manishsmail@gmail.com>"] +description = "Custom derive for the zerovec crate" +readme = "README.md" +keywords = [ + "zerocopy", + "serialization", + "zero-copy", + "serde", +] +categories = [ + "rust-patterns", + "memory-management", + "caching", + "no-std", + "data-structures", +] +license-file = "LICENSE" +repository = "https://github.com/unicode-org/icu4x" + +[package.metadata.workspaces] +independent = true + +[lib] +path = "src/lib.rs" +proc_macro = true + +[dependencies.proc-macro2] +version = "1.0.27" + +[dependencies.quote] +version = "1.0.9" + +[dependencies.syn] +version = "2" +features = [ + "derive", + "parsing", + "extra-traits", +] + +[dev-dependencies.bincode] +version = "1.3" + +[dev-dependencies.serde] +version = "1.0" +features = ["derive"] + +[dev-dependencies.serde_json] +version = "1.0" diff --git a/third_party/rust/zerovec-derive/LICENSE b/third_party/rust/zerovec-derive/LICENSE new file mode 100644 index 0000000000..9845aa5f48 --- /dev/null +++ b/third_party/rust/zerovec-derive/LICENSE @@ -0,0 +1,44 @@ +UNICODE LICENSE V3 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 2020-2023 Unicode, Inc. + +NOTICE TO USER: Carefully read the following legal agreement. BY +DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR +SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT +DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of data files and any associated documentation (the "Data Files") or +software and any associated documentation (the "Software") to deal in the +Data Files or Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, and/or sell +copies of the Data Files or Software, and to permit persons to whom the +Data Files or Software are furnished to do so, provided that either (a) +this copyright and permission notice appear with all copies of the Data +Files or Software, or (b) this copyright and permission notice appear in +associated Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF +THIRD PARTY RIGHTS. + +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE +BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, +OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA +FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall +not be used in advertising or otherwise to promote the sale, use or other +dealings in these Data Files or Software without prior written +authorization of the copyright holder. + +— + +Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. +ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. 
diff --git a/third_party/rust/zerovec-derive/README.md b/third_party/rust/zerovec-derive/README.md new file mode 100644 index 0000000000..e80b8abe55 --- /dev/null +++ b/third_party/rust/zerovec-derive/README.md @@ -0,0 +1,11 @@ +# zerovec-derive [![crates.io](https://img.shields.io/crates/v/zerovec-derive)](https://crates.io/crates/zerovec-derive) + +<!-- cargo-rdme start --> + +Proc macros for generating `ULE`, `VarULE` impls and types for the `zerovec` crate + +<!-- cargo-rdme end --> + +## More Information + +For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). diff --git a/third_party/rust/zerovec-derive/examples/derives.rs b/third_party/rust/zerovec-derive/examples/derives.rs new file mode 100644 index 0000000000..40f821023d --- /dev/null +++ b/third_party/rust/zerovec-derive/examples/derives.rs @@ -0,0 +1,157 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use zerovec::ule::AsULE; +use zerovec::ule::EncodeAsVarULE; +use zerovec::*; + +#[repr(packed)] +#[derive(ule::ULE, Copy, Clone)] +pub struct FooULE { + a: u8, + b: <u32 as AsULE>::ULE, + c: <char as AsULE>::ULE, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct Foo { + a: u8, + b: u32, + c: char, +} + +impl AsULE for Foo { + type ULE = FooULE; + fn to_unaligned(self) -> FooULE { + FooULE { + a: self.a, + b: self.b.to_unaligned(), + c: self.c.to_unaligned(), + } + } + + fn from_unaligned(other: FooULE) -> Self { + Self { + a: other.a, + b: AsULE::from_unaligned(other.b), + c: AsULE::from_unaligned(other.c), + } + } +} + +#[repr(packed)] +#[derive(ule::VarULE)] +pub struct RelationULE { + /// This maps to (AndOr, Polarity, Operand), + /// with the first bit mapping to AndOr (1 == And), the second bit + /// to Polarity (1 == Positive), and the remaining bits to Operand + /// encoded via Operand::encode. It is unsound for the Operand bits to + /// not be a valid encoded Operand. 
+ andor_polarity_operand: u8, + modulo: <u32 as AsULE>::ULE, + range_list: ZeroSlice<Foo>, +} + +#[derive(Clone, PartialEq, Debug)] +pub struct Relation<'a> { + andor_polarity_operand: u8, + modulo: u32, + range_list: ZeroVec<'a, Foo>, +} + +unsafe impl EncodeAsVarULE<RelationULE> for Relation<'_> { + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[ + &[self.andor_polarity_operand], + ule::ULE::as_byte_slice(&[self.modulo.to_unaligned()]), + self.range_list.as_bytes(), + ]) + } +} + +impl RelationULE { + pub fn as_relation(&self) -> Relation { + Relation { + andor_polarity_operand: self.andor_polarity_operand, + modulo: u32::from_unaligned(self.modulo), + range_list: self.range_list.as_zerovec(), + } + } +} + +const TEST_SLICE: &[Foo] = &[ + Foo { + a: 101, + b: 924, + c: '⸘', + }, + Foo { + a: 217, + b: 4228, + c: 'ə', + }, + Foo { + a: 117, + b: 9090, + c: 'ø', + }, +]; + +const TEST_SLICE2: &[Foo] = &[ + Foo { + a: 92, + b: 4, + c: 'å', + }, + Foo { + a: 9, + b: 49993, + c: '±', + }, +]; +fn test_zerovec() { + let zerovec: ZeroVec<Foo> = TEST_SLICE.iter().copied().collect(); + + assert_eq!(zerovec, TEST_SLICE); + + let bytes = zerovec.as_bytes(); + let reparsed: ZeroVec<Foo> = ZeroVec::parse_byte_slice(bytes).expect("Parsing should succeed"); + + assert_eq!(reparsed, TEST_SLICE); +} + +fn test_varzerovec() { + let relation1 = Relation { + andor_polarity_operand: 1, + modulo: 5004, + range_list: TEST_SLICE.iter().copied().collect(), + }; + let relation2 = Relation { + andor_polarity_operand: 5, + modulo: 909, + range_list: TEST_SLICE2.iter().copied().collect(), + }; + + let relations = &[relation1, relation2]; + + let vzv = VarZeroVec::<_>::from(relations); + + for (ule, stack) in vzv.iter().zip(relations.iter()) { + assert_eq!(*stack, ule.as_relation()); + } + + let bytes = vzv.as_bytes(); + + let recovered: VarZeroVec<RelationULE> = + VarZeroVec::parse_byte_slice(bytes).expect("Parsing should succeed"); + + for (ule, stack) 
in recovered.iter().zip(relations.iter()) { + assert_eq!(*stack, ule.as_relation()); + } +} + +fn main() { + test_zerovec(); + test_varzerovec(); +} diff --git a/third_party/rust/zerovec-derive/examples/make.rs b/third_party/rust/zerovec-derive/examples/make.rs new file mode 100644 index 0000000000..e83673c6db --- /dev/null +++ b/third_party/rust/zerovec-derive/examples/make.rs @@ -0,0 +1,116 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use std::fmt::Debug; +use zerovec::*; + +#[make_ule(StructULE)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +struct Struct { + a: u8, + b: u32, + c: Option<char>, +} + +#[make_ule(HashedStructULE)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[zerovec::derive(Debug, Hash)] +struct HashedStruct { + a: u64, + b: i16, + c: Option<char>, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[make_ule(TupleStructULE)] +struct TupleStruct(u8, char); + +#[make_ule(EnumULE)] +#[repr(u8)] +#[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Debug)] +#[zerovec::derive(Debug, Hash)] +enum Enum { + A = 0, + B = 1, + D = 2, + E = 3, + FooBar = 4, + F = 5, +} + +#[make_ule(OutOfOrderEnumULE)] +#[repr(u8)] +#[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Debug)] +#[allow(unused)] +enum OutOfOrderEnum { + A = 0, + B = 1, + E = 3, + FooBar = 4, + D = 2, + F = 5, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd)] +#[make_ule(NoKVULE)] +#[zerovec::skip_derive(ZeroMapKV)] +struct NoKV(u8, char); + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[make_ule(NoOrdULE)] +#[zerovec::skip_derive(ZeroMapKV, Ord)] +struct NoOrd(u8, char); + +fn test_zerovec<T: ule::AsULE + Debug + PartialEq>(slice: &[T]) { + let zerovec: ZeroVec<T> = slice.iter().copied().collect(); + + assert_eq!(zerovec, slice); + + let bytes = 
zerovec.as_bytes(); + let name = std::any::type_name::<T>(); + let reparsed: ZeroVec<T> = ZeroVec::parse_byte_slice(bytes) + .unwrap_or_else(|_| panic!("Parsing {name} should succeed")); + + assert_eq!(reparsed, slice); +} + +fn main() { + test_zerovec(TEST_SLICE_STRUCT); + test_zerovec(TEST_SLICE_TUPLESTRUCT); + test_zerovec(TEST_SLICE_ENUM); +} + +const TEST_SLICE_STRUCT: &[Struct] = &[ + Struct { + a: 101, + b: 924, + c: Some('⸘'), + }, + Struct { + a: 217, + b: 4228, + c: Some('ə'), + }, + Struct { + a: 117, + b: 9090, + c: Some('ø'), + }, +]; + +const TEST_SLICE_TUPLESTRUCT: &[TupleStruct] = &[ + TupleStruct(101, 'ř'), + TupleStruct(76, '°'), + TupleStruct(15, 'a'), +]; + +const TEST_SLICE_ENUM: &[Enum] = &[ + Enum::A, + Enum::FooBar, + Enum::F, + Enum::D, + Enum::B, + Enum::FooBar, + Enum::E, +]; diff --git a/third_party/rust/zerovec-derive/examples/make_var.rs b/third_party/rust/zerovec-derive/examples/make_var.rs new file mode 100644 index 0000000000..3433c366f7 --- /dev/null +++ b/third_party/rust/zerovec-derive/examples/make_var.rs @@ -0,0 +1,235 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use std::borrow::Cow; + +use zerofrom::ZeroFrom; +use zerovec::{ule::AsULE, *}; + +#[make_varule(VarStructULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct VarStruct<'a> { + a: u32, + b: char, + #[serde(borrow)] + c: Cow<'a, str>, +} + +#[make_varule(VarStructOutOfOrderULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct VarStructOutOfOrder<'a> { + a: u32, + #[serde(borrow)] + b: Cow<'a, str>, + c: char, + d: u8, +} + +#[make_varule(VarTupleStructULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct VarTupleStruct<'a>(u32, char, #[serde(borrow)] VarZeroVec<'a, str>); + +#[make_varule(NoKVULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::skip_derive(ZeroMapKV)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct NoKV<'a>(u32, char, #[serde(borrow)] VarZeroVec<'a, str>); + +#[make_varule(NoOrdULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::skip_derive(ZeroMapKV, Ord)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct NoOrd<'a>(u32, char, #[serde(borrow)] VarZeroVec<'a, str>); + +#[make_varule(MultiFieldStructULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct MultiFieldStruct<'a> { + a: u32, + b: char, + #[serde(borrow)] + c: Cow<'a, str>, + d: u8, + #[serde(borrow)] + e: Cow<'a, str>, + f: char, +} + +#[make_varule(MultiFieldConsecutiveStructULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct 
MultiFieldConsecutiveStruct<'a> { + #[serde(borrow)] + a: Cow<'a, str>, + #[serde(borrow)] + b: Cow<'a, str>, + #[serde(borrow)] + c: Cow<'a, str>, + #[serde(borrow)] + d: Cow<'a, str>, +} + +#[make_varule(CustomVarFieldULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct CustomVarField<'a> { + #[zerovec::varule(MultiFieldStructULE)] + #[serde(borrow)] + a: MultiFieldStruct<'a>, + b: u32, +} + +#[make_varule(MultiFieldTupleULE)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, serde::Serialize, serde::Deserialize)] +#[zerovec::derive(Serialize, Deserialize, Debug)] +struct MultiFieldTuple<'a>( + u8, + char, + #[serde(borrow)] VarZeroVec<'a, str>, + #[serde(borrow)] VarZeroVec<'a, [u8]>, + #[serde(borrow)] Cow<'a, str>, +); + +/// The `assert` function should have the body `|(stack, zero)| assert_eq!(stack, &U::zero_from(&zero))` +/// +/// We cannot do this internally because we technically need a different `U` with a shorter lifetime here +/// which would require some gnarly lifetime bounds and perhaps a Yoke dependency. 
This is just a test, so it's +/// not important to get this 100% perfect +fn assert_zerovec<T, U, F>(slice: &[U], assert: F) +where + T: ule::VarULE + ?Sized + serde::Serialize, + U: ule::EncodeAsVarULE<T> + serde::Serialize, + F: Fn(&U, &T), + for<'a> Box<T>: serde::Deserialize<'a>, +{ + let varzerovec: VarZeroVec<T> = slice.into(); + + assert_eq!(varzerovec.len(), slice.len()); + + for (stack, zero) in slice.iter().zip(varzerovec.iter()) { + assert(stack, zero) + } + + let bytes = varzerovec.as_bytes(); + let name = std::any::type_name::<T>(); + let reparsed: VarZeroVec<T> = VarZeroVec::parse_byte_slice(bytes) + .unwrap_or_else(|_| panic!("Parsing VarZeroVec<{name}> should succeed")); + + assert_eq!(reparsed.len(), slice.len()); + + for (stack, zero) in slice.iter().zip(reparsed.iter()) { + assert(stack, zero) + } + + let bincode = bincode::serialize(&varzerovec).unwrap(); + let deserialized: VarZeroVec<T> = bincode::deserialize(&bincode).unwrap(); + + for (stack, zero) in slice.iter().zip(deserialized.iter()) { + assert(stack, zero) + } + + let json_slice = serde_json::to_string(&slice).unwrap(); + let json_vzv = serde_json::to_string(&varzerovec).unwrap(); + + assert_eq!(json_slice, json_vzv); + + let deserialized: VarZeroVec<T> = serde_json::from_str(&json_vzv).unwrap(); + + for (stack, zero) in slice.iter().zip(deserialized.iter()) { + assert(stack, zero) + } +} + +fn main() { + assert_zerovec::<VarStructULE, VarStruct, _>(TEST_VARSTRUCTS, |stack, zero| { + assert_eq!(stack, &VarStruct::zero_from(zero)) + }); + + assert_zerovec::<MultiFieldStructULE, MultiFieldStruct, _>(TEST_MULTIFIELD, |stack, zero| { + assert_eq!(stack, &MultiFieldStruct::zero_from(zero)) + }); + + assert_zerovec::<MultiFieldConsecutiveStructULE, MultiFieldConsecutiveStruct, _>( + TEST_MULTICONSECUTIVE, + |stack, zero| assert_eq!(stack, &MultiFieldConsecutiveStruct::zero_from(zero)), + ); + + let vartuples = &[ + VarTupleStruct(101, 'ø', TEST_STRINGS1.into()), + VarTupleStruct(9499, '⸘', 
TEST_STRINGS2.into()), + VarTupleStruct(3478, '月', TEST_STRINGS3.into()), + ]; + assert_zerovec::<VarTupleStructULE, VarTupleStruct, _>(vartuples, |stack, zero| { + assert_eq!(stack, &VarTupleStruct::zero_from(zero)) + }); + + // Test that all fields are accessible on a type using multifieldule + let multi_ule = ule::encode_varule_to_box(&TEST_MULTIFIELD[0]); + assert_eq!(multi_ule.a, TEST_MULTIFIELD[0].a.to_unaligned()); + assert_eq!(multi_ule.b, TEST_MULTIFIELD[0].b.to_unaligned()); + assert_eq!(multi_ule.c(), TEST_MULTIFIELD[0].c); + assert_eq!(multi_ule.d, TEST_MULTIFIELD[0].d); + assert_eq!(multi_ule.e(), TEST_MULTIFIELD[0].e); + assert_eq!(multi_ule.f, TEST_MULTIFIELD[0].f.to_unaligned()); +} + +const TEST_VARSTRUCTS: &[VarStruct<'static>] = &[ + VarStruct { + a: 101, + b: 'ø', + c: Cow::Borrowed("testīng strīng"), + }, + VarStruct { + a: 9499, + b: '⸘', + c: Cow::Borrowed("a diffərənt ştring"), + }, + VarStruct { + a: 3478, + b: '月', + c: Cow::Borrowed("好多嘅 string"), + }, +]; + +const TEST_STRINGS1: &[&str] = &["foo", "bar", "baz"]; +const TEST_STRINGS2: &[&str] = &["hellø", "wørłd"]; +const TEST_STRINGS3: &[&str] = &["łořem", "ɨpsu₥"]; + +const TEST_MULTIFIELD: &[MultiFieldStruct<'static>] = &[ + MultiFieldStruct { + a: 101, + b: 'ø', + c: Cow::Borrowed("testīng strīng"), + d: 8, + e: Cow::Borrowed("another testīng strīng"), + f: 'å', + }, + MultiFieldStruct { + a: 9499, + b: '⸘', + c: Cow::Borrowed("a diffərənt ştring"), + d: 120, + e: Cow::Borrowed("a diffərənt testing ştring"), + f: 'ł', + }, + MultiFieldStruct { + a: 3478, + b: '月', + c: Cow::Borrowed("好多嘅 string"), + d: 89, + e: Cow::Borrowed("many 好多嘅 string"), + f: 'ə', + }, +]; + +const TEST_MULTICONSECUTIVE: &[MultiFieldConsecutiveStruct<'static>] = + &[MultiFieldConsecutiveStruct { + a: Cow::Borrowed("one"), + b: Cow::Borrowed("2"), + c: Cow::Borrowed("three"), + d: Cow::Borrowed("four"), + }]; diff --git a/third_party/rust/zerovec-derive/src/lib.rs b/third_party/rust/zerovec-derive/src/lib.rs new 
file mode 100644 index 0000000000..9c3007147a --- /dev/null +++ b/third_party/rust/zerovec-derive/src/lib.rs @@ -0,0 +1,43 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Proc macros for generating `ULE`, `VarULE` impls and types for the `zerovec` crate + +use proc_macro::TokenStream; +use syn::{parse_macro_input, DeriveInput, Ident}; +mod make_ule; +mod make_varule; +pub(crate) mod ule; +mod utils; +mod varule; + +/// Full docs for this proc macro can be found on the [`zerovec`](docs.rs/zerovec) crate. +#[proc_macro_derive(ULE)] +pub fn ule_derive(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + TokenStream::from(ule::derive_impl(&input)) +} + +/// Full docs for this proc macro can be found on the [`zerovec`](docs.rs/zerovec) crate. +#[proc_macro_derive(VarULE)] +pub fn varule_derive(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as DeriveInput); + TokenStream::from(varule::derive_impl(&input, None)) +} + +/// Full docs for this proc macro can be found on the [`zerovec`](docs.rs/zerovec) crate. +#[proc_macro_attribute] +pub fn make_ule(attr: TokenStream, item: TokenStream) -> TokenStream { + let input = parse_macro_input!(item as DeriveInput); + let attr = parse_macro_input!(attr as Ident); + TokenStream::from(make_ule::make_ule_impl(attr, input)) +} + +/// Full docs for this proc macro can be found on the [`zerovec`](docs.rs/zerovec) crate. 
+#[proc_macro_attribute] +pub fn make_varule(attr: TokenStream, item: TokenStream) -> TokenStream { + let input = parse_macro_input!(item as DeriveInput); + let attr = parse_macro_input!(attr as Ident); + TokenStream::from(make_varule::make_varule_impl(attr, input)) +} diff --git a/third_party/rust/zerovec-derive/src/make_ule.rs b/third_party/rust/zerovec-derive/src/make_ule.rs new file mode 100644 index 0000000000..b31913f088 --- /dev/null +++ b/third_party/rust/zerovec-derive/src/make_ule.rs @@ -0,0 +1,346 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; + +use crate::utils::{self, FieldInfo, ZeroVecAttrs}; +use std::collections::HashSet; +use syn::spanned::Spanned; +use syn::{parse_quote, Data, DataEnum, DataStruct, DeriveInput, Error, Expr, Fields, Ident, Lit}; + +pub fn make_ule_impl(ule_name: Ident, mut input: DeriveInput) -> TokenStream2 { + if input.generics.type_params().next().is_some() + || input.generics.lifetimes().next().is_some() + || input.generics.const_params().next().is_some() + { + return Error::new( + input.generics.span(), + "#[make_ule] must be applied to a struct without any generics", + ) + .to_compile_error(); + } + let sp = input.span(); + let attrs = match utils::extract_attributes_common(&mut input.attrs, sp, false) { + Ok(val) => val, + Err(e) => return e.to_compile_error(), + }; + + let name = &input.ident; + + let ule_stuff = match input.data { + Data::Struct(ref s) => make_ule_struct_impl(name, &ule_name, &input, s, attrs), + Data::Enum(ref e) => make_ule_enum_impl(name, &ule_name, &input, e, attrs), + _ => { + return Error::new(input.span(), "#[make_ule] must be applied to a struct") + .to_compile_error(); + } + }; + + let zmkv = if attrs.skip_kv { + quote!() + } else { + quote!( + impl<'a> 
zerovec::maps::ZeroMapKV<'a> for #name { + type Container = zerovec::ZeroVec<'a, #name>; + type Slice = zerovec::ZeroSlice<#name>; + type GetType = #ule_name; + type OwnedType = #name; + } + ) + }; + + let maybe_debug = if attrs.debug { + quote!( + impl core::fmt::Debug for #ule_name { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let this = <#name as zerovec::ule::AsULE>::from_unaligned(*self); + <#name as core::fmt::Debug>::fmt(&this, f) + } + } + ) + } else { + quote!() + }; + + quote!( + #input + + #ule_stuff + + #maybe_debug + + #zmkv + ) +} + +fn make_ule_enum_impl( + name: &Ident, + ule_name: &Ident, + input: &DeriveInput, + enu: &DataEnum, + attrs: ZeroVecAttrs, +) -> TokenStream2 { + // We could support more int reprs in the future if needed + if !utils::has_valid_repr(&input.attrs, |r| r == "u8") { + return Error::new( + input.span(), + "#[make_ule] can only be applied to #[repr(u8)] enums", + ) + .to_compile_error(); + } + + // the next discriminant expected + let mut next = 0; + // Discriminants that have not been found in series (we might find them later) + let mut not_found = HashSet::new(); + + for (i, variant) in enu.variants.iter().enumerate() { + if !matches!(variant.fields, Fields::Unit) { + // This can be supported in the future, see zerovec/design_doc.md + return Error::new( + variant.span(), + "#[make_ule] can only be applied to enums with dataless variants", + ) + .to_compile_error(); + } + + if let Some((_, ref discr)) = variant.discriminant { + if let Some(n) = get_expr_int(discr) { + if n >= next { + for missing in next..n { + not_found.insert(missing); + } + next = n + 1; + } + + not_found.remove(&n); + + // We require explicit discriminants so that it is clear that reordering + // fields would be a breaking change. Furthermore, using explicit discriminants helps ensure that + // platform-specific C ABI choices do not matter. 
+ // We could potentially add in explicit discriminants on the user's behalf in the future, or support + // more complicated sets of explicit discriminant values. + if n != i as u64 {} + } else { + return Error::new( + discr.span(), + "#[make_ule] must be applied to enums with explicit integer discriminants", + ) + .to_compile_error(); + } + } else { + return Error::new( + variant.span(), + "#[make_ule] must be applied to enums with explicit discriminants", + ) + .to_compile_error(); + } + } + + let not_found = not_found.iter().collect::<Vec<_>>(); + + if !not_found.is_empty() { + return Error::new(input.span(), format!("#[make_ule] must be applied to enums with discriminants \ + filling the range from 0 to a maximum; could not find {not_found:?}")) + .to_compile_error(); + } + + let max = next as u8; + + let maybe_ord_derives = if attrs.skip_ord { + quote!() + } else { + quote!(#[derive(Ord, PartialOrd)]) + }; + + let vis = &input.vis; + + let doc = format!("[`ULE`](zerovec::ule::ULE) type for {name}"); + + // Safety (based on the safety checklist on the ULE trait): + // 1. ULE type does not include any uninitialized or padding bytes. + // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant + // 2. ULE type is aligned to 1 byte. + // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) + // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. + // (Guarantees that the byte is in range of the corresponding enum.) + // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. + // (This does not happen since we are backed by 1 byte.) + // 5. The other ULE methods use the default impl. + // 6. 
ULE type byte equality is semantic equality + quote!( + #[repr(transparent)] + #[derive(Copy, Clone, PartialEq, Eq)] + #maybe_ord_derives + #[doc = #doc] + #vis struct #ule_name(u8); + + unsafe impl zerovec::ule::ULE for #ule_name { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> { + for byte in bytes { + if *byte >= #max { + return Err(zerovec::ZeroVecError::parse::<Self>()) + } + } + Ok(()) + } + } + + impl zerovec::ule::AsULE for #name { + type ULE = #ule_name; + + fn to_unaligned(self) -> Self::ULE { + // safety: the enum is repr(u8) and can be cast to a u8 + unsafe { + ::core::mem::transmute(self) + } + } + + fn from_unaligned(other: Self::ULE) -> Self { + // safety: the enum is repr(u8) and can be cast from a u8, + // and `#ule_name` guarantees a valid value for this enum. + unsafe { + ::core::mem::transmute(other) + } + } + } + + impl #name { + /// Attempt to construct the value from its corresponding integer, + /// returning `None` if not possible + pub(crate) fn new_from_u8(value: u8) -> Option<Self> { + if value <= #max { + unsafe { + Some(::core::mem::transmute(value)) + } + } else { + None + } + } + } + ) +} + +fn get_expr_int(e: &Expr) -> Option<u64> { + if let Ok(Lit::Int(ref i)) = syn::parse2(quote!(#e)) { + return i.base10_parse().ok(); + } + + None +} + +fn make_ule_struct_impl( + name: &Ident, + ule_name: &Ident, + input: &DeriveInput, + struc: &DataStruct, + attrs: ZeroVecAttrs, +) -> TokenStream2 { + if struc.fields.iter().next().is_none() { + return Error::new( + input.span(), + "#[make_ule] must be applied to a non-empty struct", + ) + .to_compile_error(); + } + let sized_fields = FieldInfo::make_list(struc.fields.iter()); + let field_inits = crate::ule::make_ule_fields(&sized_fields); + let field_inits = utils::wrap_field_inits(&field_inits, &struc.fields); + + let semi = utils::semi_for(&struc.fields); + let repr_attr = utils::repr_for(&struc.fields); + let vis = &input.vis; + + let doc = 
format!("[`ULE`](zerovec::ule::ULE) type for [`{name}`]"); + + let ule_struct: DeriveInput = parse_quote!( + #[repr(#repr_attr)] + #[derive(Copy, Clone, PartialEq, Eq)] + #[doc = #doc] + // We suppress the `missing_docs` lint for the fields of the struct. + #[allow(missing_docs)] + #vis struct #ule_name #field_inits #semi + ); + let derived = crate::ule::derive_impl(&ule_struct); + + let mut as_ule_conversions = vec![]; + let mut from_ule_conversions = vec![]; + + for (i, field) in struc.fields.iter().enumerate() { + let ty = &field.ty; + let i = syn::Index::from(i); + if let Some(ref ident) = field.ident { + as_ule_conversions + .push(quote!(#ident: <#ty as zerovec::ule::AsULE>::to_unaligned(self.#ident))); + from_ule_conversions.push( + quote!(#ident: <#ty as zerovec::ule::AsULE>::from_unaligned(unaligned.#ident)), + ); + } else { + as_ule_conversions.push(quote!(<#ty as zerovec::ule::AsULE>::to_unaligned(self.#i))); + from_ule_conversions + .push(quote!(<#ty as zerovec::ule::AsULE>::from_unaligned(unaligned.#i))); + }; + } + + let as_ule_conversions = utils::wrap_field_inits(&as_ule_conversions, &struc.fields); + let from_ule_conversions = utils::wrap_field_inits(&from_ule_conversions, &struc.fields); + let asule_impl = quote!( + impl zerovec::ule::AsULE for #name { + type ULE = #ule_name; + fn to_unaligned(self) -> Self::ULE { + #ule_name #as_ule_conversions + } + fn from_unaligned(unaligned: Self::ULE) -> Self { + Self #from_ule_conversions + } + } + ); + + let maybe_ord_impls = if attrs.skip_ord { + quote!() + } else { + quote!( + impl core::cmp::PartialOrd for #ule_name { + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + Some(self.cmp(other)) + } + } + + impl core::cmp::Ord for #ule_name { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + let this = <#name as zerovec::ule::AsULE>::from_unaligned(*self); + let other = <#name as zerovec::ule::AsULE>::from_unaligned(*other); + <#name as core::cmp::Ord>::cmp(&this, &other) + } + } + 
) + }; + + let maybe_hash = if attrs.hash { + quote!( + #[allow(clippy::derive_hash_xor_eq)] + impl core::hash::Hash for #ule_name { + fn hash<H>(&self, state: &mut H) where H: core::hash::Hasher { + state.write(<#ule_name as zerovec::ule::ULE>::as_byte_slice(&[*self])); + } + } + ) + } else { + quote!() + }; + + quote!( + #asule_impl + + #ule_struct + + #derived + + #maybe_ord_impls + + #maybe_hash + ) +} diff --git a/third_party/rust/zerovec-derive/src/make_varule.rs b/third_party/rust/zerovec-derive/src/make_varule.rs new file mode 100644 index 0000000000..36a6f6e7a9 --- /dev/null +++ b/third_party/rust/zerovec-derive/src/make_varule.rs @@ -0,0 +1,798 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::utils::{self, FieldInfo}; +use proc_macro2::Span; +use proc_macro2::TokenStream as TokenStream2; +use quote::{quote, ToTokens}; +use syn::spanned::Spanned; +use syn::{ + parse_quote, Data, DeriveInput, Error, Field, Fields, GenericArgument, Ident, Lifetime, + PathArguments, Type, TypePath, +}; + +pub fn make_varule_impl(ule_name: Ident, mut input: DeriveInput) -> TokenStream2 { + if input.generics.type_params().next().is_some() + || input.generics.const_params().next().is_some() + || input.generics.lifetimes().count() > 1 + { + return Error::new( + input.generics.span(), + "#[make_varule] must be applied to a struct without any type or const parameters and at most one lifetime", + ) + .to_compile_error(); + } + + let sp = input.span(); + let attrs = match utils::extract_attributes_common(&mut input.attrs, sp, true) { + Ok(val) => val, + Err(e) => return e.to_compile_error(), + }; + + let lt = input.generics.lifetimes().next(); + + if let Some(lt) = lt { + if lt.colon_token.is_some() || !lt.bounds.is_empty() { + return Error::new( + input.generics.span(), + "#[make_varule] must be applied to a 
struct without lifetime bounds", + ) + .to_compile_error(); + } + } + + let lt = lt.map(|l| &l.lifetime); + + let name = &input.ident; + let input_span = input.span(); + + let fields = match input.data { + Data::Struct(ref mut s) => &mut s.fields, + _ => { + return Error::new(input.span(), "#[make_varule] must be applied to a struct") + .to_compile_error(); + } + }; + + if fields.is_empty() { + return Error::new( + input.span(), + "#[make_varule] must be applied to a struct with at least one field", + ) + .to_compile_error(); + } + + let mut sized_fields = vec![]; + let mut unsized_fields = vec![]; + + let mut custom_varule_idents = vec![]; + + for field in fields.iter_mut() { + match utils::extract_field_attributes(&mut field.attrs) { + Ok(i) => custom_varule_idents.push(i), + Err(e) => return e.to_compile_error(), + } + } + + for (i, field) in fields.iter().enumerate() { + match UnsizedField::new(field, i, custom_varule_idents[i].clone()) { + Ok(o) => unsized_fields.push(o), + Err(_) => sized_fields.push(FieldInfo::new_for_field(field, i)), + } + } + + if unsized_fields.is_empty() { + let last_field_index = fields.len() - 1; + let last_field = fields.iter().next_back().unwrap(); + + let e = UnsizedField::new( + last_field, + last_field_index, + custom_varule_idents[last_field_index].clone(), + ) + .unwrap_err(); + return Error::new(last_field.span(), e).to_compile_error(); + } + + if unsized_fields[0].field.index != fields.len() - unsized_fields.len() + && unsized_fields[0].field.field.ident.is_none() + { + return Error::new( + unsized_fields.first().unwrap().field.field.span(), + "#[make_varule] requires its unsized fields to be at the end for tuple structs", + ) + .to_compile_error(); + } + + let unsized_field_info = UnsizedFields::new(unsized_fields); + + let mut field_inits = crate::ule::make_ule_fields(&sized_fields); + let last_field_ule = unsized_field_info.varule_ty(); + + let setter = unsized_field_info.varule_setter(); + let vis = 
&unsized_field_info.varule_vis(); + field_inits.push(quote!(#vis #setter #last_field_ule)); + + let semi = utils::semi_for(fields); + let repr_attr = utils::repr_for(fields); + let field_inits = utils::wrap_field_inits(&field_inits, fields); + let vis = &input.vis; + + let doc = format!( + "[`VarULE`](zerovec::ule::VarULE) type for [`{name}`]. See [`{name}`] for documentation." + ); + let varule_struct: DeriveInput = parse_quote!( + #[repr(#repr_attr)] + #[doc = #doc] + #[allow(missing_docs)] + #vis struct #ule_name #field_inits #semi + ); + + let derived = crate::varule::derive_impl(&varule_struct, unsized_field_info.varule_validator()); + + let maybe_lt_bound = lt.as_ref().map(|lt| quote!(<#lt>)); + + let encode_impl = make_encode_impl( + &sized_fields, + &unsized_field_info, + name, + &ule_name, + &maybe_lt_bound, + ); + + let zf_impl = make_zf_impl( + &sized_fields, + &unsized_field_info, + fields, + name, + &ule_name, + lt, + input_span, + ); + + let eq_impl = quote!( + impl core::cmp::PartialEq for #ule_name { + fn eq(&self, other: &Self) -> bool { + // The VarULE invariants allow us to assume that equality is byte equality + // in non-safety-critical contexts + <Self as zerovec::ule::VarULE>::as_byte_slice(&self) + == <Self as zerovec::ule::VarULE>::as_byte_slice(&other) + } + } + + impl core::cmp::Eq for #ule_name {} + ); + + let zerofrom_fq_path = + quote!(<#name as zerovec::__zerovec_internal_reexport::ZeroFrom<#ule_name>>); + + let maybe_ord_impls = if attrs.skip_ord { + quote!() + } else { + quote!( + impl core::cmp::PartialOrd for #ule_name { + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + Some(self.cmp(other)) + } + } + + impl core::cmp::Ord for #ule_name { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + let this = #zerofrom_fq_path::zero_from(self); + let other = #zerofrom_fq_path::zero_from(other); + <#name as core::cmp::Ord>::cmp(&this, &other) + } + } + ) + }; + + let maybe_debug = if attrs.debug { + quote!( + 
impl core::fmt::Debug for #ule_name { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let this = #zerofrom_fq_path::zero_from(self); + <#name as core::fmt::Debug>::fmt(&this, f) + } + } + ) + } else { + quote!() + }; + + let zmkv = if attrs.skip_kv { + quote!() + } else { + quote!( + impl<'a> zerovec::maps::ZeroMapKV<'a> for #ule_name { + type Container = zerovec::VarZeroVec<'a, #ule_name>; + type Slice = zerovec::VarZeroSlice<#ule_name>; + type GetType = #ule_name; + type OwnedType = zerovec::__zerovec_internal_reexport::boxed::Box<#ule_name>; + } + ) + }; + + let serde_path = quote!(zerovec::__zerovec_internal_reexport::serde); + + let maybe_ser = if attrs.serialize { + quote!( + impl #serde_path::Serialize for #ule_name { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: #serde_path::Serializer { + let this = #zerofrom_fq_path::zero_from(self); + <#name as #serde_path::Serialize>::serialize(&this, serializer) + } + } + ) + } else { + quote!() + }; + + let maybe_de = if attrs.deserialize { + quote!( + impl<'de> #serde_path::Deserialize<'de> for zerovec::__zerovec_internal_reexport::boxed::Box<#ule_name> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: #serde_path::Deserializer<'de> { + let this = <#name as #serde_path::Deserialize>::deserialize(deserializer)?; + Ok(zerovec::ule::encode_varule_to_box(&this)) + } + } + ) + } else { + quote!() + }; + + let maybe_hash = if attrs.hash { + quote!( + #[allow(clippy::derive_hash_xor_eq)] + impl core::hash::Hash for #ule_name { + fn hash<H>(&self, state: &mut H) where H: core::hash::Hasher { + state.write(<#ule_name as zerovec::ule::VarULE>::as_byte_slice(&self)); + } + } + ) + } else { + quote!() + }; + + let maybe_multi_getters = if let Some(getters) = unsized_field_info.maybe_multi_getters() { + quote! 
{ + impl #ule_name { + #getters + } + } + } else { + quote!() + }; + + quote!( + #input + + #varule_struct + + #maybe_multi_getters + + #encode_impl + + #zf_impl + + #derived + + #maybe_ord_impls + + #eq_impl + + #zmkv + + #maybe_ser + + #maybe_de + + #maybe_debug + + #maybe_hash + ) +} + +fn make_zf_impl( + sized_fields: &[FieldInfo], + unsized_field_info: &UnsizedFields, + fields: &Fields, + name: &Ident, + ule_name: &Ident, + maybe_lt: Option<&Lifetime>, + span: Span, +) -> TokenStream2 { + if !unsized_field_info.has_zf() { + return quote!(); + } + + let lt = if let Some(ref lt) = maybe_lt { + lt + } else { + return Error::new( + span, + "Can only generate ZeroFrom impls for types with lifetimes", + ) + .to_compile_error(); + }; + + let mut field_inits = sized_fields + .iter() + .map(|f| { + let ty = &f.field.ty; + let accessor = &f.accessor; + let setter = f.setter(); + quote!(#setter <#ty as zerovec::ule::AsULE>::from_unaligned(other.#accessor)) + }) + .collect::<Vec<_>>(); + + unsized_field_info.push_zf_setters(lt, &mut field_inits); + + let field_inits = utils::wrap_field_inits(&field_inits, fields); + let zerofrom_trait = quote!(zerovec::__zerovec_internal_reexport::ZeroFrom); + quote!( + impl <#lt> #zerofrom_trait <#lt, #ule_name> for #name <#lt> { + fn zero_from(other: &#lt #ule_name) -> Self { + Self #field_inits + } + } + ) +} + +fn make_encode_impl( + sized_fields: &[FieldInfo], + unsized_field_info: &UnsizedFields, + name: &Ident, + ule_name: &Ident, + maybe_lt_bound: &Option<TokenStream2>, +) -> TokenStream2 { + let mut lengths = vec![]; + + for field in sized_fields { + let ty = &field.field.ty; + lengths.push(quote!(::core::mem::size_of::<<#ty as zerovec::ule::AsULE>::ULE>())); + } + + let (encoders, remaining_offset) = utils::generate_per_field_offsets( + sized_fields, + true, + |field, prev_offset_ident, size_ident| { + let ty = &field.field.ty; + let accessor = &field.accessor; + quote!( + #[allow(clippy::indexing_slicing)] // 
generate_per_field_offsets produces valid indices + let out = &mut dst[#prev_offset_ident .. #prev_offset_ident + #size_ident]; + let unaligned = zerovec::ule::AsULE::to_unaligned(self.#accessor); + let unaligned_slice = &[unaligned]; + let src = <<#ty as zerovec::ule::AsULE>::ULE as zerovec::ule::ULE>::as_byte_slice(unaligned_slice); + out.copy_from_slice(src); + ) + }, + ); + + let last_encode_len = unsized_field_info.encode_len(); + let last_encode_write = unsized_field_info.encode_write(quote!(out)); + quote!( + unsafe impl #maybe_lt_bound zerovec::ule::EncodeAsVarULE<#ule_name> for #name #maybe_lt_bound { + // Safety: unimplemented as the other two are implemented + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + unreachable!("other two methods implemented") + } + + // Safety: returns the total length of the ULE form by adding up the lengths of each element's ULE forms + fn encode_var_ule_len(&self) -> usize { + #(#lengths +)* #last_encode_len + } + + // Safety: converts each element to ULE form and writes them in sequence + fn encode_var_ule_write(&self, mut dst: &mut [u8]) { + debug_assert_eq!(self.encode_var_ule_len(), dst.len()); + #encoders + + #[allow(clippy::indexing_slicing)] // generate_per_field_offsets produces valid remainder + let out = &mut dst[#remaining_offset..]; + #last_encode_write + } + } + + // This second impl exists to allow for using EncodeAsVarULE without cloning + // + // A blanket impl cannot exist without coherence issues + unsafe impl #maybe_lt_bound zerovec::ule::EncodeAsVarULE<#ule_name> for &'_ #name #maybe_lt_bound { + // Safety: unimplemented as the other two are implemented + fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + unreachable!("other two methods implemented") + } + + // Safety: returns the total length of the ULE form by adding up the lengths of each element's ULE forms + fn encode_var_ule_len(&self) -> usize { + (**self).encode_var_ule_len() + } + + // 
Safety: converts each element to ULE form and writes them in sequence + fn encode_var_ule_write(&self, mut dst: &mut [u8]) { + (**self).encode_var_ule_write(dst) + } + } + ) +} + +/// Represents a VarULE-compatible type that would typically +/// be found behind a `Cow<'a, _>` in the last field, and is represented +/// roughly the same in owned and borrowed versions +#[derive(Copy, Clone, Debug)] +enum OwnULETy<'a> { + /// [T] where T: AsULE<ULE = Self> + Slice(&'a Type), + /// str + Str, +} + +/// Represents the type of the last field of the struct +#[derive(Clone, Debug)] +enum UnsizedFieldKind<'a> { + Cow(OwnULETy<'a>), + ZeroVec(&'a Type), + VarZeroVec(&'a Type), + /// Custom VarULE type, and the identifier corresponding to the VarULE type + Custom(&'a TypePath, Ident), + + // Generally you should be using the above ones for maximum zero-copy, but these will still work + Growable(OwnULETy<'a>), + Boxed(OwnULETy<'a>), + Ref(OwnULETy<'a>), +} + +#[derive(Clone, Debug)] +struct UnsizedField<'a> { + kind: UnsizedFieldKind<'a>, + field: FieldInfo<'a>, +} + +struct UnsizedFields<'a> { + fields: Vec<UnsizedField<'a>>, +} + +impl<'a> UnsizedFields<'a> { + fn new(fields: Vec<UnsizedField<'a>>) -> Self { + assert!(!fields.is_empty(), "Must have at least one unsized field"); + Self { fields } + } + + // Get the corresponding VarULE type that can store all of these + fn varule_ty(&self) -> TokenStream2 { + if self.fields.len() == 1 { + self.fields[0].kind.varule_ty() + } else { + quote!(zerovec::ule::MultiFieldsULE) + } + } + + // Get the accessor field name in the VarULE type + fn varule_accessor(&self) -> TokenStream2 { + if self.fields.len() == 1 { + self.fields[0].field.accessor.clone() + } else if self.fields[0].field.field.ident.is_some() { + quote!(unsized_fields) + } else { + // first unsized field + self.fields[0].field.accessor.clone() + } + } + + // Get the setter for this type for use in struct definition/creation syntax + fn varule_setter(&self) -> TokenStream2 
{ + if self.fields.len() == 1 { + self.fields[0].field.setter() + } else if self.fields[0].field.field.ident.is_some() { + quote!(unsized_fields: ) + } else { + quote!() + } + } + + fn varule_vis(&self) -> TokenStream2 { + if self.fields.len() == 1 { + self.fields[0].field.field.vis.to_token_stream() + } else { + // Always private + quote!() + } + } + + // Check if the type has a ZeroFrom impl + fn has_zf(&self) -> bool { + self.fields.iter().all(|f| f.kind.has_zf()) + } + + // Takes all unsized fields on self and encodes them into a byte slice `out` + fn encode_write(&self, out: TokenStream2) -> TokenStream2 { + if self.fields.len() == 1 { + self.fields[0].encode_func(quote!(encode_var_ule_write), quote!(#out)) + } else { + let mut lengths = vec![]; + let mut writers = vec![]; + for (i, field) in self.fields.iter().enumerate() { + lengths.push(field.encode_func(quote!(encode_var_ule_len), quote!())); + let (encodeable_ty, encodeable) = field.encodeable_tokens(); + let varule_ty = field.kind.varule_ty(); + writers + .push(quote!(multi.set_field_at::<#varule_ty, #encodeable_ty>(#i, #encodeable))) + } + + quote!( + let lengths = [#(#lengths),*]; + let mut multi = zerovec::ule::MultiFieldsULE::new_from_lengths_partially_initialized(&lengths, #out); + unsafe { + #(#writers;)* + } + ) + } + } + + // Takes all unsized fields on self and returns the length needed for encoding into a byte slice + fn encode_len(&self) -> TokenStream2 { + if self.fields.len() == 1 { + self.fields[0].encode_func(quote!(encode_var_ule_len), quote!()) + } else { + let mut lengths = vec![]; + for field in self.fields.iter() { + lengths.push(field.encode_func(quote!(encode_var_ule_len), quote!())); + } + quote!(zerovec::ule::MultiFieldsULE::compute_encoded_len_for(&[#(#lengths),*])) + } + } + + /// Constructs ZeroFrom setters for each field of the stack type + fn push_zf_setters(&self, lt: &Lifetime, field_inits: &mut Vec<TokenStream2>) { + let zerofrom_trait = 
quote!(zerovec::__zerovec_internal_reexport::ZeroFrom); + if self.fields.len() == 1 { + let accessor = self.fields[0].field.accessor.clone(); + let setter = self.fields[0].field.setter(); + let last_field_ty = &self.fields[0].field.field.ty; + let last_field_ule_ty = self.fields[0].kind.varule_ty(); + field_inits.push(quote!(#setter <#last_field_ty as #zerofrom_trait <#lt, #last_field_ule_ty>>::zero_from(&other.#accessor) )); + } else { + for field in self.fields.iter() { + let setter = field.field.setter(); + let getter = field.field.getter(); + let field_ty = &field.field.field.ty; + let field_ule_ty = field.kind.varule_ty(); + + field_inits.push(quote!(#setter + <#field_ty as #zerofrom_trait <#lt, #field_ule_ty>>::zero_from(&other.#getter()) + )); + } + } + } + + fn maybe_multi_getters(&self) -> Option<TokenStream2> { + if self.fields.len() == 1 { + None + } else { + let multi_accessor = self.varule_accessor(); + let field_getters = self.fields.iter().enumerate().map(|(i, field)| { + let getter = field.field.getter(); + + let field_ule_ty = field.kind.varule_ty(); + let doc_name = field.field.getter_doc_name(); + let doc = format!("Access the VarULE type behind {doc_name}"); + quote!( + #[doc = #doc] + pub fn #getter<'a>(&'a self) -> &'a #field_ule_ty { + unsafe { + self.#multi_accessor.get_field::<#field_ule_ty>(#i) + } + } + ) + }); + + Some(quote!(#(#field_getters)*)) + } + } + + /// In case this needs custom validation code, return it + /// + /// The code will validate a variable known as `last_field_bytes` + fn varule_validator(&self) -> Option<TokenStream2> { + if self.fields.len() == 1 { + None + } else { + let mut validators = vec![]; + for (i, field) in self.fields.iter().enumerate() { + let varule_ty = field.kind.varule_ty(); + validators.push(quote!(multi.validate_field::<#varule_ty>(#i)?;)); + } + + Some(quote!( + let multi = zerovec::ule::MultiFieldsULE::parse_byte_slice(last_field_bytes)?; + unsafe { + #(#validators)* + } + )) + } + } +} + 
+impl<'a> UnsizedField<'a> { + fn new( + field: &'a Field, + index: usize, + custom_varule_ident: Option<Ident>, + ) -> Result<Self, String> { + Ok(UnsizedField { + kind: UnsizedFieldKind::new(&field.ty, custom_varule_ident)?, + field: FieldInfo::new_for_field(field, index), + }) + } + + /// Call `<Self as EncodeAsVarULE<V>>::#method(self.accessor #additional_args)` after adjusting + /// Self and self.accessor to be the right types + fn encode_func(&self, method: TokenStream2, additional_args: TokenStream2) -> TokenStream2 { + let encodeas_trait = quote!(zerovec::ule::EncodeAsVarULE); + let (encodeable_ty, encodeable) = self.encodeable_tokens(); + let varule_ty = self.kind.varule_ty(); + quote!(<#encodeable_ty as #encodeas_trait<#varule_ty>>::#method(#encodeable, #additional_args)) + } + + /// Returns (encodeable_ty, encodeable) + fn encodeable_tokens(&self) -> (TokenStream2, TokenStream2) { + let accessor = self.field.accessor.clone(); + let value = quote!(self.#accessor); + let encodeable = self.kind.encodeable_value(value); + let encodeable_ty = self.kind.encodeable_ty(); + (encodeable_ty, encodeable) + } +} + +impl<'a> UnsizedFieldKind<'a> { + /// Construct a UnsizedFieldKind for the type of a UnsizedFieldKind if possible + fn new( + ty: &'a Type, + custom_varule_ident: Option<Ident>, + ) -> Result<UnsizedFieldKind<'a>, String> { + static PATH_TYPE_IDENTITY_ERROR: &str = + "Can only automatically detect corresponding VarULE types for path types \ + that are Cow, ZeroVec, VarZeroVec, Box, String, or Vec"; + static PATH_TYPE_GENERICS_ERROR: &str = + "Can only automatically detect corresponding VarULE types for path \ + types with at most one lifetime and at most one generic parameter. 
VarZeroVecFormat + types are not currently supported"; + match *ty { + Type::Reference(ref tyref) => OwnULETy::new(&tyref.elem, "reference").map(UnsizedFieldKind::Ref), + Type::Path(ref typath) => { + if let Some(custom_varule_ident) = custom_varule_ident { + return Ok(UnsizedFieldKind::Custom(typath, custom_varule_ident)); + } + if typath.path.segments.len() != 1 { + return Err("Can only automatically detect corresponding VarULE types for \ + path types with a single path segment".into()); + } + let segment = typath.path.segments.first().unwrap(); + match segment.arguments { + PathArguments::None => { + if segment.ident == "String" { + Ok(UnsizedFieldKind::Growable(OwnULETy::Str)) + } else { + Err(PATH_TYPE_IDENTITY_ERROR.into()) + } + } + PathArguments::AngleBracketed(ref params) => { + // At most one lifetime and exactly one generic parameter + let mut lifetime = None; + let mut generic = None; + for param in ¶ms.args { + match param { + GenericArgument::Lifetime(ref lt) if lifetime.is_none() => { + lifetime = Some(lt) + } + GenericArgument::Type(ref ty) if generic.is_none() => { + generic = Some(ty) + } + _ => return Err(PATH_TYPE_GENERICS_ERROR.into()), + } + } + + // Must be exactly one generic parameter + // (we've handled the zero generics case already) + let generic = if let Some(g) = generic { + g + } else { + return Err(PATH_TYPE_GENERICS_ERROR.into()); + }; + + let ident = segment.ident.to_string(); + + if lifetime.is_some() { + match &*ident { + "ZeroVec" => Ok(UnsizedFieldKind::ZeroVec(generic)), + "VarZeroVec" => Ok(UnsizedFieldKind::VarZeroVec(generic)), + "Cow" => OwnULETy::new(generic, "Cow").map(UnsizedFieldKind::Cow), + _ => Err(PATH_TYPE_IDENTITY_ERROR.into()), + } + } else { + match &*ident { + "Vec" => Ok(UnsizedFieldKind::Growable(OwnULETy::Slice(generic))), + "Box" => OwnULETy::new(generic, "Box").map(UnsizedFieldKind::Boxed), + _ => Err(PATH_TYPE_IDENTITY_ERROR.into()), + } + } + } + _ => Err("Can only automatically detect corresponding 
VarULE types for path types \ + with none or angle bracketed generics".into()), + } + } + _ => Err("Can only automatically detect corresponding VarULE types for path and reference types".into()), + } + } + /// Get the tokens for the corresponding VarULE type + fn varule_ty(&self) -> TokenStream2 { + match *self { + Self::Ref(ref inner) + | Self::Cow(ref inner) + | Self::Boxed(ref inner) + | Self::Growable(ref inner) => { + let inner_ule = inner.varule_ty(); + quote!(#inner_ule) + } + Self::Custom(_, ref name) => quote!(#name), + Self::ZeroVec(ref inner) => quote!(zerovec::ZeroSlice<#inner>), + Self::VarZeroVec(ref inner) => quote!(zerovec::VarZeroSlice<#inner>), + } + } + + // Takes expr `value` and returns it as a value that can be encoded via EncodeAsVarULE + fn encodeable_value(&self, value: TokenStream2) -> TokenStream2 { + match *self { + Self::Ref(_) | Self::Cow(_) | Self::Growable(_) | Self::Boxed(_) => quote!(&*#value), + + Self::Custom(..) => quote!(&#value), + Self::ZeroVec(_) | Self::VarZeroVec(_) => quote!(&*#value), + } + } + + /// Returns the EncodeAsVarULE type this can be represented as, the same returned by encodeable_value() + fn encodeable_ty(&self) -> TokenStream2 { + match *self { + Self::Ref(ref inner) + | Self::Cow(ref inner) + | Self::Growable(ref inner) + | Self::Boxed(ref inner) => inner.varule_ty(), + + Self::Custom(ref path, _) => quote!(#path), + Self::ZeroVec(ref ty) => quote!(zerovec::ZeroSlice<#ty>), + Self::VarZeroVec(ref ty) => quote!(zerovec::VarZeroSlice<#ty>), + } + } + + fn has_zf(&self) -> bool { + matches!( + *self, + Self::Ref(_) | Self::Cow(_) | Self::ZeroVec(_) | Self::VarZeroVec(_) | Self::Custom(..) 
+ ) + } +} + +impl<'a> OwnULETy<'a> { + fn new(ty: &'a Type, context: &str) -> Result<Self, String> { + match *ty { + Type::Slice(ref slice) => Ok(OwnULETy::Slice(&slice.elem)), + Type::Path(ref typath) => { + if typath.path.is_ident("str") { + Ok(OwnULETy::Str) + } else { + Err(format!("Cannot automatically detect corresponding VarULE type for non-str path type inside a {context}")) + } + } + _ => Err(format!("Cannot automatically detect corresponding VarULE type for non-slice/path type inside a {context}")), + } + } + + /// Get the tokens for the corresponding VarULE type + fn varule_ty(&self) -> TokenStream2 { + match *self { + OwnULETy::Slice(s) => quote!([#s]), + OwnULETy::Str => quote!(str), + } + } +} diff --git a/third_party/rust/zerovec-derive/src/ule.rs b/third_party/rust/zerovec-derive/src/ule.rs new file mode 100644 index 0000000000..6a03c008f4 --- /dev/null +++ b/third_party/rust/zerovec-derive/src/ule.rs @@ -0,0 +1,107 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; + +use crate::utils::{self, FieldInfo}; +use syn::spanned::Spanned; +use syn::{Data, DeriveInput, Error}; + +pub fn derive_impl(input: &DeriveInput) -> TokenStream2 { + if !utils::has_valid_repr(&input.attrs, |r| r == "packed" || r == "transparent") { + return Error::new( + input.span(), + "derive(ULE) must be applied to a #[repr(packed)] or #[repr(transparent)] type", + ) + .to_compile_error(); + } + if input.generics.type_params().next().is_some() + || input.generics.lifetimes().next().is_some() + || input.generics.const_params().next().is_some() + { + return Error::new( + input.generics.span(), + "derive(ULE) must be applied to a struct without any generics", + ) + .to_compile_error(); + } + let struc = if let Data::Struct(ref s) = input.data { + if s.fields.iter().next().is_none() { + return Error::new( + input.span(), + "derive(ULE) must be applied to a non-empty struct", + ) + .to_compile_error(); + } + s + } else { + return Error::new(input.span(), "derive(ULE) must be applied to a struct") + .to_compile_error(); + }; + + let fields = FieldInfo::make_list(struc.fields.iter()); + let (validators, remaining_offset) = generate_ule_validators(&fields); + + let name = &input.ident; + + // Safety (based on the safety checklist on the ULE trait): + // 1. #name does not include any uninitialized or padding bytes. + // (achieved by enforcing #[repr(transparent)] or #[repr(packed)] on a struct of only ULE types) + // 2. #name is aligned to 1 byte. + // (achieved by enforcing #[repr(transparent)] or #[repr(packed)] on a struct of only ULE types) + // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. + // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. + // 5. The other ULE methods use the default impl. + // 6. [This impl does not enforce the non-safety equality constraint, it is up to the user to do so, ideally via a custom derive] + quote! 
{ + unsafe impl zerovec::ule::ULE for #name { + #[inline] + fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> { + const SIZE: usize = ::core::mem::size_of::<#name>(); + #[allow(clippy::modulo_one)] + if bytes.len() % SIZE != 0 { + return Err(zerovec::ZeroVecError::length::<Self>(bytes.len())); + } + // Validate the bytes + #[allow(clippy::indexing_slicing)] // We're slicing a chunk of known size + for chunk in bytes.chunks_exact(SIZE) { + #validators + debug_assert_eq!(#remaining_offset, SIZE); + } + Ok(()) + } + } + } +} + +/// Given an slice over ULE struct fields, returns code validating that a slice variable `bytes` contains valid instances of those ULE types +/// in order, plus the byte offset of any remaining unvalidated bytes. ULE types should not have any remaining bytes, but VarULE types will since +/// the last field is the unsized one. +pub(crate) fn generate_ule_validators( + fields: &[FieldInfo], + // (validators, remaining_offset) +) -> (TokenStream2, syn::Ident) { + utils::generate_per_field_offsets(fields, false, |field, prev_offset_ident, size_ident| { + let ty = &field.field.ty; + quote! { + #[allow(clippy::indexing_slicing)] // generate_per_field_offsets produces valid indices + <#ty as zerovec::ule::ULE>::validate_byte_slice(&bytes[#prev_offset_ident .. #prev_offset_ident + #size_ident])?; + } + }) +} + +/// Make corresponding ULE fields for each field +pub(crate) fn make_ule_fields(fields: &[FieldInfo]) -> Vec<TokenStream2> { + fields + .iter() + .map(|f| { + let ty = &f.field.ty; + let ty = quote!(<#ty as zerovec::ule::AsULE>::ULE); + let setter = f.setter(); + let vis = &f.field.vis; + quote!(#vis #setter #ty) + }) + .collect::<Vec<_>>() +} diff --git a/third_party/rust/zerovec-derive/src/utils.rs b/third_party/rust/zerovec-derive/src/utils.rs new file mode 100644 index 0000000000..e4155076f7 --- /dev/null +++ b/third_party/rust/zerovec-derive/src/utils.rs @@ -0,0 +1,317 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use quote::{quote, ToTokens}; + +use proc_macro2::Span; +use proc_macro2::TokenStream as TokenStream2; +use syn::parse::{Parse, ParseStream}; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::{Attribute, Error, Field, Fields, Ident, Index, Result, Token}; + +// Check that there are repr attributes satisfying the given predicate +pub fn has_valid_repr(attrs: &[Attribute], predicate: impl Fn(&Ident) -> bool + Copy) -> bool { + attrs.iter().filter(|a| a.path().is_ident("repr")).any(|a| { + a.parse_args::<IdentListAttribute>() + .ok() + .and_then(|s| s.idents.iter().find(|s| predicate(s)).map(|_| ())) + .is_some() + }) +} + +// An attribute that is a list of idents +struct IdentListAttribute { + idents: Punctuated<Ident, Token![,]>, +} + +impl Parse for IdentListAttribute { + fn parse(input: ParseStream) -> Result<Self> { + Ok(IdentListAttribute { + idents: input.parse_terminated(Ident::parse, Token![,])?, + }) + } +} + +/// Given a set of entries for struct field definitions to go inside a `struct {}` definition, +/// wrap in a () or {} based on the type of field +pub fn wrap_field_inits(streams: &[TokenStream2], fields: &Fields) -> TokenStream2 { + match *fields { + Fields::Named(_) => quote!( { #(#streams),* } ), + Fields::Unnamed(_) => quote!( ( #(#streams),* ) ), + Fields::Unit => { + unreachable!("#[make_(var)ule] should have already checked that there are fields") + } + } +} + +/// Return a semicolon token if necessary after the struct definition +pub fn semi_for(f: &Fields) -> TokenStream2 { + if let Fields::Unnamed(..) 
= *f { + quote!(;) + } else { + quote!() + } +} + +/// Returns the repr attribute to be applied to the resultant ULE or VarULE type +pub fn repr_for(f: &Fields) -> TokenStream2 { + if f.len() == 1 { + quote!(transparent) + } else { + quote!(packed) + } +} + +fn suffixed_ident(name: &str, suffix: usize, s: Span) -> Ident { + Ident::new(&format!("{name}_{suffix}"), s) +} + +/// Given an iterator over ULE or AsULE struct fields, returns code that calculates field sizes and generates a line +/// of code per field based on the per_field_code function (whose parameters are the field, the identifier of the const +/// for the previous offset, the identifier for the const for the next offset, and the field index) +pub(crate) fn generate_per_field_offsets<'a>( + fields: &[FieldInfo<'a>], + // Whether the fields are ULE types or AsULE (and need conversion) + fields_are_asule: bool, + // (field, prev_offset_ident, size_ident) + mut per_field_code: impl FnMut(&FieldInfo<'a>, &Ident, &Ident) -> TokenStream2, /* (code, remaining_offset) */ +) -> (TokenStream2, syn::Ident) { + let mut prev_offset_ident = Ident::new("ZERO", Span::call_site()); + let mut code = quote!( + const ZERO: usize = 0; + ); + + for (i, field_info) in fields.iter().enumerate() { + let field = &field_info.field; + let ty = &field.ty; + let ty = if fields_are_asule { + quote!(<#ty as zerovec::ule::AsULE>::ULE) + } else { + quote!(#ty) + }; + let new_offset_ident = suffixed_ident("OFFSET", i, field.span()); + let size_ident = suffixed_ident("SIZE", i, field.span()); + let pf_code = per_field_code(field_info, &prev_offset_ident, &size_ident); + code = quote! 
{ + #code; + const #size_ident: usize = ::core::mem::size_of::<#ty>(); + const #new_offset_ident: usize = #prev_offset_ident + #size_ident; + #pf_code; + }; + + prev_offset_ident = new_offset_ident; + } + + (code, prev_offset_ident) +} + +#[derive(Clone, Debug)] +pub(crate) struct FieldInfo<'a> { + pub accessor: TokenStream2, + pub field: &'a Field, + pub index: usize, +} + +impl<'a> FieldInfo<'a> { + pub fn make_list(iter: impl Iterator<Item = &'a Field>) -> Vec<Self> { + iter.enumerate() + .map(|(i, field)| Self::new_for_field(field, i)) + .collect() + } + + pub fn new_for_field(f: &'a Field, index: usize) -> Self { + if let Some(ref i) = f.ident { + FieldInfo { + accessor: quote!(#i), + field: f, + index, + } + } else { + let idx = Index::from(index); + FieldInfo { + accessor: quote!(#idx), + field: f, + index, + } + } + } + + /// Get the code for setting this field in struct decl/brace syntax + /// + /// Use self.accessor for dot-notation accesses + pub fn setter(&self) -> TokenStream2 { + if let Some(ref i) = self.field.ident { + quote!(#i: ) + } else { + quote!() + } + } + + /// Produce a name for a getter for the field + pub fn getter(&self) -> TokenStream2 { + if let Some(ref i) = self.field.ident { + quote!(#i) + } else { + suffixed_ident("field", self.index, self.field.span()).into_token_stream() + } + } + + /// Produce a prose name for the field for use in docs + pub fn getter_doc_name(&self) -> String { + if let Some(ref i) = self.field.ident { + format!("the unsized `{i}` field") + } else { + format!("tuple struct field #{}", self.index) + } + } +} + +/// Extracts all `zerovec::name(..)` attribute +pub fn extract_parenthetical_zerovec_attrs( + attrs: &mut Vec<Attribute>, + name: &str, +) -> Result<Vec<Ident>> { + let mut ret = vec![]; + let mut error = None; + attrs.retain(|a| { + // skip the "zerovec" part + let second_segment = a.path().segments.iter().nth(1); + + if let Some(second) = second_segment { + if second.ident == name { + let list = match 
a.parse_args::<IdentListAttribute>() { + Ok(l) => l, + Err(_) => { + error = Some(Error::new( + a.span(), + format!("#[zerovec::{name}(..)] takes in a comma separated list of identifiers"), + )); + return false; + } + }; + ret.extend(list.idents.iter().cloned()); + return false; + } + } + + true + }); + + if let Some(error) = error { + return Err(error); + } + Ok(ret) +} + +/// Removes all attributes with `zerovec` in the name and places them in a separate vector +pub fn extract_zerovec_attributes(attrs: &mut Vec<Attribute>) -> Vec<Attribute> { + let mut ret = vec![]; + attrs.retain(|a| { + if a.path().segments.len() == 2 && a.path().segments[0].ident == "zerovec" { + ret.push(a.clone()); + return false; + } + true + }); + ret +} + +/// Extract attributes from field, and return them +/// +/// Only current field attribute is `zerovec::varule(VarUleType)` +pub fn extract_field_attributes(attrs: &mut Vec<Attribute>) -> Result<Option<Ident>> { + let mut zerovec_attrs = extract_zerovec_attributes(attrs); + let varule = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "varule")?; + + if varule.len() > 1 { + return Err(Error::new( + varule[1].span(), + "Found multiple #[zerovec::varule()] on one field", + )); + } + + if !zerovec_attrs.is_empty() { + return Err(Error::new( + zerovec_attrs[1].span(), + "Found unusable #[zerovec::] attrs on field, only #[zerovec::varule()] supported", + )); + } + + Ok(varule.get(0).cloned()) +} + +#[derive(Default, Copy, Clone)] +pub struct ZeroVecAttrs { + pub skip_kv: bool, + pub skip_ord: bool, + pub serialize: bool, + pub deserialize: bool, + pub debug: bool, + pub hash: bool, +} + +/// Removes all known zerovec:: attributes from struct attrs and validates them +pub fn extract_attributes_common( + attrs: &mut Vec<Attribute>, + span: Span, + is_var: bool, +) -> Result<ZeroVecAttrs> { + let mut zerovec_attrs = extract_zerovec_attributes(attrs); + + let derive = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "derive")?; + 
let skip = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "skip_derive")?; + + let name = if is_var { "make_varule" } else { "make_ule" }; + + if let Some(attr) = zerovec_attrs.get(0) { + return Err(Error::new( + attr.span(), + format!("Found unknown or duplicate attribute for #[{name}]"), + )); + } + + let mut attrs = ZeroVecAttrs::default(); + + for ident in derive { + if ident == "Serialize" { + attrs.serialize = true; + } else if ident == "Deserialize" { + attrs.deserialize = true; + } else if ident == "Debug" { + attrs.debug = true; + } else if ident == "Hash" { + attrs.hash = true; + } else { + return Err(Error::new( + ident.span(), + format!( + "Found unknown derive attribute for #[{name}]: #[zerovec::derive({ident})]" + ), + )); + } + } + + for ident in skip { + if ident == "ZeroMapKV" { + attrs.skip_kv = true; + } else if ident == "Ord" { + attrs.skip_ord = true; + } else { + return Err(Error::new( + ident.span(), + format!("Found unknown derive attribute for #[{name}]: #[zerovec::skip_derive({ident})]"), + )); + } + } + + if (attrs.serialize || attrs.deserialize) && !is_var { + return Err(Error::new( + span, + "#[make_ule] does not support #[zerovec::derive(Serialize, Deserialize)]", + )); + } + + Ok(attrs) +} diff --git a/third_party/rust/zerovec-derive/src/varule.rs b/third_party/rust/zerovec-derive/src/varule.rs new file mode 100644 index 0000000000..4a586f9547 --- /dev/null +++ b/third_party/rust/zerovec-derive/src/varule.rs @@ -0,0 +1,130 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::utils::{self, FieldInfo}; +use proc_macro2::Span; +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; +use syn::spanned::Spanned; +use syn::{Data, DeriveInput, Error, Ident}; + +/// Implementation for derive(VarULE). 
/// `custom_varule_validator` validates the last field bytes `last_field_bytes`
/// if specified, if not, the VarULE implementation will be used.
///
/// Returns the tokens of an `unsafe impl zerovec::ule::VarULE` for the input type. If the input
/// lacks a `packed`/`transparent` repr, has generic parameters, is not a struct, or has no
/// fields, the returned tokens are the corresponding compile error instead.
///
/// Every field except the last is validated as a ULE type at its cumulative byte offset; the
/// last field is treated as the unsized one and receives all remaining bytes.
///
/// NOTE(review): a bare `#[repr(packed)]` is accepted here, but the generated code assumes
/// fields are laid out in declaration order — consider whether `#[repr(C, packed)]` should be
/// required; confirm against the Rust Reference's type-layout guarantees.
pub fn derive_impl(
    input: &DeriveInput,
    custom_varule_validator: Option<TokenStream2>,
) -> TokenStream2 {
    if !utils::has_valid_repr(&input.attrs, |r| r == "packed" || r == "transparent") {
        return Error::new(
            input.span(),
            "derive(VarULE) must be applied to a #[repr(packed)] or #[repr(transparent)] type",
        )
        .to_compile_error();
    }
    if input.generics.type_params().next().is_some()
        || input.generics.lifetimes().next().is_some()
        || input.generics.const_params().next().is_some()
    {
        return Error::new(
            input.generics.span(),
            "derive(VarULE) must be applied to a struct without any generics",
        )
        .to_compile_error();
    }
    let struc = if let Data::Struct(ref s) = input.data {
        if s.fields.iter().next().is_none() {
            return Error::new(
                input.span(),
                "derive(VarULE) must be applied to a non-empty struct",
            )
            .to_compile_error();
        }
        s
    } else {
        return Error::new(input.span(), "derive(VarULE) must be applied to a struct")
            .to_compile_error();
    };

    // At least one field exists (checked above), so `n_fields - 1` cannot underflow
    let n_fields = struc.fields.len();

    // All fields but the last one are the sized (ULE) prefix
    let ule_fields = FieldInfo::make_list(struc.fields.iter().take(n_fields - 1));

    let sizes = ule_fields.iter().map(|f| {
        let ty = &f.field.ty;
        quote!(::core::mem::size_of::<#ty>())
    });
    let (validators, remaining_offset) = if n_fields > 1 {
        // generate ULE validators
        crate::ule::generate_ule_validators(&ule_fields)
    } else {
        // no ULE subfields
        (
            quote!(
                const ZERO: usize = 0;
            ),
            Ident::new("ZERO", Span::call_site()),
        )
    };

    let unsized_field = &struc
        .fields
        .iter()
        .next_back()
        .expect("Already verified that struct is not empty")
        .ty;

    let name = &input.ident;
    // Name of the generated module-level const holding the total size of the ULE prefix
    let ule_size = Ident::new(
        &format!("__IMPL_VarULE_FOR_{name}_ULE_SIZE"),
        Span::call_site(),
    );

    let last_field_validator = if let Some(custom_varule_validator) = custom_varule_validator {
        custom_varule_validator
    } else {
        quote!(<#unsized_field as zerovec::ule::VarULE>::validate_byte_slice(last_field_bytes)?;)
    };

    // Safety (based on the safety checklist on the VarULE trait):
    //  1. #name does not include any uninitialized or padding bytes
    //     (achieved by enforcing #[repr(transparent)] or #[repr(packed)] on a struct of only ULE types)
    //  2. #name is aligned to 1 byte.
    //     (achieved by enforcing #[repr(transparent)] or #[repr(packed)] on a struct of only ULE types)
    //  3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
    //  4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
    //  5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
    //  6. The other VarULE methods use the default impl
    //  7. [This impl does not enforce the non-safety equality constraint, it is up to the user to do so, ideally via a custom derive]
    quote! {
        // The size of the ULE section of this type
        const #ule_size: usize = 0 #(+ #sizes)*;
        unsafe impl zerovec::ule::VarULE for #name {
            #[inline]
            fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> {

                if bytes.len() < #ule_size {
                    return Err(zerovec::ZeroVecError::parse::<Self>());
                }
                #validators
                debug_assert_eq!(#remaining_offset, #ule_size);
                // `bytes.len() >= #ule_size` was checked above and `#remaining_offset` equals
                // `#ule_size` (asserted on the previous line), so the range start is in bounds
                #[allow(clippy::indexing_slicing)]
                let last_field_bytes = &bytes[#remaining_offset..];
                #last_field_validator
                Ok(())
            }
            #[inline]
            unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
                // just the unsized part
                // Assumes `bytes` already passed `validate_byte_slice` and is therefore at least
                // `#ule_size` long — TODO confirm this is the documented caller contract
                #[allow(clippy::indexing_slicing)]
                let unsized_bytes = &bytes[#ule_size..];
                let unsized_ref = <#unsized_field as zerovec::ule::VarULE>::from_byte_slice_unchecked(unsized_bytes);
                // We should use the pointer metadata APIs here when they are stable: https://github.com/rust-lang/rust/issues/81513
                // For now we rely on all DST metadata being a usize to extract it via a fake slice pointer
                let (_ptr, metadata): (usize, usize) = ::core::mem::transmute(unsized_ref);
                // Pair the start of the whole struct with the unsized field's metadata so the
                // cast below yields a `Self` pointer with that metadata
                let entire_struct_as_slice: *const [u8] = ::core::slice::from_raw_parts(bytes.as_ptr(), metadata);
                &*(entire_struct_as_slice as *const Self)
            }
        }
    }
}