diff options
Diffstat (limited to 'vendor/icu_provider/src/key.rs')
-rw-r--r-- | vendor/icu_provider/src/key.rs | 660 |
1 files changed, 660 insertions, 0 deletions
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::error::{DataError, DataErrorKind};
use crate::helpers;

use alloc::borrow::Cow;
use core::fmt;
use core::fmt::Write;
use core::ops::Deref;
use writeable::{LengthHint, Writeable};
use zerovec::ule::*;

// Expands to the marker placed in front of every tagged key path literal:
// a newline followed by `icu4x_key_tag`.
#[doc(hidden)]
#[macro_export]
macro_rules! leading_tag {
    () => {
        "\nicu4x_key_tag"
    };
}

// Expands to the marker placed after every tagged key path literal: a single newline.
#[doc(hidden)]
#[macro_export]
macro_rules! trailing_tag {
    () => {
        "\n"
    };
}

// Wraps a path literal between `leading_tag!()` and `trailing_tag!()` at compile time
// (via `concat!`), producing the tagged form that `DataKeyPath` stores.
#[doc(hidden)]
#[macro_export]
macro_rules! tagged {
    ($without_tags:expr) => {
        concat!(
            $crate::leading_tag!(),
            $without_tags,
            $crate::trailing_tag!()
        )
    };
}

/// A compact hash of a [`DataKey`]. Useful for keys in maps.
///
/// The hash will be stable over time within major releases.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct DataKeyHash([u8; 4]);

impl DataKeyHash {
    /// Computes the hash for a key path.
    ///
    /// The full tagged bytes are handed to `helpers::fxhash_32` together with the byte
    /// lengths of the leading and trailing tags (presumably so the helper can skip the
    /// tags -- confirm in `helpers`). The little-endian bytes of the result are stored.
    const fn compute_from_path(path: DataKeyPath) -> Self {
        let hash = helpers::fxhash_32(
            path.tagged.as_bytes(),
            leading_tag!().len(),
            trailing_tag!().len(),
        );
        Self(hash.to_le_bytes())
    }

    /// Gets the hash value as a byte array.
    pub const fn to_bytes(self) -> [u8; 4] {
        self.0
    }
}

// Allows `DataKeyHash` to be used as a key or value in zerovec maps, stored in a
// `ZeroVec<DataKeyHash>`.
impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash {
    type Container = zerovec::ZeroVec<'a, DataKeyHash>;
    type Slice = zerovec::ZeroSlice<DataKeyHash>;
    type GetType = <DataKeyHash as AsULE>::ULE;
    type OwnedType = DataKeyHash;
}

// `DataKeyHash` is its own unaligned representation, so both conversions are the identity.
impl AsULE for DataKeyHash {
    type ULE = Self;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        unaligned
    }
}

// SAFETY: Safe since the ULE type is `Self` (see the `AsULE` impl above), so the aligned
// and unaligned forms are the same type.
unsafe impl EqULE for DataKeyHash {}

/// Hint for what to prioritize during fallback when data is unavailable.
///
/// For example, if `"en-US"` is requested, but we have no data for that specific locale,
/// fallback may take us to `"en"` or `"und-US"` to check for data.
#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
#[non_exhaustive]
pub enum FallbackPriority {
    /// Prioritize the language. This is the default behavior.
    ///
    /// For example, `"en-US"` should go to `"en"` and then `"und"`.
    Language,
    /// Prioritize the region.
    ///
    /// For example, `"en-US"` should go to `"und-US"` and then `"und"`.
    Region,
    /// Collation-specific fallback rules. Similar to language priority.
    ///
    /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`.
    Collation,
}

impl FallbackPriority {
    /// Const-friendly version of [`Default::default`].
    ///
    /// Returns [`FallbackPriority::Language`].
    pub const fn const_default() -> Self {
        Self::Language
    }
}

impl Default for FallbackPriority {
    /// Delegates to [`FallbackPriority::const_default`].
    fn default() -> Self {
        Self::const_default()
    }
}

/// What additional data to load when performing fallback.
#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
#[non_exhaustive]
pub enum FallbackSupplement {
    /// Collation supplement; see `CollationFallbackSupplementV1Marker`
    Collation,
}

/// The string path of a data key.
/// For example, "foo@1"
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct DataKeyPath {
    // This string literal is wrapped in leading_tag!() and trailing_tag!() to make it detectable
    // in a compiled binary.
    tagged: &'static str,
}

impl DataKeyPath {
    /// Gets the path as a static string slice.
    ///
    /// The returned slice is the stored string without its leading and trailing tags.
    #[inline]
    pub const fn get(self) -> &'static str {
        // NOTE(review): the `compile_fail` doc-test below looks like a canary that will start
        // failing once `core::slice::from_raw_parts` can be called from a `const fn` on the
        // supported toolchain, signalling that the `transmute` can be replaced -- confirm.
        /// core::slice::from_raw_parts(a, b) = core::mem::transmute((a, b)) hack
        /// ```compile_fail
        /// const unsafe fn canary() { core::slice::from_raw_parts(0 as *const u8, 0); }
        /// ```
        const _: () = ();
        unsafe {
            // SAFETY: relies on the invariant that `self.tagged` starts with `leading_tag!()`
            // and ends with `trailing_tag!()`, so the offset pointer and the reduced length
            // stay inside the original string. The `(pointer, length)` tuple is transmuted
            // into a byte slice, standing in for `core::slice::from_raw_parts`.
            core::str::from_utf8_unchecked(core::mem::transmute((
                self.tagged.as_ptr().add(leading_tag!().len()),
                self.tagged.len() - trailing_tag!().len() - leading_tag!().len(),
            )))
        }
    }
}

// Lets a `DataKeyPath` be used wherever a `&str` is expected; yields the untagged path.
impl Deref for DataKeyPath {
    type Target = str;
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.get()
    }
}

/// Metadata statically associated with a particular [`DataKey`].
#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
#[non_exhaustive]
pub struct DataKeyMetadata {
    /// What to prioritize when falling back on this [`DataKey`].
    pub fallback_priority: FallbackPriority,
    /// A Unicode extension keyword to consider when loading data for this [`DataKey`].
    pub extension_key: Option<icu_locid::extensions::unicode::Key>,
    /// Optional choice for additional fallback data required for loading this marker.
    ///
    /// For more information, see `LocaleFallbackConfig::fallback_supplement`.
    pub fallback_supplement: Option<FallbackSupplement>,
}

impl DataKeyMetadata {
    /// Const-friendly version of [`Default::default`].
    ///
    /// Uses [`FallbackPriority::const_default`] and leaves both optional fields as `None`.
    pub const fn const_default() -> Self {
        Self {
            fallback_priority: FallbackPriority::const_default(),
            extension_key: None,
            fallback_supplement: None,
        }
    }

    // Field-by-field constructor usable in const contexts. The struct is `#[non_exhaustive]`,
    // so code outside this crate cannot build it with a struct literal.
    #[doc(hidden)]
    pub const fn construct_internal(
        fallback_priority: FallbackPriority,
        extension_key: Option<icu_locid::extensions::unicode::Key>,
        fallback_supplement: Option<FallbackSupplement>,
    ) -> Self {
        Self {
            fallback_priority,
            extension_key,
            fallback_supplement,
        }
    }
}

impl Default for DataKeyMetadata {
    /// Delegates to [`DataKeyMetadata::const_default`].
    #[inline]
    fn default() -> Self {
        Self::const_default()
    }
}

/// Used for loading data from an ICU4X data provider.
///
/// A resource key is tightly coupled with the code that uses it to load data at runtime.
/// Executables can be searched for `DataKey` instances to produce optimized data files.
/// Therefore, users should not generally create DataKey instances; they should instead use
/// the ones exported by a component.
///
/// `DataKey`s are created with the [`data_key!`] macro:
///
/// ```
/// # use icu_provider::prelude::DataKey;
/// const K: DataKey = icu_provider::data_key!("foo/bar@1");
/// ```
///
/// The human-readable path string ends with `@` followed by one or more digits (the version
/// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`.
///
/// Invalid paths are compile-time errors (as [`data_key!`] uses `const`).
+/// +/// ```compile_fail,E0080 +/// # use icu_provider::prelude::DataKey; +/// const K: DataKey = icu_provider::data_key!("foo/../bar@1"); +/// ``` +#[derive(Copy, Clone)] +pub struct DataKey { + path: DataKeyPath, + hash: DataKeyHash, + metadata: DataKeyMetadata, +} + +impl PartialEq for DataKey { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash && self.path == other.path && self.metadata == other.metadata + } +} + +impl Eq for DataKey {} + +impl Ord for DataKey { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.path + .cmp(&other.path) + .then_with(|| self.metadata.cmp(&other.metadata)) + } +} + +impl PartialOrd for DataKey { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl core::hash::Hash for DataKey { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.hash.hash(state) + } +} + +impl DataKey { + /// Gets a human-readable representation of a [`DataKey`]. + /// + /// The human-readable path string ends with `@` followed by one or more digits (the version + /// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`. + /// + /// Useful for reading and writing data to a file system. + #[inline] + pub const fn path(self) -> DataKeyPath { + self.path + } + + /// Gets a platform-independent hash of a [`DataKey`]. + /// + /// The hash is 4 bytes and allows for fast key comparison. + /// + /// # Example + /// + /// ``` + /// use icu_provider::DataKey; + /// use icu_provider::DataKeyHash; + /// + /// const KEY: DataKey = icu_provider::data_key!("foo@1"); + /// const KEY_HASH: DataKeyHash = KEY.hashed(); + /// + /// assert_eq!(KEY_HASH.to_bytes(), [0xe2, 0xb6, 0x17, 0x71]); + /// ``` + #[inline] + pub const fn hashed(self) -> DataKeyHash { + self.hash + } + + /// Gets the metadata associated with this [`DataKey`]. 
+ #[inline] + pub const fn metadata(self) -> DataKeyMetadata { + self.metadata + } + + /// Constructs a [`DataKey`] from a path and metadata. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::data_key; + /// use icu_provider::DataKey; + /// + /// const CONST_KEY: DataKey = data_key!("foo@1"); + /// + /// let runtime_key = + /// DataKey::from_path_and_metadata(CONST_KEY.path(), CONST_KEY.metadata()); + /// + /// assert_eq!(CONST_KEY, runtime_key); + /// ``` + #[inline] + pub const fn from_path_and_metadata(path: DataKeyPath, metadata: DataKeyMetadata) -> Self { + Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + } + } + + #[doc(hidden)] + // Error is a str of the expected character class and the index where it wasn't encountered + // The indexing operations in this function have been reviewed in detail and won't panic. + #[allow(clippy::indexing_slicing)] + pub const fn construct_internal( + path: &'static str, + metadata: DataKeyMetadata, + ) -> Result<Self, (&'static str, usize)> { + if path.len() < leading_tag!().len() + trailing_tag!().len() { + return Err(("tag", 0)); + } + // Start and end of the untagged part + let start = leading_tag!().len(); + let end = path.len() - trailing_tag!().len(); + + // Check tags + let mut i = 0; + while i < leading_tag!().len() { + if path.as_bytes()[i] != leading_tag!().as_bytes()[i] { + return Err(("tag", 0)); + } + i += 1; + } + i = 0; + while i < trailing_tag!().len() { + if path.as_bytes()[end + i] != trailing_tag!().as_bytes()[i] { + return Err(("tag", end + 1)); + } + i += 1; + } + + match Self::validate_path_manual_slice(path, start, end) { + Ok(()) => (), + Err(e) => return Err(e), + }; + + let path = DataKeyPath { tagged: path }; + + Ok(Self { + path, + hash: DataKeyHash::compute_from_path(path), + metadata, + }) + } + + const fn validate_path_manual_slice( + path: &'static str, + start: usize, + end: usize, + ) -> Result<(), (&'static str, usize)> { + debug_assert!(start <= end); 
+ debug_assert!(end <= path.len()); + // Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+ + enum State { + Empty, + Body, + At, + Version, + } + use State::*; + let mut i = start; + let mut state = Empty; + loop { + let byte = if i < end { + #[allow(clippy::indexing_slicing)] // protected by debug assertion + Some(path.as_bytes()[i]) + } else { + None + }; + state = match (state, byte) { + (Empty | Body, Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')) => Body, + (Body, Some(b'/')) => Body, + (Body, Some(b'@')) => At, + (At | Version, Some(b'0'..=b'9')) => Version, + // One of these cases will be hit at the latest when i == end, so the loop converges. + (Version, None) => { + return Ok(()); + } + + (Empty, _) => return Err(("[a-zA-Z0-9_]", i)), + (Body, _) => return Err(("[a-zA-z0-9_/@]", i)), + (At, _) => return Err(("[0-9]", i)), + (Version, _) => return Err(("[0-9]", i)), + }; + i += 1; + } + } + + /// Returns [`Ok`] if this data key matches the argument, or the appropriate error. + /// + /// Convenience method for data providers that support a single [`DataKey`]. + /// + /// # Examples + /// + /// ``` + /// use icu_provider::prelude::*; + /// + /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); + /// const FOO_BAZ: DataKey = icu_provider::data_key!("foo/baz@1"); + /// const BAR_BAZ: DataKey = icu_provider::data_key!("bar/baz@1"); + /// + /// assert!(matches!(FOO_BAR.match_key(FOO_BAR), Ok(()))); + /// assert!(matches!( + /// FOO_BAR.match_key(FOO_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. + /// }) + /// )); + /// assert!(matches!( + /// FOO_BAR.match_key(BAR_BAZ), + /// Err(DataError { + /// kind: DataErrorKind::MissingDataKey, + /// .. 
+ /// }) + /// )); + /// + /// // The error context contains the argument: + /// assert_eq!(FOO_BAR.match_key(BAR_BAZ).unwrap_err().key, Some(BAR_BAZ)); + /// ``` + pub fn match_key(self, key: Self) -> Result<(), DataError> { + if self == key { + Ok(()) + } else { + Err(DataErrorKind::MissingDataKey.with_key(key)) + } + } +} + +/// See [`DataKey`]. +#[macro_export] +macro_rules! data_key { + ($path:expr) => {{ + $crate::data_key!($path, $crate::DataKeyMetadata::const_default()) + }}; + ($path:expr, $metadata:expr) => {{ + // Force the DataKey into a const context + const RESOURCE_KEY_MACRO_CONST: $crate::DataKey = { + match $crate::DataKey::construct_internal($crate::tagged!($path), $metadata) { + Ok(v) => v, + #[allow(clippy::panic)] // Const context + Err(_) => panic!(concat!("Invalid resource key: ", $path)), + // TODO Once formatting is const: + // Err((expected, index)) => panic!( + // "Invalid resource key {:?}: expected {:?}, found {:?} ", + // $path, + // expected, + // $crate::tagged!($path).get(index..)) + // ); + } + }; + RESOURCE_KEY_MACRO_CONST + }}; +} + +impl fmt::Debug for DataKey { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("DataKey{")?; + fmt::Display::fmt(self, f)?; + f.write_char('}')?; + Ok(()) + } +} + +impl Writeable for DataKey { + fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { + self.path().write_to(sink) + } + + fn writeable_length_hint(&self) -> LengthHint { + self.path().writeable_length_hint() + } + + fn write_to_string(&self) -> Cow<str> { + Cow::Borrowed(self.path().get()) + } +} + +writeable::impl_display_with_writeable!(DataKey); + +#[test] +fn test_path_syntax() { + // Valid keys: + DataKey::construct_internal(tagged!("hello/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello/world@999"), Default::default()).unwrap(); + 
DataKey::construct_internal(tagged!("hello_world/foo@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_458/world@1"), Default::default()).unwrap(); + DataKey::construct_internal(tagged!("hello_world@1"), Default::default()).unwrap(); + + // No version: + assert_eq!( + DataKey::construct_internal(tagged!("hello/world"), Default::default()), + Err(( + "[a-zA-z0-9_/@]", + concat!(leading_tag!(), "hello/world").len() + )) + ); + + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("hello/world@1foo"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "hello/world@1").len())) + ); + + // Meta no longer accepted: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R][u-ca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid meta: + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[U]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[uca]"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[u-caa]"), Default::default()), + 
Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + assert_eq!( + DataKey::construct_internal(tagged!("foo@1[R"), Default::default()), + Err(("[0-9]", concat!(leading_tag!(), "foo@1").len())) + ); + + // Invalid characters: + assert_eq!( + DataKey::construct_internal(tagged!("你好/世界@1"), Default::default()), + Err(("[a-zA-Z0-9_]", leading_tag!().len())) + ); + + // Invalid tag: + assert_eq!( + DataKey::construct_internal( + concat!("hello/world@1", trailing_tag!()), + Default::default() + ), + Err(("tag", 0)) + ); + assert_eq!( + DataKey::construct_internal(concat!(leading_tag!(), "hello/world@1"), Default::default()), + Err(("tag", concat!(leading_tag!(), "hello/world@1").len())) + ); + assert_eq!( + DataKey::construct_internal("hello/world@1", Default::default()), + Err(("tag", 0)) + ); +} + +#[test] +fn test_key_to_string() { + struct KeyTestCase { + pub key: DataKey, + pub expected: &'static str, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + expected: "core/cardinal@1", + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + expected: "core/maxlengthsubcatg@1", + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + expected: "core/cardinal@65535", + }, + ] { + assert_eq!(cas.expected, cas.key.to_string()); + writeable::assert_writeable_eq!(&cas.key, cas.expected); + } +} + +#[test] +fn test_key_hash() { + struct KeyTestCase { + pub key: DataKey, + pub hash: DataKeyHash, + pub path: &'static str, + } + + for cas in [ + KeyTestCase { + key: data_key!("core/cardinal@1"), + hash: DataKeyHash([172, 207, 42, 236]), + path: "core/cardinal@1", + }, + KeyTestCase { + key: data_key!("core/maxlengthsubcatg@1"), + hash: DataKeyHash([193, 6, 79, 61]), + path: "core/maxlengthsubcatg@1", + }, + KeyTestCase { + key: data_key!("core/cardinal@65535"), + hash: DataKeyHash([176, 131, 182, 223]), + path: "core/cardinal@65535", + }, + ] { + assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.path); + assert_eq!(cas.path, 
&*cas.key.path(), "{}", cas.path); + } +} |