summaryrefslogtreecommitdiffstats
path: root/vendor/icu_provider/src/key.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_provider/src/key.rs')
-rw-r--r--vendor/icu_provider/src/key.rs660
1 files changed, 660 insertions, 0 deletions
diff --git a/vendor/icu_provider/src/key.rs b/vendor/icu_provider/src/key.rs
new file mode 100644
index 000000000..2f55e4d46
--- /dev/null
+++ b/vendor/icu_provider/src/key.rs
@@ -0,0 +1,660 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::error::{DataError, DataErrorKind};
+use crate::helpers;
+
+use alloc::borrow::Cow;
+use core::fmt;
+use core::fmt::Write;
+use core::ops::Deref;
+use writeable::{LengthHint, Writeable};
+use zerovec::ule::*;
+
+// Expands to the string literal that `tagged!` places in front of a data key path.
+// `DataKeyPath` stores the tagged form so that key paths are detectable in a compiled
+// binary.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! leading_tag {
+ () => {
+ "\nicu4x_key_tag"
+ };
+}
+
+// Expands to the string literal that `tagged!` appends after a data key path.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! trailing_tag {
+ () => {
+ "\n"
+ };
+}
+
+// Wraps `$without_tags` between `leading_tag!()` and `trailing_tag!()`.
+// The pieces are joined with `concat!`, so the result is a single string literal.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! tagged {
+ ($without_tags:expr) => {
+ concat!(
+ $crate::leading_tag!(),
+ $without_tags,
+ $crate::trailing_tag!()
+ )
+ };
+}
+
+/// A compact hash of a [`DataKey`]. Useful for keys in maps.
+///
+/// The hash will be stable over time within major releases.
+// The wrapped array holds the little-endian bytes of the hash value (see
+// `DataKeyHash::compute_from_path`).
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Hash, ULE)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+#[repr(transparent)]
+pub struct DataKeyHash([u8; 4]);
+
+impl DataKeyHash {
+    /// Hashes the tagged string stored in `path` and keeps the result as little-endian bytes.
+    ///
+    /// The lengths of the leading and trailing tags are handed to `helpers::fxhash_32`
+    /// together with the tagged bytes (presumably so that the tags themselves are left out
+    /// of the hash; see that helper to confirm).
+    const fn compute_from_path(path: DataKeyPath) -> Self {
+        let tagged_bytes = path.tagged.as_bytes();
+        let leading_len = leading_tag!().len();
+        let trailing_len = trailing_tag!().len();
+        let digest = helpers::fxhash_32(tagged_bytes, leading_len, trailing_len);
+        Self(digest.to_le_bytes())
+    }
+
+    /// Gets the hash value as a byte array.
+    pub const fn to_bytes(self) -> [u8; 4] {
+        let Self(bytes) = self;
+        bytes
+    }
+}
+
+// Describes how `DataKeyHash` is stored when used as a key or value in `zerovec` maps.
+impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash {
+    type Container = zerovec::ZeroVec<'a, Self>;
+    type Slice = zerovec::ZeroSlice<Self>;
+    type GetType = <Self as AsULE>::ULE;
+    type OwnedType = Self;
+}
+
+// `DataKeyHash` serves as its own unaligned representation, so both conversions hand the
+// value back unchanged.
+impl AsULE for DataKeyHash {
+    type ULE = DataKeyHash;
+    #[inline]
+    fn to_unaligned(self) -> Self::ULE {
+        self
+    }
+    #[inline]
+    fn from_unaligned(ule: Self::ULE) -> Self {
+        ule
+    }
+}
+
+// SAFETY: the `AsULE` impl for `DataKeyHash` declares `ULE = Self`, so the aligned and
+// unaligned forms are the very same type.
+unsafe impl EqULE for DataKeyHash {}
+
+/// Hint for what to prioritize during fallback when data is unavailable.
+///
+/// For example, if `"en-US"` is requested, but we have no data for that specific locale,
+/// fallback may take us to `"en"` or `"und-US"` to check for data.
+// The derived `PartialOrd`/`Ord` follow the declaration order of the variants below.
+#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
+#[non_exhaustive]
+pub enum FallbackPriority {
+ /// Prioritize the language. This is the default behavior.
+ ///
+ /// For example, `"en-US"` should go to `"en"` and then `"und"`.
+ Language,
+ /// Prioritize the region.
+ ///
+ /// For example, `"en-US"` should go to `"und-US"` and then `"und"`.
+ Region,
+ /// Collation-specific fallback rules. Similar to language priority.
+ ///
+ /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`.
+ Collation,
+}
+
+impl FallbackPriority {
+    /// Returns the default priority, [`FallbackPriority::Language`].
+    ///
+    /// Unlike [`Default::default`], this can be called in `const` contexts.
+    pub const fn const_default() -> Self {
+        FallbackPriority::Language
+    }
+}
+
+// The default is whatever `FallbackPriority::const_default` returns.
+impl Default for FallbackPriority {
+    fn default() -> Self {
+        FallbackPriority::const_default()
+    }
+}
+
+/// What additional data to load when performing fallback.
+// Marked `#[non_exhaustive]`, so code outside this crate that matches on it needs a
+// wildcard arm.
+#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
+#[non_exhaustive]
+pub enum FallbackSupplement {
+ /// Collation supplement; see `CollationFallbackSupplementV1Marker`
+ Collation,
+}
+
+/// The string path of a data key. For example, "foo@1"
+///
+/// The path is stored together with its surrounding tags; [`DataKeyPath::get`] returns the
+/// part between them.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct DataKeyPath {
+ // This string literal is wrapped in leading_tag!() and trailing_tag!() to make it detectable
+ // in a compiled binary.
+ tagged: &'static str,
+}
+
+impl DataKeyPath {
+ /// Gets the path as a static string slice.
+ ///
+ /// The returned slice is the stored tagged string with the leading and trailing tags
+ /// left off.
+ #[inline]
+ pub const fn get(self) -> &'static str {
+ /// core::slice::from_raw_parts(a, b) = core::mem::transmute((a, b)) hack
+ /// ```compile_fail
+ /// const unsafe fn canary() { core::slice::from_raw_parts(0 as *const u8, 0); }
+ /// ```
+ const _: () = ();
+ unsafe {
+ // SAFETY: relies on the invariant that `self.tagged` is tagged correctly, i.e. it
+ // starts with the leading tag and ends with the trailing tag, so the pointer offset
+ // and the length computed below stay inside the original string.
+ core::str::from_utf8_unchecked(core::mem::transmute((
+ // Start just past the leading tag.
+ self.tagged.as_ptr().add(leading_tag!().len()),
+ // Length of what remains once both tags are excluded.
+ self.tagged.len() - trailing_tag!().len() - leading_tag!().len(),
+ )))
+ }
+ }
+}
+
+// Dereferencing a `DataKeyPath` yields the same `str` that `DataKeyPath::get` returns.
+impl Deref for DataKeyPath {
+    type Target = str;
+    #[inline]
+    fn deref(&self) -> &str {
+        DataKeyPath::get(*self)
+    }
+}
+
+/// Metadata statically associated with a particular [`DataKey`].
+///
+/// Use [`DataKeyMetadata::const_default`] (or [`Default::default`]) to obtain a value with
+/// the default priority and no extension key or supplement.
+#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
+#[non_exhaustive]
+pub struct DataKeyMetadata {
+ /// What to prioritize when fallbacking on this [`DataKey`].
+ pub fallback_priority: FallbackPriority,
+ /// A Unicode extension keyword to consider when loading data for this [`DataKey`].
+ pub extension_key: Option<icu_locid::extensions::unicode::Key>,
+ /// Optional choice for additional fallbacking data required for loading this marker.
+ ///
+ /// For more information, see `LocaleFallbackConfig::fallback_supplement`.
+ pub fallback_supplement: Option<FallbackSupplement>,
+}
+
+impl DataKeyMetadata {
+    /// Const-friendly version of [`Default::default`].
+    ///
+    /// Yields the default fallback priority with no extension key and no fallback supplement.
+    pub const fn const_default() -> Self {
+        Self::construct_internal(FallbackPriority::const_default(), None, None)
+    }
+
+    // Assembles a `DataKeyMetadata` from an explicit value for each of its fields.
+    #[doc(hidden)]
+    pub const fn construct_internal(
+        fallback_priority: FallbackPriority,
+        extension_key: Option<icu_locid::extensions::unicode::Key>,
+        fallback_supplement: Option<FallbackSupplement>,
+    ) -> Self {
+        DataKeyMetadata {
+            fallback_priority,
+            extension_key,
+            fallback_supplement,
+        }
+    }
+}
+
+// The default is whatever `DataKeyMetadata::const_default` returns.
+impl Default for DataKeyMetadata {
+    #[inline]
+    fn default() -> Self {
+        DataKeyMetadata::const_default()
+    }
+}
+
+/// Used for loading data from an ICU4X data provider.
+///
+/// A resource key is tightly coupled with the code that uses it to load data at runtime.
+/// Executables can be searched for `DataKey` instances to produce optimized data files.
+/// Therefore, users should not generally create DataKey instances; they should instead use
+/// the ones exported by a component.
+///
+/// `DataKey`s are created with the [`data_key!`] macro:
+///
+/// ```
+/// # use icu_provider::prelude::DataKey;
+/// const K: DataKey = icu_provider::data_key!("foo/bar@1");
+/// ```
+///
+/// The human-readable path string ends with `@` followed by one or more digits (the version
+/// number). Paths do not contain characters other than ASCII letters and digits, `_`, `/`.
+///
+/// Invalid paths are compile-time errors (as [`data_key!`] uses `const`).
+///
+/// ```compile_fail,E0080
+/// # use icu_provider::prelude::DataKey;
+/// const K: DataKey = icu_provider::data_key!("foo/../bar@1");
+/// ```
+#[derive(Copy, Clone)]
+pub struct DataKey {
+ // The tagged path string (see `DataKeyPath`).
+ path: DataKeyPath,
+ // Hash computed from `path` when the key is constructed.
+ hash: DataKeyHash,
+ // Fallback-related metadata supplied when the key is constructed.
+ metadata: DataKeyMetadata,
+}
+
+impl PartialEq for DataKey {
+    /// Two keys are equal when hash, path, and metadata all agree; the 4-byte hash is
+    /// compared first.
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        if self.hash != other.hash {
+            return false;
+        }
+        self.path == other.path && self.metadata == other.metadata
+    }
+}
+
+impl Eq for DataKey {}
+
+impl Ord for DataKey {
+    /// Orders keys by path, consulting the metadata only to break ties between equal paths.
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        match self.path.cmp(&other.path) {
+            core::cmp::Ordering::Equal => self.metadata.cmp(&other.metadata),
+            by_path => by_path,
+        }
+    }
+}
+
+impl PartialOrd for DataKey {
+    /// Always `Some`: delegates to the total order defined by the `Ord` impl.
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+impl core::hash::Hash for DataKey {
+    /// Feeds only the precomputed [`DataKeyHash`] into `state`.
+    #[inline]
+    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
+        core::hash::Hash::hash(&self.hash, state);
+    }
+}
+
+impl DataKey {
+    /// Returns the human-readable path of this [`DataKey`].
+    ///
+    /// The path string ends with `@` followed by one or more digits (the version number).
+    /// Apart from that `@`, it contains only ASCII letters and digits, `_`, and `/`.
+    ///
+    /// Useful for reading and writing data to a file system.
+    #[inline]
+    pub const fn path(self) -> DataKeyPath {
+        let Self { path, .. } = self;
+        path
+    }
+
+    /// Returns the platform-independent hash of this [`DataKey`].
+    ///
+    /// The hash is 4 bytes wide, which allows for fast key comparison.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use icu_provider::DataKey;
+    /// use icu_provider::DataKeyHash;
+    ///
+    /// const KEY: DataKey = icu_provider::data_key!("foo@1");
+    /// const KEY_HASH: DataKeyHash = KEY.hashed();
+    ///
+    /// assert_eq!(KEY_HASH.to_bytes(), [0xe2, 0xb6, 0x17, 0x71]);
+    /// ```
+    #[inline]
+    pub const fn hashed(self) -> DataKeyHash {
+        let Self { hash, .. } = self;
+        hash
+    }
+
+    /// Returns the [`DataKeyMetadata`] this [`DataKey`] was constructed with.
+    #[inline]
+    pub const fn metadata(self) -> DataKeyMetadata {
+        let Self { metadata, .. } = self;
+        metadata
+    }
+
+    /// Builds a [`DataKey`] out of an existing path and metadata, computing the hash from the
+    /// path.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_provider::data_key;
+    /// use icu_provider::DataKey;
+    ///
+    /// const CONST_KEY: DataKey = data_key!("foo@1");
+    ///
+    /// let runtime_key =
+    ///     DataKey::from_path_and_metadata(CONST_KEY.path(), CONST_KEY.metadata());
+    ///
+    /// assert_eq!(CONST_KEY, runtime_key);
+    /// ```
+    #[inline]
+    pub const fn from_path_and_metadata(path: DataKeyPath, metadata: DataKeyMetadata) -> Self {
+        let hash = DataKeyHash::compute_from_path(path);
+        Self {
+            path,
+            hash,
+            metadata,
+        }
+    }
+
+    // Builds a `DataKey` from an already-tagged path string, checking both tags and the path
+    // syntax. On failure the error pairs a description of the expected character class with
+    // the index at which it was not encountered.
+    #[doc(hidden)]
+    // The indexing operations in this function have been reviewed in detail and won't panic.
+    #[allow(clippy::indexing_slicing)]
+    pub const fn construct_internal(
+        path: &'static str,
+        metadata: DataKeyMetadata,
+    ) -> Result<Self, (&'static str, usize)> {
+        let bytes = path.as_bytes();
+        let leading = leading_tag!().as_bytes();
+        let trailing = trailing_tag!().as_bytes();
+
+        // Too short to contain both tags.
+        if bytes.len() < leading.len() + trailing.len() {
+            return Err(("tag", 0));
+        }
+        // Bounds of the untagged part.
+        let start = leading.len();
+        let end = bytes.len() - trailing.len();
+
+        // The leading tag must match byte for byte...
+        let mut offset = 0;
+        while offset < leading.len() {
+            if bytes[offset] != leading[offset] {
+                return Err(("tag", 0));
+            }
+            offset += 1;
+        }
+        // ...and so must the trailing tag.
+        let mut offset = 0;
+        while offset < trailing.len() {
+            if bytes[end + offset] != trailing[offset] {
+                return Err(("tag", end + 1));
+            }
+            offset += 1;
+        }
+
+        if let Err(mismatch) = Self::validate_path_manual_slice(path, start, end) {
+            return Err(mismatch);
+        }
+
+        let path = DataKeyPath { tagged: path };
+        let hash = DataKeyHash::compute_from_path(path);
+        Ok(Self {
+            path,
+            hash,
+            metadata,
+        })
+    }
+
+ // Checks that the bytes of `path` in `start..end` match the regex documented below.
+ //
+ // On failure, returns the character class that was expected together with the index
+ // (into the whole of `path`, tags included) at which it was not found.
+ const fn validate_path_manual_slice(
+ path: &'static str,
+ start: usize,
+ end: usize,
+ ) -> Result<(), (&'static str, usize)> {
+ debug_assert!(start <= end);
+ debug_assert!(end <= path.len());
+ // Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+
+ // Parser states: nothing consumed yet, inside the path body, just after `@`, and inside
+ // the version digits.
+ enum State {
+ Empty,
+ Body,
+ At,
+ Version,
+ }
+ use State::*;
+ let mut i = start;
+ let mut state = Empty;
+ loop {
+ // `None` stands for end of input, i.e. `i` has reached `end`.
+ let byte = if i < end {
+ #[allow(clippy::indexing_slicing)] // protected by debug assertion
+ Some(path.as_bytes()[i])
+ } else {
+ None
+ };
+ state = match (state, byte) {
+ // Accepted transitions.
+ (Empty | Body, Some(b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')) => Body,
+ (Body, Some(b'/')) => Body,
+ (Body, Some(b'@')) => At,
+ (At | Version, Some(b'0'..=b'9')) => Version,
+ // One of these cases will be hit at the latest when i == end, so the loop converges.
+ (Version, None) => {
+ return Ok(());
+ }
+
+ // Everything else is rejected, reporting what was expected in the current state.
+ (Empty, _) => return Err(("[a-zA-Z0-9_]", i)),
+ // NOTE(review): "A-z" in the next message looks like a typo for "A-Z";
+ // `test_path_syntax` compares against this exact string, so both would have to
+ // change together.
+ (Body, _) => return Err(("[a-zA-z0-9_/@]", i)),
+ (At, _) => return Err(("[0-9]", i)),
+ (Version, _) => return Err(("[0-9]", i)),
+ };
+ i += 1;
+ }
+ }
+
+    /// Checks whether this data key equals `key`, returning [`Ok`] if so and a
+    /// [`DataErrorKind::MissingDataKey`] error carrying `key` otherwise.
+    ///
+    /// Convenience method for data providers that support a single [`DataKey`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use icu_provider::prelude::*;
+    ///
+    /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1");
+    /// const FOO_BAZ: DataKey = icu_provider::data_key!("foo/baz@1");
+    /// const BAR_BAZ: DataKey = icu_provider::data_key!("bar/baz@1");
+    ///
+    /// assert!(matches!(FOO_BAR.match_key(FOO_BAR), Ok(())));
+    /// assert!(matches!(
+    ///     FOO_BAR.match_key(FOO_BAZ),
+    ///     Err(DataError {
+    ///         kind: DataErrorKind::MissingDataKey,
+    ///         ..
+    ///     })
+    /// ));
+    /// assert!(matches!(
+    ///     FOO_BAR.match_key(BAR_BAZ),
+    ///     Err(DataError {
+    ///         kind: DataErrorKind::MissingDataKey,
+    ///         ..
+    ///     })
+    /// ));
+    ///
+    /// // The error context contains the argument:
+    /// assert_eq!(FOO_BAR.match_key(BAR_BAZ).unwrap_err().key, Some(BAR_BAZ));
+    /// ```
+    pub fn match_key(self, key: Self) -> Result<(), DataError> {
+        if self != key {
+            return Err(DataErrorKind::MissingDataKey.with_key(key));
+        }
+        Ok(())
+    }
+}
+
+/// See [`DataKey`].
+// Two forms are accepted: `data_key!(path)`, which supplies the default metadata, and
+// `data_key!(path, metadata)`. The path is wrapped with `tagged!` before being validated.
+#[macro_export]
+macro_rules! data_key {
+ ($path:expr) => {{
+ $crate::data_key!($path, $crate::DataKeyMetadata::const_default())
+ }};
+ ($path:expr, $metadata:expr) => {{
+ // Force the DataKey into a const context
+ const RESOURCE_KEY_MACRO_CONST: $crate::DataKey = {
+ match $crate::DataKey::construct_internal($crate::tagged!($path), $metadata) {
+ Ok(v) => v,
+ // An invalid path panics while the constant is being evaluated.
+ #[allow(clippy::panic)] // Const context
+ Err(_) => panic!(concat!("Invalid resource key: ", $path)),
+ // TODO Once formatting is const:
+ // Err((expected, index)) => panic!(
+ // "Invalid resource key {:?}: expected {:?}, found {:?} ",
+ // $path,
+ // expected,
+ // $crate::tagged!($path).get(index..))
+ // );
+ }
+ };
+ RESOURCE_KEY_MACRO_CONST
+ }};
+}
+
+impl fmt::Debug for DataKey {
+    /// Writes the key's `Display` output wrapped as `DataKey{...}`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str("DataKey{")?;
+        fmt::Display::fmt(self, f)?;
+        f.write_char('}')
+    }
+}
+
+// Every method here delegates to the key's path.
+impl Writeable for DataKey {
+    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+        let path = self.path();
+        path.write_to(sink)
+    }
+
+    fn writeable_length_hint(&self) -> LengthHint {
+        let path = self.path();
+        path.writeable_length_hint()
+    }
+
+    // The path is a `&'static str`, so it is handed out as a borrow.
+    fn write_to_string(&self) -> Cow<str> {
+        let untagged: &'static str = self.path().get();
+        Cow::Borrowed(untagged)
+    }
+}
+
+writeable::impl_display_with_writeable!(DataKey);
+
+#[test]
+fn test_path_syntax() {
+    // All cases use default metadata; only the tagged path varies.
+    fn build(tagged_path: &'static str) -> Result<DataKey, (&'static str, usize)> {
+        DataKey::construct_internal(tagged_path, Default::default())
+    }
+
+    // Valid keys:
+    for accepted in [
+        tagged!("hello/world@1"),
+        tagged!("hello/world/foo@1"),
+        tagged!("hello/world@999"),
+        tagged!("hello_world/foo@1"),
+        tagged!("hello_458/world@1"),
+        tagged!("hello_world@1"),
+    ] {
+        build(accepted).unwrap();
+    }
+
+    // No version:
+    let after_body = concat!(leading_tag!(), "hello/world").len();
+    assert_eq!(
+        build(tagged!("hello/world")),
+        Err(("[a-zA-z0-9_/@]", after_body))
+    );
+
+    let after_at = concat!(leading_tag!(), "hello/world@").len();
+    assert_eq!(build(tagged!("hello/world@")), Err(("[0-9]", after_at)));
+    assert_eq!(build(tagged!("hello/world@foo")), Err(("[0-9]", after_at)));
+
+    let after_version = concat!(leading_tag!(), "hello/world@1").len();
+    assert_eq!(
+        build(tagged!("hello/world@1foo")),
+        Err(("[0-9]", after_version))
+    );
+
+    // Anything following the version digits is rejected at the same index.
+    let after_foo_version = concat!(leading_tag!(), "foo@1").len();
+    for rejected in [
+        // Meta no longer accepted:
+        tagged!("foo@1[R]"),
+        tagged!("foo@1[u-ca]"),
+        tagged!("foo@1[R][u-ca]"),
+        // Invalid meta:
+        tagged!("foo@1[U]"),
+        tagged!("foo@1[uca]"),
+        tagged!("foo@1[u-"),
+        tagged!("foo@1[u-caa]"),
+        tagged!("foo@1[R"),
+    ] {
+        assert_eq!(build(rejected), Err(("[0-9]", after_foo_version)));
+    }
+
+    // Invalid characters:
+    assert_eq!(
+        build(tagged!("你好/世界@1")),
+        Err(("[a-zA-Z0-9_]", leading_tag!().len()))
+    );
+
+    // Invalid tag:
+    assert_eq!(
+        build(concat!("hello/world@1", trailing_tag!())),
+        Err(("tag", 0))
+    );
+    assert_eq!(
+        build(concat!(leading_tag!(), "hello/world@1")),
+        Err(("tag", after_version))
+    );
+    assert_eq!(build("hello/world@1"), Err(("tag", 0)));
+}
+
+#[test]
+fn test_key_to_string() {
+    // Each pair is a key together with the text it must render as.
+    let cases: [(DataKey, &'static str); 3] = [
+        (data_key!("core/cardinal@1"), "core/cardinal@1"),
+        (
+            data_key!("core/maxlengthsubcatg@1"),
+            "core/maxlengthsubcatg@1",
+        ),
+        (data_key!("core/cardinal@65535"), "core/cardinal@65535"),
+    ];
+
+    for (key, expected) in cases {
+        assert_eq!(expected, key.to_string());
+        writeable::assert_writeable_eq!(&key, expected);
+    }
+}
+
+#[test]
+fn test_key_hash() {
+    // Each entry is (key, expected hash, expected path).
+    let cases: [(DataKey, DataKeyHash, &'static str); 3] = [
+        (
+            data_key!("core/cardinal@1"),
+            DataKeyHash([172, 207, 42, 236]),
+            "core/cardinal@1",
+        ),
+        (
+            data_key!("core/maxlengthsubcatg@1"),
+            DataKeyHash([193, 6, 79, 61]),
+            "core/maxlengthsubcatg@1",
+        ),
+        (
+            data_key!("core/cardinal@65535"),
+            DataKeyHash([176, 131, 182, 223]),
+            "core/cardinal@65535",
+        ),
+    ];
+
+    for (key, hash, path) in cases {
+        assert_eq!(hash, key.hashed(), "{}", path);
+        assert_eq!(path, &*key.path(), "{}", path);
+    }
+}