summary refs log tree commit diff stats
path: root/vendor/icu_provider/src/key.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_provider/src/key.rs')
-rw-r--r-- vendor/icu_provider/src/key.rs 173
1 files changed, 118 insertions, 55 deletions
diff --git a/vendor/icu_provider/src/key.rs b/vendor/icu_provider/src/key.rs
index 8c76608fc..0e1e1006e 100644
--- a/vendor/icu_provider/src/key.rs
+++ b/vendor/icu_provider/src/key.rs
@@ -3,8 +3,8 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::error::{DataError, DataErrorKind};
-use crate::helpers;
+use crate::fallback::{LocaleFallbackConfig, LocaleFallbackPriority, LocaleFallbackSupplement};
use alloc::borrow::Cow;
use core::fmt;
use core::fmt::Write;
@@ -50,7 +50,7 @@ pub struct DataKeyHash([u8; 4]);
impl DataKeyHash {
const fn compute_from_path(path: DataKeyPath) -> Self {
- let hash = helpers::fxhash_32(
+ let hash = fxhash_32(
path.tagged.as_bytes(),
leading_tag!().len(),
trailing_tag!().len(),
@@ -64,6 +64,79 @@ impl DataKeyHash {
}
}
+/// Const function to compute the 32-bit FxHash of `bytes`, skipping the first `ignore_leading` and the last `ignore_trailing` bytes; returns 0 when nothing is left to hash.
+///
+/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our
+/// use case since the strings being hashed originate from a trusted source (the ICU4X
+/// components), and the hashes are computed at compile time, so we can check for collisions.
+///
+/// We could have considered a SHA or other cryptographic hash function. However, we are using
+/// FxHash because:
+///
+/// 1. There is precedent for this algorithm in Rust
+/// 2. The algorithm is easy to implement as a const function
+/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree
+/// 4. FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits,
+/// such that truncation would be required in order to fit into a u32, partially reducing
+/// the benefit of a cryptographically secure algorithm
+// The indexing operations in this function have been reviewed in detail and won't panic: past the early return `cursor <= end <= bytes.len()` holds, and every read is guarded by a check on `end - cursor`.
+#[allow(clippy::indexing_slicing)]
+const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 {
+ // This code is adapted from https://github.com/rust-lang/rustc-hash,
+ // whose license text is reproduced below.
+ //
+ // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+ // file at the top-level directory of this distribution and at
+ // http://rust-lang.org/COPYRIGHT.
+ //
+ // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+ // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+ // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+ // option. This file may not be copied, modified, or distributed
+ // except according to those terms.
+
+ if ignore_leading + ignore_trailing >= bytes.len() { // nothing remains once both ends are trimmed. NOTE(review): this sum is unchecked and could overflow for extreme arguments; the caller visible in this diff passes tag lengths only — confirm no other callers exist
+ return 0;
+ }
+
+ #[inline]
+ const fn hash_word_32(mut hash: u32, word: u32) -> u32 { // one mixing step: rotate the state, xor in the word, multiply by the seed
+ const ROTATE: u32 = 5;
+ const SEED32: u32 = 0x9e_37_79_b9;
+ hash = hash.rotate_left(ROTATE);
+ hash ^= word;
+ hash = hash.wrapping_mul(SEED32); // wrapping multiply: overflow is intended, not an error
+ hash
+ }
+
+ let mut cursor = ignore_leading; // index of the next byte to hash
+ let end = bytes.len() - ignore_trailing; // one past the last byte to hash; cannot underflow after the guard above
+ let mut hash = 0;
+
+ while end - cursor >= 4 { // consume as many full little-endian 4-byte words as fit
+ let word = u32::from_le_bytes([
+ bytes[cursor],
+ bytes[cursor + 1],
+ bytes[cursor + 2],
+ bytes[cursor + 3],
+ ]);
+ hash = hash_word_32(hash, word);
+ cursor += 4;
+ }
+
+ if end - cursor >= 2 { // at most 3 bytes remain here: fold in a 2-byte little-endian remainder if present
+ let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]);
+ hash = hash_word_32(hash, word as u32); // lossless widening from u16
+ cursor += 2;
+ }
+
+ if end - cursor >= 1 { // at most 1 byte remains here
+ hash = hash_word_32(hash, bytes[cursor] as u32); // lossless widening from u8
+ }
+
+ hash
+}
+
impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash {
type Container = zerovec::ZeroVec<'a, DataKeyHash>;
type Slice = zerovec::ZeroSlice<DataKeyHash>;
@@ -86,48 +159,6 @@ impl AsULE for DataKeyHash {
// Safe since the ULE type is `self`.
unsafe impl EqULE for DataKeyHash {}
-/// Hint for what to prioritize during fallback when data is unavailable.
-///
-/// For example, if `"en-US"` is requested, but we have no data for that specific locale,
-/// fallback may take us to `"en"` or `"und-US"` to check for data.
-#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
-#[non_exhaustive]
-pub enum FallbackPriority {
- /// Prioritize the language. This is the default behavior.
- ///
- /// For example, `"en-US"` should go to `"en"` and then `"und"`.
- Language,
- /// Prioritize the region.
- ///
- /// For example, `"en-US"` should go to `"und-US"` and then `"und"`.
- Region,
- /// Collation-specific fallback rules. Similar to language priority.
- ///
- /// For example, `"zh-Hant"` goes to `"zh"` before `"und"`.
- Collation,
-}
-
-impl FallbackPriority {
- /// Const-friendly version of [`Default::default`].
- pub const fn const_default() -> Self {
- Self::Language
- }
-}
-
-impl Default for FallbackPriority {
- fn default() -> Self {
- Self::const_default()
- }
-}
-
-/// What additional data to load when performing fallback.
-#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord)]
-#[non_exhaustive]
-pub enum FallbackSupplement {
- /// Collation supplement; see `CollationFallbackSupplementV1Marker`
- Collation,
-}
-
/// The string path of a data key. For example, "foo@1"
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct DataKeyPath {
@@ -163,35 +194,42 @@ impl Deref for DataKeyPath {
#[non_exhaustive]
pub struct DataKeyMetadata {
/// What to prioritize when fallbacking on this [`DataKey`].
- pub fallback_priority: FallbackPriority,
+ pub fallback_priority: LocaleFallbackPriority,
/// A Unicode extension keyword to consider when loading data for this [`DataKey`].
pub extension_key: Option<icu_locid::extensions::unicode::Key>,
/// Optional choice for additional fallbacking data required for loading this marker.
///
/// For more information, see `LocaleFallbackConfig::fallback_supplement`.
- pub fallback_supplement: Option<FallbackSupplement>,
+ pub fallback_supplement: Option<LocaleFallbackSupplement>,
+ /// Whether the key has a singleton value, as opposed to per-locale values. Singleton
+ /// keys behave differently, e.g. they never perform fallback, and can be optimized
+ /// in data providers.
+ pub singleton: bool,
}
impl DataKeyMetadata {
/// Const-friendly version of [`Default::default`].
pub const fn const_default() -> Self {
Self {
- fallback_priority: FallbackPriority::const_default(),
+ fallback_priority: LocaleFallbackPriority::const_default(),
extension_key: None,
fallback_supplement: None,
+ singleton: false,
}
}
#[doc(hidden)]
pub const fn construct_internal(
- fallback_priority: FallbackPriority,
+ fallback_priority: LocaleFallbackPriority,
extension_key: Option<icu_locid::extensions::unicode::Key>,
- fallback_supplement: Option<FallbackSupplement>,
+ fallback_supplement: Option<LocaleFallbackSupplement>,
+ singleton: bool,
) -> Self {
Self {
fallback_priority,
extension_key,
fallback_supplement,
+ singleton,
}
}
}
@@ -302,6 +340,16 @@ impl DataKey {
self.metadata
}
+ /// Returns the [`LocaleFallbackConfig`] for this [`DataKey`]: the default config with the priority, extension key, and fallback supplement copied from the key's metadata.
+ #[inline]
+ pub const fn fallback_config(self) -> LocaleFallbackConfig {
+ let mut config = LocaleFallbackConfig::const_default(); // start from the defaults, then overwrite the three fields below
+ config.priority = self.metadata.fallback_priority;
+ config.extension_key = self.metadata.extension_key;
+ config.fallback_supplement = self.metadata.fallback_supplement;
+ config // the metadata's `singleton` flag is not copied into the config here
+ }
+
/// Constructs a [`DataKey`] from a path and metadata.
///
/// # Examples
@@ -620,35 +668,50 @@ fn test_key_to_string() {
},
] {
writeable::assert_writeable_eq!(&cas.key, cas.expected);
+ assert_eq!(cas.expected, &*cas.key.path());
}
}
#[test]
+fn test_hash_word_32() { // despite the name, this exercises `fxhash_32` end to end
+ assert_eq!(0, fxhash_32(b"", 0, 0)); // empty input hashes to 0
+ assert_eq!(0, fxhash_32(b"a", 1, 0)); // the cases down to `1, 1` ignore at least as many bytes as the input has, so the result is 0
+ assert_eq!(0, fxhash_32(b"a", 0, 1));
+ assert_eq!(0, fxhash_32(b"a", 0, 10)); // ignore counts larger than the input are tolerated
+ assert_eq!(0, fxhash_32(b"a", 10, 0));
+ assert_eq!(0, fxhash_32(b"a", 1, 1));
+ assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); // lengths 1 through 9 below cover every mix of 4-byte, 2-byte and 1-byte steps
+ assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0));
+ assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0));
+ assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0));
+ assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0));
+ assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0));
+ assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0));
+ assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0));
+ assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0));
+}
+
+#[test]
fn test_key_hash() {
struct KeyTestCase {
pub key: DataKey,
pub hash: DataKeyHash,
- pub path: &'static str,
}
for cas in [
KeyTestCase {
key: data_key!("core/cardinal@1"),
hash: DataKeyHash([172, 207, 42, 236]),
- path: "core/cardinal@1",
},
KeyTestCase {
key: data_key!("core/maxlengthsubcatg@1"),
hash: DataKeyHash([193, 6, 79, 61]),
- path: "core/maxlengthsubcatg@1",
},
KeyTestCase {
key: data_key!("core/cardinal@65535"),
hash: DataKeyHash([176, 131, 182, 223]),
- path: "core/cardinal@65535",
},
] {
- assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.path);
- assert_eq!(cas.path, &*cas.key.path(), "{}", cas.path);
+ assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.key);
}
}