summaryrefslogtreecommitdiffstats
path: root/third_party/rust/icu_properties/src/trievalue.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/icu_properties/src/trievalue.rs
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/icu_properties/src/trievalue.rs')
-rw-r--r--third_party/rust/icu_properties/src/trievalue.rs248
1 files changed, 248 insertions, 0 deletions
diff --git a/third_party/rust/icu_properties/src/trievalue.rs b/third_party/rust/icu_properties/src/trievalue.rs
new file mode 100644
index 0000000000..d8b65e4aa9
--- /dev/null
+++ b/third_party/rust/icu_properties/src/trievalue.rs
@@ -0,0 +1,248 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::provider::bidi_data::{
+ CheckedBidiPairedBracketType, MirroredPairedBracketData, MirroredPairedBracketDataTryFromError,
+};
+use crate::script::ScriptWithExt;
+use crate::{
+ BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
+ GraphemeClusterBreak, IndicSyllabicCategory, LineBreak, Script, SentenceBreak, WordBreak,
+};
+use core::convert::TryInto;
+use core::num::TryFromIntError;
+use zerovec::ule::{AsULE, RawBytesULE};
+
+use icu_collections::codepointtrie::TrieValue;
+
+use core::convert::TryFrom;
+
+impl TrieValue for CanonicalCombiningClass {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for BidiClass {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for GeneralCategory {
+ type TryFromU32Error = &'static str;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
+ GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX))
+ .ok_or("Cannot parse GeneralCategory from integer")
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self as u8)
+ }
+}
+
+impl TrieValue for Script {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u16::try_from(i).map(Script)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for ScriptWithExt {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u16::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for EastAsianWidth {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for LineBreak {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for GraphemeClusterBreak {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for WordBreak {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for SentenceBreak {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+impl TrieValue for CheckedBidiPairedBracketType {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ Ok(match i {
+ 1 => CheckedBidiPairedBracketType::Open,
+ 2 => CheckedBidiPairedBracketType::Close,
+ _ => CheckedBidiPairedBracketType::None,
+ })
+ }
+}
+
+impl TrieValue for IndicSyllabicCategory {
+ type TryFromU32Error = TryFromIntError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ u8::try_from(i).map(Self)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(self.0)
+ }
+}
+
+// GCG is not used inside tries, but it is used in the name lookup type, and we want
+// to squeeze it into a u16 for storage. Its named mask values are specced so we can
+// do this in code.
+//
+// This is done by:
+// - Single-value masks are translated to their corresponding GeneralCategory values
+// - we know all of the multi-value masks and we give them special values
+// - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata
+//
+// In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except
+// with malformed ICU4X generated data.
+impl AsULE for GeneralCategoryGroup {
+ type ULE = RawBytesULE<2>;
+ fn to_unaligned(self) -> Self::ULE {
+ let value = gcg_to_packed_u16(self);
+ value.to_unaligned()
+ }
+ fn from_unaligned(ule: Self::ULE) -> Self {
+ let value = ule.as_unsigned_int();
+ packed_u16_to_gcg(value)
+ }
+}
+
+fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
+ match value {
+ 0xFFFF => GeneralCategoryGroup::CasedLetter,
+ 0xFFFE => GeneralCategoryGroup::Letter,
+ 0xFFFD => GeneralCategoryGroup::Mark,
+ 0xFFFC => GeneralCategoryGroup::Number,
+ 0xFFFB => GeneralCategoryGroup::Separator,
+ 0xFFFA => GeneralCategoryGroup::Other,
+ 0xFFF9 => GeneralCategoryGroup::Punctuation,
+ 0xFFF8 => GeneralCategoryGroup::Symbol,
+ v if v < 32 => GeneralCategory::new_from_u8(v as u8)
+ .map(|gc| gc.into())
+ .unwrap_or(GeneralCategoryGroup(0)),
+ // unknown values produce an empty mask
+ _ => GeneralCategoryGroup(0),
+ }
+}
+
+fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
+ // if it's a single property, translate to that property
+ if gcg.0.count_ones() == 1 {
+ // inverse operation of a bitshift
+ gcg.0.trailing_zeros() as u16
+ } else {
+ match gcg {
+ GeneralCategoryGroup::CasedLetter => 0xFFFF,
+ GeneralCategoryGroup::Letter => 0xFFFE,
+ GeneralCategoryGroup::Mark => 0xFFFD,
+ GeneralCategoryGroup::Number => 0xFFFC,
+ GeneralCategoryGroup::Separator => 0xFFFB,
+ GeneralCategoryGroup::Other => 0xFFFA,
+ GeneralCategoryGroup::Punctuation => 0xFFF9,
+ GeneralCategoryGroup::Symbol => 0xFFF8,
+ _ => 0xFF00, // random sentinel value
+ }
+ }
+}
+
+impl TrieValue for GeneralCategoryGroup {
+ type TryFromU32Error = TryFromIntError;
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ // Even though we're dealing with u32s here, TrieValue is about converting
+ // trie storage types to the actual type. This type will always be a packed u16
+ // in our case since the names map upcasts from u16
+ u16::try_from(i).map(packed_u16_to_gcg)
+ }
+
+ fn to_u32(self) -> u32 {
+ u32::from(gcg_to_packed_u16(self))
+ }
+}
+
+impl TrieValue for MirroredPairedBracketData {
+ type TryFromU32Error = MirroredPairedBracketDataTryFromError;
+
+ fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
+ Self::try_from(i)
+ }
+}