diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/icu_properties/src/trievalue.rs | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/icu_properties/src/trievalue.rs')
-rw-r--r-- | third_party/rust/icu_properties/src/trievalue.rs | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/third_party/rust/icu_properties/src/trievalue.rs b/third_party/rust/icu_properties/src/trievalue.rs new file mode 100644 index 0000000000..d8b65e4aa9 --- /dev/null +++ b/third_party/rust/icu_properties/src/trievalue.rs @@ -0,0 +1,248 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::provider::bidi_data::{ + CheckedBidiPairedBracketType, MirroredPairedBracketData, MirroredPairedBracketDataTryFromError, +}; +use crate::script::ScriptWithExt; +use crate::{ + BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup, + GraphemeClusterBreak, IndicSyllabicCategory, LineBreak, Script, SentenceBreak, WordBreak, +}; +use core::convert::TryInto; +use core::num::TryFromIntError; +use zerovec::ule::{AsULE, RawBytesULE}; + +use icu_collections::codepointtrie::TrieValue; + +use core::convert::TryFrom; + +impl TrieValue for CanonicalCombiningClass { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for BidiClass { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for GeneralCategory { + type TryFromU32Error = &'static str; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum. + GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX)) + .ok_or("Cannot parse GeneralCategory from integer") + } + + fn to_u32(self) -> u32 { + u32::from(self as u8) + } +} + +impl TrieValue for Script { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u16::try_from(i).map(Script) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for ScriptWithExt { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u16::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for EastAsianWidth { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for LineBreak { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for GraphemeClusterBreak { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for WordBreak { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for SentenceBreak { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +impl TrieValue for CheckedBidiPairedBracketType { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + Ok(match i { + 1 => CheckedBidiPairedBracketType::Open, + 2 => CheckedBidiPairedBracketType::Close, + _ => CheckedBidiPairedBracketType::None, + }) + } +} + +impl TrieValue for IndicSyllabicCategory { + type TryFromU32Error = TryFromIntError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + u8::try_from(i).map(Self) + } + + fn to_u32(self) -> u32 { + u32::from(self.0) + } +} + +// GCG is not used inside tries, but it is used in the name lookup type, and we want +// to squeeze it into a u16 for storage. Its named mask values are specced so we can +// do this in code. +// +// This is done by: +// - Single-value masks are translated to their corresponding GeneralCategory values +// - we know all of the multi-value masks and we give them special values +// - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata +// +// In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except +// with malformed ICU4X generated data. +impl AsULE for GeneralCategoryGroup { + type ULE = RawBytesULE<2>; + fn to_unaligned(self) -> Self::ULE { + let value = gcg_to_packed_u16(self); + value.to_unaligned() + } + fn from_unaligned(ule: Self::ULE) -> Self { + let value = ule.as_unsigned_int(); + packed_u16_to_gcg(value) + } +} + +fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup { + match value { + 0xFFFF => GeneralCategoryGroup::CasedLetter, + 0xFFFE => GeneralCategoryGroup::Letter, + 0xFFFD => GeneralCategoryGroup::Mark, + 0xFFFC => GeneralCategoryGroup::Number, + 0xFFFB => GeneralCategoryGroup::Separator, + 0xFFFA => GeneralCategoryGroup::Other, + 0xFFF9 => GeneralCategoryGroup::Punctuation, + 0xFFF8 => GeneralCategoryGroup::Symbol, + v if v < 32 => GeneralCategory::new_from_u8(v as u8) + .map(|gc| gc.into()) + .unwrap_or(GeneralCategoryGroup(0)), + // unknown values produce an empty mask + _ => GeneralCategoryGroup(0), + } +} + +fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 { + // if it's a single property, translate to that property + if gcg.0.count_ones() == 1 { + // inverse operation of a bitshift + gcg.0.trailing_zeros() as u16 + } else { + match gcg { + GeneralCategoryGroup::CasedLetter => 0xFFFF, + GeneralCategoryGroup::Letter => 0xFFFE, + GeneralCategoryGroup::Mark => 0xFFFD, + GeneralCategoryGroup::Number => 0xFFFC, + GeneralCategoryGroup::Separator => 0xFFFB, + GeneralCategoryGroup::Other => 0xFFFA, + GeneralCategoryGroup::Punctuation => 0xFFF9, + GeneralCategoryGroup::Symbol => 0xFFF8, + _ => 0xFF00, // random sentinel value + } + } +} + +impl TrieValue for GeneralCategoryGroup { + type TryFromU32Error = TryFromIntError; + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + // Even though we're dealing with u32s here, TrieValue is about converting + // trie storage types to the actual type. This type will always be a packed u16 + // in our case since the names map upcasts from u16 + u16::try_from(i).map(packed_u16_to_gcg) + } + + fn to_u32(self) -> u32 { + u32::from(gcg_to_packed_u16(self)) + } +} + +impl TrieValue for MirroredPairedBracketData { + type TryFromU32Error = MirroredPairedBracketDataTryFromError; + + fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { + Self::try_from(i) + } +} |