diff options
Diffstat (limited to 'third_party/rust/unicode-bidi/src/explicit.rs')
-rw-r--r-- | third_party/rust/unicode-bidi/src/explicit.rs | 129 |
1 files changed, 74 insertions, 55 deletions
diff --git a/third_party/rust/unicode-bidi/src/explicit.rs b/third_party/rust/unicode-bidi/src/explicit.rs index d4ad897b54..5760ab8ece 100644 --- a/third_party/rust/unicode-bidi/src/explicit.rs +++ b/third_party/rust/unicode-bidi/src/explicit.rs @@ -11,19 +11,25 @@ //! //! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions> -use alloc::vec::Vec; +#[cfg(feature = "smallvec")] +use smallvec::{smallvec, SmallVec}; use super::char_data::{ is_rtl, BidiClass::{self, *}, }; use super::level::Level; +use super::prepare::removed_by_x9; +use super::LevelRunVec; use super::TextSource; -/// Compute explicit embedding levels for one paragraph of text (X1-X8). +/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify +/// level runs (BD7) for use when determining Isolating Run Sequences (X10). /// /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`, /// for each char in `text`. +/// +/// `runs` returns the list of level runs (BD7) of the text. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn compute<'a, T: TextSource<'a> + ?Sized>( text: &'a T, @@ -31,35 +37,44 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], + runs: &mut LevelRunVec, ) { assert_eq!(text.len(), original_classes.len()); // <http://www.unicode.org/reports/tr9/#X1> - let mut stack = DirectionalStatusStack::new(); - stack.push(para_level, OverrideStatus::Neutral); + #[cfg(feature = "smallvec")] + let mut stack: SmallVec<[Status; 8]> = smallvec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; + #[cfg(not(feature = "smallvec"))] + let mut stack = vec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; let mut overflow_isolate_count = 0u32; let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; + let mut current_run_level = Level::ltr(); + let mut current_run_start = 0; + for (i, len) in text.indices_lengths() { + let last = stack.last().unwrap(); + match original_classes[i] { // Rules X2-X5c RLE | LRE | RLO | LRO | RLI | LRI | FSI => { - let last_level = stack.last().level; - // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> - levels[i] = last_level; + levels[i] = last.level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. - let is_isolate = match original_classes[i] { - RLI | LRI | FSI => true, - _ => false, - }; + let is_isolate = matches!(original_classes[i], RLI | LRI | FSI); if is_isolate { // Redundant due to "Retaining explicit formatting characters" step. - // levels[i] = last_level; - match stack.last().status { + // levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} @@ -67,22 +82,25 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( } let new_level = if is_rtl(original_classes[i]) { - last_level.new_explicit_next_rtl() + last.level.new_explicit_next_rtl() } else { - last_level.new_explicit_next_ltr() + last.level.new_explicit_next_ltr() }; + if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { let new_level = new_level.unwrap(); - stack.push( - new_level, - match original_classes[i] { + + stack.push(Status { + level: new_level, + status: match original_classes[i] { RLO => OverrideStatus::RTL, LRO => OverrideStatus::LTR, RLI | LRI | FSI => OverrideStatus::Isolate, _ => OverrideStatus::Neutral, }, - ); + }); + if is_isolate { valid_isolate_count += 1; } else { @@ -110,21 +128,21 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( overflow_isolate_count -= 1; } else if valid_isolate_count > 0 { overflow_embedding_count = 0; - loop { - // Pop everything up to and including the last Isolate status. - match stack.vec.pop() { - None - | Some(Status { - status: OverrideStatus::Isolate, - .. - }) => break, - _ => continue, - } - } + + while !matches!( + stack.pop(), + None | Some(Status { + status: OverrideStatus::Isolate, + .. + }) + ) {} + valid_isolate_count -= 1; } - let last = stack.last(); + + let last = stack.last().unwrap(); levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, @@ -138,11 +156,12 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // do nothing } else if overflow_embedding_count > 0 { overflow_embedding_count -= 1; - } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 { - stack.vec.pop(); + } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 { + stack.pop(); } + // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> - levels[i] = stack.last().level; + levels[i] = stack.last().unwrap().level; // X9 part of retaining explicit formatting characters. processing_classes[i] = BN; } @@ -153,8 +172,8 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // <http://www.unicode.org/reports/tr9/#X6> _ => { - let last = stack.last(); levels[i] = last.level; + // This condition is not in the spec, but I am pretty sure that is a spec bug. // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf if original_classes[i] != BN { @@ -172,6 +191,26 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( levels[i + j] = levels[i]; processing_classes[i + j] = processing_classes[i]; } + + // Identify level runs to be passed to prepare::isolating_run_sequences(). + if i == 0 { + // Initialize for the first (or only) run. + current_run_level = levels[i]; + } else { + // Check if we need to start a new level run. + // <https://www.unicode.org/reports/tr9/#BD7> + if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { + // End the last run and start a new one. + runs.push(current_run_start..i); + current_run_level = levels[i]; + current_run_start = i; + } + } + } + + // Append the trailing level run, if non-empty. + if levels.len() > current_run_start { + runs.push(current_run_start..levels.len()); } } @@ -188,23 +227,3 @@ enum OverrideStatus { LTR, Isolate, } - -struct DirectionalStatusStack { - vec: Vec<Status>, -} - -impl DirectionalStatusStack { - fn new() -> Self { - DirectionalStatusStack { - vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2), - } - } - - fn push(&mut self, level: Level, status: OverrideStatus) { - self.vec.push(Status { level, status }); - } - - fn last(&self) -> &Status { - self.vec.last().unwrap() - } -} |