diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:13:27 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:13:27 +0000 |
commit | 40a355a42d4a9444dc753c04c6608dade2f06a23 (patch) | |
tree | 871fc667d2de662f171103ce5ec067014ef85e61 /third_party/rust/unicode-bidi/src | |
parent | Adding upstream version 124.0.1. (diff) | |
download | firefox-40a355a42d4a9444dc753c04c6608dade2f06a23.tar.xz firefox-40a355a42d4a9444dc753c04c6608dade2f06a23.zip |
Adding upstream version 125.0.1. upstream/125.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unicode-bidi/src')
-rw-r--r-- | third_party/rust/unicode-bidi/src/char_data/mod.rs | 5 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/char_data/tables.rs | 4 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/deprecated.rs | 9 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/explicit.rs | 129 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/implicit.rs | 93 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/level.rs | 15 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/lib.rs | 129 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/prepare.rs | 266 | ||||
-rw-r--r-- | third_party/rust/unicode-bidi/src/utf16.rs | 36 |
9 files changed, 443 insertions, 243 deletions
diff --git a/third_party/rust/unicode-bidi/src/char_data/mod.rs b/third_party/rust/unicode-bidi/src/char_data/mod.rs index 4edf5b8f4c..543b0ed8fd 100644 --- a/third_party/rust/unicode-bidi/src/char_data/mod.rs +++ b/third_party/rust/unicode-bidi/src/char_data/mod.rs @@ -59,10 +59,7 @@ pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpening } pub fn is_rtl(bidi_class: BidiClass) -> bool { - match bidi_class { - RLE | RLO | RLI => true, - _ => false, - } + matches!(bidi_class, RLE | RLO | RLI) } #[cfg(feature = "hardcoded-data")] diff --git a/third_party/rust/unicode-bidi/src/char_data/tables.rs b/third_party/rust/unicode-bidi/src/char_data/tables.rs index ecdcf496d1..f10265d214 100644 --- a/third_party/rust/unicode-bidi/src/char_data/tables.rs +++ b/third_party/rust/unicode-bidi/src/char_data/tables.rs @@ -45,7 +45,7 @@ pub enum BidiClass { use self::BidiClass::*; #[cfg(feature = "hardcoded-data")] -pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ +pub const bidi_class_table: &[(char, char, BidiClass)] = &[ ('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S), ('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B), ('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}', @@ -516,7 +516,7 @@ pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ '\u{e01ef}', NSM), ('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L) ]; -pub const bidi_pairs_table: &'static [(char, char, Option<char>)] = &[ +pub const bidi_pairs_table: &[(char, char, Option<char>)] = &[ ('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}', '\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}', '\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}', diff --git a/third_party/rust/unicode-bidi/src/deprecated.rs 
b/third_party/rust/unicode-bidi/src/deprecated.rs index 74a24f5b8b..c903663e99 100644 --- a/third_party/rust/unicode-bidi/src/deprecated.rs +++ b/third_party/rust/unicode-bidi/src/deprecated.rs @@ -9,8 +9,6 @@ //! This module holds deprecated assets only. -use alloc::vec::Vec; - use super::*; /// Find the level runs within a line and return them in visual order. @@ -71,10 +69,8 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> { // Found the start of a sequence. Now find the end. let mut seq_end = seq_start + 1; - while seq_end < run_count { - if levels[runs[seq_end].start] < max_level { - break; - } + + while seq_end < run_count && levels[runs[seq_end].start] >= max_level { seq_end += 1; } @@ -83,6 +79,7 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> { seq_start = seq_end; } + max_level .lower(1) .expect("Lowering embedding level below zero"); diff --git a/third_party/rust/unicode-bidi/src/explicit.rs b/third_party/rust/unicode-bidi/src/explicit.rs index d4ad897b54..5760ab8ece 100644 --- a/third_party/rust/unicode-bidi/src/explicit.rs +++ b/third_party/rust/unicode-bidi/src/explicit.rs @@ -11,19 +11,25 @@ //! //! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions> -use alloc::vec::Vec; +#[cfg(feature = "smallvec")] +use smallvec::{smallvec, SmallVec}; use super::char_data::{ is_rtl, BidiClass::{self, *}, }; use super::level::Level; +use super::prepare::removed_by_x9; +use super::LevelRunVec; use super::TextSource; -/// Compute explicit embedding levels for one paragraph of text (X1-X8). +/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify +/// level runs (BD7) for use when determining Isolating Run Sequences (X10). /// /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`, /// for each char in `text`. +/// +/// `runs` returns the list of level runs (BD7) of the text. 
#[cfg_attr(feature = "flame_it", flamer::flame)] pub fn compute<'a, T: TextSource<'a> + ?Sized>( text: &'a T, @@ -31,35 +37,44 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], + runs: &mut LevelRunVec, ) { assert_eq!(text.len(), original_classes.len()); // <http://www.unicode.org/reports/tr9/#X1> - let mut stack = DirectionalStatusStack::new(); - stack.push(para_level, OverrideStatus::Neutral); + #[cfg(feature = "smallvec")] + let mut stack: SmallVec<[Status; 8]> = smallvec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; + #[cfg(not(feature = "smallvec"))] + let mut stack = vec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; let mut overflow_isolate_count = 0u32; let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; + let mut current_run_level = Level::ltr(); + let mut current_run_start = 0; + for (i, len) in text.indices_lengths() { + let last = stack.last().unwrap(); + match original_classes[i] { // Rules X2-X5c RLE | LRE | RLO | LRO | RLI | LRI | FSI => { - let last_level = stack.last().level; - // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> - levels[i] = last_level; + levels[i] = last.level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. - let is_isolate = match original_classes[i] { - RLI | LRI | FSI => true, - _ => false, - }; + let is_isolate = matches!(original_classes[i], RLI | LRI | FSI); if is_isolate { // Redundant due to "Retaining explicit formatting characters" step. 
- // levels[i] = last_level; - match stack.last().status { + // levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} @@ -67,22 +82,25 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( } let new_level = if is_rtl(original_classes[i]) { - last_level.new_explicit_next_rtl() + last.level.new_explicit_next_rtl() } else { - last_level.new_explicit_next_ltr() + last.level.new_explicit_next_ltr() }; + if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { let new_level = new_level.unwrap(); - stack.push( - new_level, - match original_classes[i] { + + stack.push(Status { + level: new_level, + status: match original_classes[i] { RLO => OverrideStatus::RTL, LRO => OverrideStatus::LTR, RLI | LRI | FSI => OverrideStatus::Isolate, _ => OverrideStatus::Neutral, }, - ); + }); + if is_isolate { valid_isolate_count += 1; } else { @@ -110,21 +128,21 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( overflow_isolate_count -= 1; } else if valid_isolate_count > 0 { overflow_embedding_count = 0; - loop { - // Pop everything up to and including the last Isolate status. - match stack.vec.pop() { - None - | Some(Status { - status: OverrideStatus::Isolate, - .. - }) => break, - _ => continue, - } - } + + while !matches!( + stack.pop(), + None | Some(Status { + status: OverrideStatus::Isolate, + .. 
+ }) + ) {} + valid_isolate_count -= 1; } - let last = stack.last(); + + let last = stack.last().unwrap(); levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, @@ -138,11 +156,12 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // do nothing } else if overflow_embedding_count > 0 { overflow_embedding_count -= 1; - } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 { - stack.vec.pop(); + } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 { + stack.pop(); } + // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> - levels[i] = stack.last().level; + levels[i] = stack.last().unwrap().level; // X9 part of retaining explicit formatting characters. processing_classes[i] = BN; } @@ -153,8 +172,8 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // <http://www.unicode.org/reports/tr9/#X6> _ => { - let last = stack.last(); levels[i] = last.level; + // This condition is not in the spec, but I am pretty sure that is a spec bug. // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf if original_classes[i] != BN { @@ -172,6 +191,26 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( levels[i + j] = levels[i]; processing_classes[i + j] = processing_classes[i]; } + + // Identify level runs to be passed to prepare::isolating_run_sequences(). + if i == 0 { + // Initialize for the first (or only) run. + current_run_level = levels[i]; + } else { + // Check if we need to start a new level run. + // <https://www.unicode.org/reports/tr9/#BD7> + if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { + // End the last run and start a new one. + runs.push(current_run_start..i); + current_run_level = levels[i]; + current_run_start = i; + } + } + } + + // Append the trailing level run, if non-empty. 
+ if levels.len() > current_run_start { + runs.push(current_run_start..levels.len()); } } @@ -188,23 +227,3 @@ enum OverrideStatus { LTR, Isolate, } - -struct DirectionalStatusStack { - vec: Vec<Status>, -} - -impl DirectionalStatusStack { - fn new() -> Self { - DirectionalStatusStack { - vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2), - } - } - - fn push(&mut self, level: Level, status: OverrideStatus) { - self.vec.push(Status { level, status }); - } - - fn last(&self) -> &Status { - self.vec.last().unwrap() - } -} diff --git a/third_party/rust/unicode-bidi/src/implicit.rs b/third_party/rust/unicode-bidi/src/implicit.rs index 0311053c0a..334afec049 100644 --- a/third_party/rust/unicode-bidi/src/implicit.rs +++ b/third_party/rust/unicode-bidi/src/implicit.rs @@ -9,8 +9,11 @@ //! 3.3.4 - 3.3.6. Resolve implicit levels and types. +#[cfg(not(feature = "smallvec"))] use alloc::vec::Vec; use core::cmp::max; +#[cfg(feature = "smallvec")] +use smallvec::SmallVec; use super::char_data::BidiClass::{self, *}; use super::level::Level; @@ -39,7 +42,13 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( // The previous class for the purposes of rule W1, not tracking changes from any other rules. 
let mut prev_class_before_w1 = sequence.sos; let mut last_strong_is_al = false; + #[cfg(feature = "smallvec")] + let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + #[cfg(not(feature = "smallvec"))] let mut et_run_indices = Vec::new(); // for W5 + #[cfg(feature = "smallvec")] + let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> + #[cfg(not(feature = "smallvec"))] let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> for (run_index, level_run) in sequence.runs.iter().enumerate() { @@ -177,7 +186,7 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( _ => { // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> // If there was a BN run before this, that's now a part of this ET run. - et_run_indices.extend(&bn_run_indices); + et_run_indices.extend(bn_run_indices.clone()); // In case this is followed by an EN. et_run_indices.push(i); @@ -224,26 +233,29 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( // W7. If the previous strong char was L, change EN to L. let mut last_strong_is_l = sequence.sos == L; - for run in &sequence.runs { - for i in run.clone() { - match processing_classes[i] { - EN if last_strong_is_l => { - processing_classes[i] = L; - } - L => { - last_strong_is_l = true; - } - R | AL => { - last_strong_is_l = false; - } - // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> - // Already scanning past BN here. - _ => {} + for i in sequence.runs.iter().cloned().flatten() { + match processing_classes[i] { + EN if last_strong_is_l => { + processing_classes[i] = L; } + L => { + last_strong_is_l = true; + } + R | AL => { + last_strong_is_l = false; + } + // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> + // Already scanning past BN here. 
+ _ => {} } } } +#[cfg(feature = "smallvec")] +type BracketPairVec = SmallVec<[BracketPair; 8]>; +#[cfg(not(feature = "smallvec"))] +type BracketPairVec = Vec<BracketPair>; + /// 3.3.5 Resolving Neutral Types /// /// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types> @@ -267,7 +279,14 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // > Identify the bracket pairs in the current isolating run sequence according to BD16. // We use processing_classes, not original_classes, due to BD14/BD15 - let bracket_pairs = identify_bracket_pairs(text, data_source, sequence, processing_classes); + let mut bracket_pairs = BracketPairVec::new(); + identify_bracket_pairs( + text, + data_source, + sequence, + processing_classes, + &mut bracket_pairs, + ); // > For each bracket-pair element in the list of pairs of text positions // @@ -308,7 +327,7 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( found_e = true; } else if class == not_e { found_not_e = true; - } else if class == BidiClass::EN || class == BidiClass::AN { + } else if matches!(class, BidiClass::EN | BidiClass::AN) { // > Within this scope, bidirectional types EN and AN are treated as R. if e == BidiClass::L { found_not_e = true; @@ -337,15 +356,15 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( .iter_backwards_from(pair.start, pair.start_run) .map(|i| processing_classes[i]) .find(|class| { - *class == BidiClass::L - || *class == BidiClass::R - || *class == BidiClass::EN - || *class == BidiClass::AN + matches!( + class, + BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN + ) }) .unwrap_or(sequence.sos); // > Within this scope, bidirectional types EN and AN are treated as R. 
- if previous_strong == BidiClass::EN || previous_strong == BidiClass::AN { + if matches!(previous_strong, BidiClass::EN | BidiClass::AN) { previous_strong = BidiClass::R; } @@ -413,6 +432,9 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut prev_class = sequence.sos; while let Some(mut i) = indices.next() { // Process sequences of NI characters. + #[cfg(feature = "smallvec")] + let mut ni_run = SmallVec::<[usize; 8]>::new(); + #[cfg(not(feature = "smallvec"))] let mut ni_run = Vec::new(); // The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> if is_NI(processing_classes[i]) || processing_classes[i] == BN { @@ -484,9 +506,12 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( data_source: &D, run_sequence: &IsolatingRunSequence, original_classes: &[BidiClass], -) -> Vec<BracketPair> { - let mut ret = vec![]; - let mut stack = vec![]; + bracket_pairs: &mut BracketPairVec, +) { + #[cfg(feature = "smallvec")] + let mut stack = SmallVec::<[(char, usize, usize); 8]>::new(); + #[cfg(not(feature = "smallvec"))] + let mut stack = Vec::new(); for (run_index, level_run) in run_sequence.runs.iter().enumerate() { for (i, ch) in text.subrange(level_run.clone()).char_indices() { @@ -532,7 +557,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( start_run: element.2, end_run: run_index, }; - ret.push(pair); + bracket_pairs.push(pair); // > Pop the stack through the current stack element inclusively. stack.truncate(stack_index); @@ -545,8 +570,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( } // > Sort the list of pairs of text positions in ascending order based on // > the text position of the opening paired bracket. 
- ret.sort_by_key(|r| r.start); - ret + bracket_pairs.sort_by_key(|r| r.start); } /// 3.3.6 Resolving Implicit Levels @@ -555,11 +579,11 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( /// /// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels> #[cfg_attr(feature = "flame_it", flamer::flame)] -pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level { +pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level { let mut max_level = Level::ltr(); - assert_eq!(original_classes.len(), levels.len()); + assert_eq!(processing_classes.len(), levels.len()); for i in 0..levels.len() { - match (levels[i].is_rtl(), original_classes[i]) { + match (levels[i].is_rtl(), processing_classes[i]) { (false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"), (false, R) | (true, L) | (true, EN) | (true, AN) => { levels[i].raise(1).expect("Level number error") @@ -578,8 +602,5 @@ pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> L /// <http://www.unicode.org/reports/tr9/#NI> #[allow(non_snake_case)] fn is_NI(class: BidiClass) -> bool { - match class { - B | S | WS | ON | FSI | LRI | RLI | PDI => true, - _ => false, - } + matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI) } diff --git a/third_party/rust/unicode-bidi/src/level.rs b/third_party/rust/unicode-bidi/src/level.rs index ef4f6d9e40..5ece0251a5 100644 --- a/third_party/rust/unicode-bidi/src/level.rs +++ b/third_party/rust/unicode-bidi/src/level.rs @@ -13,9 +13,10 @@ //! //! 
<http://www.unicode.org/reports/tr9/#BD2> -use alloc::string::{String, ToString}; -use alloc::vec::Vec; -use core::convert::{From, Into}; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use core::slice; use super::char_data::BidiClass; @@ -219,11 +220,11 @@ pub fn has_rtl(levels: &[Level]) -> bool { levels.iter().any(|&lvl| lvl.is_rtl()) } -impl Into<u8> for Level { +impl From<Level> for u8 { /// Convert to the level number #[inline] - fn into(self) -> u8 { - self.number() + fn from(val: Level) -> Self { + val.number() } } @@ -244,7 +245,7 @@ impl<'a> PartialEq<&'a str> for Level { } /// Used for matching levels in conformance tests -impl<'a> PartialEq<String> for Level { +impl PartialEq<String> for Level { #[inline] fn eq(&self, s: &String) -> bool { self == &s.as_str() diff --git a/third_party/rust/unicode-bidi/src/lib.rs b/third_party/rust/unicode-bidi/src/lib.rs index 1072b67fe0..489927588a 100644 --- a/third_party/rust/unicode-bidi/src/lib.rs +++ b/third_party/rust/unicode-bidi/src/lib.rs @@ -71,6 +71,8 @@ extern crate std; #[macro_use] extern crate alloc; +#[cfg(feature = "smallvec")] +extern crate smallvec; pub mod data_source; pub mod deprecated; @@ -86,7 +88,7 @@ mod prepare; pub use crate::char_data::{BidiClass, UNICODE_VERSION}; pub use crate::data_source::BidiDataSource; pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; -pub use crate::prepare::LevelRun; +pub use crate::prepare::{LevelRun, LevelRunVec}; #[cfg(feature = "hardcoded-data")] pub use crate::char_data::{bidi_class, HardcodedBidiData}; @@ -99,6 +101,8 @@ use core::cmp; use core::iter::repeat; use core::ops::Range; use core::str::CharIndices; +#[cfg(feature = "smallvec")] +use smallvec::SmallVec; use crate::format_chars as chars; use crate::BidiClass::*; @@ -244,8 +248,14 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. 
there are no RTL characters or bidi - /// control codes present). - pure_ltr: Vec<bool>, + /// control codes present), and whether any bidi isolation controls are present. + flags: Vec<ParagraphInfoFlags>, +} + +#[derive(PartialEq, Debug)] +struct ParagraphInfoFlags { + is_pure_ltr: bool, + has_isolate_controls: bool, } impl<'text> InitialInfoExt<'text> { @@ -265,12 +275,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option<Level>, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::<ParagraphInfo>::new(); - let mut pure_ltr = Vec::<bool>::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::<ParagraphInfoFlags>::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -279,7 +289,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -295,16 +305,19 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( data_source: &D, text: &'a T, default_para_level: Option<Level>, - mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<bool>)>, -) -> (Vec<BidiClass>, Level, bool) { + mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>, +) -> (Vec<BidiClass>, Level, bool, bool) { let mut original_classes = Vec::with_capacity(text.len()); // The stack contains the starting code unit index for each nested isolate we're inside. 
+ #[cfg(feature = "smallvec")] + let mut isolate_stack = SmallVec::<[usize; 8]>::new(); + #[cfg(not(feature = "smallvec"))] let mut isolate_stack = Vec::new(); debug_assert!( - if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs { - paragraphs.is_empty() && pure_ltr.is_empty() + if let Some((ref paragraphs, ref flags)) = split_paragraphs { + paragraphs.is_empty() && flags.is_empty() } else { true } @@ -316,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // Per-paragraph flag: can subsequent processing be skipped? Set to false if any // RTL characters or bidi control characters are encountered in the paragraph. let mut is_pure_ltr = true; + // Set to true if any bidi isolation controls are present in the paragraph. + let mut has_isolate_controls = false; #[cfg(feature = "flame_it")] flame::start("compute_initial_info(): iter text.char_indices()"); @@ -334,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( match class { B => { - if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs { + if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs { // P1. Split the text into separate paragraphs. The paragraph separator is kept // with the previous paragraph. let para_end = i + len; @@ -343,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // P3. If no character is found in p2, set the paragraph level to zero. level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); // Reset state for the start of the next paragraph. 
para_start = para_end; // TODO: Support defaulting to direction of previous paragraph @@ -351,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // <http://www.unicode.org/reports/tr9/#HL1> para_level = default_para_level; is_pure_ltr = true; + has_isolate_controls = false; isolate_stack.clear(); } } @@ -387,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( RLI | LRI | FSI => { is_pure_ltr = false; + has_isolate_controls = true; isolate_stack.push(i); } @@ -398,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( } } - if let Some((paragraphs, pure_ltr)) = split_paragraphs { + if let Some((paragraphs, flags)) = split_paragraphs { if para_start < text.len() { paragraphs.push(ParagraphInfo { range: para_start..text.len(), level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); } - debug_assert_eq!(paragraphs.len(), pure_ltr.len()); + debug_assert_eq!(paragraphs.len(), flags.len()); } debug_assert_eq!(original_classes.len(), text.len()); @@ -417,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( original_classes, para_level.unwrap_or(LTR_LEVEL), is_pure_ltr, + has_isolate_controls, ) } @@ -475,20 +499,21 @@ impl<'text> BidiInfo<'text> { text: &'a str, default_para_level: Option<Level>, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. 
} = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::<Level>::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -713,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::<Level>::with_capacity(text.len()); @@ -731,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, &para_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes, @@ -855,12 +881,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text str, +fn reorder_line( + text: &str, line: Range<usize>, levels: Vec<Level>, runs: Vec<LevelRun>, -) -> Cow<'text, str> { +) -> Cow<'_, str> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -1059,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> data_source: &D, para: &ParagraphInfo, is_pure_ltr: bool, + has_isolate_controls: bool, text: &'a T, original_classes: 
&[BidiClass], processing_classes: &mut [BidiClass], @@ -1072,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> let processing_classes = &mut processing_classes[para.range.clone()]; let levels = &mut levels[para.range.clone()]; + let mut level_runs = LevelRunVec::new(); explicit::compute( text, @@ -1079,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> original_classes, levels, processing_classes, + &mut level_runs, ); - let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); + let mut sequences = prepare::IsolatingRunSequenceVec::new(); + prepare::isolating_run_sequences( + para.level, + original_classes, + levels, + level_runs, + has_isolate_controls, + &mut sequences, + ); for sequence in &sequences { implicit::resolve_weak(text, sequence, processing_classes); implicit::resolve_neutral( @@ -1093,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> processing_classes, ); } + implicit::resolve_levels(processing_classes, levels); assign_levels_to_removed_chars(para.level, original_classes, levels); @@ -1122,20 +1160,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>( B | S => { assert_eq!(reset_to, None); reset_to = Some(i + T::char_len(c)); - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // Whitespace, isolate formatting WS | FSI | LRI | RLI | PDI => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> // same as above + set the level RLE | LRE | RLO | LRO | PDF | BN => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } // also set the level to previous @@ -1294,8 +1332,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut isolate_level = 0; for c in text.chars() { match 
data_source.bidi_class(c) { - LRI | RLI | FSI => isolate_level = isolate_level + 1, - PDI if isolate_level > 0 => isolate_level = isolate_level - 1, + LRI | RLI | FSI => isolate_level += 1, + PDI if isolate_level > 0 => isolate_level -= 1, L if isolate_level == 0 => return Direction::Ltr, R | AL if isolate_level == 0 => return Direction::Rtl, B if !use_full_text => break, @@ -1342,7 +1380,7 @@ impl<'text> TextSource<'text> for str { } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf8IndexLenIter::new(&self) + Utf8IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { @@ -1544,6 +1582,24 @@ mod tests { let tests = vec![ ( // text + "", + // base level + Some(RTL_LEVEL), + // levels + Level::vec(&[]), + // original_classes + vec![], + // paragraphs + vec![], + // levels_u16 + Level::vec(&[]), + // original_classes_u16 + vec![], + // paragraphs_u16 + vec![], + ), + ( + // text "abc123", // base level Some(LTR_LEVEL), @@ -1703,6 +1759,19 @@ mod tests { paragraphs: t.4.clone(), } ); + // If it was empty, also test that ParagraphBidiInfo handles it safely. + if t.4.len() == 0 { + assert_eq!( + ParagraphBidiInfo::new(t.0, t.1), + ParagraphBidiInfo { + text: t.0, + original_classes: t.3.clone(), + levels: t.2.clone(), + paragraph_level: RTL_LEVEL, + is_pure_ltr: true, + } + ) + } // If it was a single paragraph, also test ParagraphBidiInfo. if t.4.len() == 1 { assert_eq!( diff --git a/third_party/rust/unicode-bidi/src/prepare.rs b/third_party/rust/unicode-bidi/src/prepare.rs index 9234e1aa61..f7b35ad689 100644 --- a/third_party/rust/unicode-bidi/src/prepare.rs +++ b/third_party/rust/unicode-bidi/src/prepare.rs @@ -14,6 +14,8 @@ use alloc::vec::Vec; use core::cmp::max; use core::ops::Range; +#[cfg(feature = "smallvec")] +use smallvec::{smallvec, SmallVec}; use super::level::Level; use super::BidiClass::{self, *}; @@ -23,6 +25,11 @@ use super::BidiClass::{self, *}; /// Represented as a range of byte indices. 
pub type LevelRun = Range<usize>; +#[cfg(feature = "smallvec")] +pub type LevelRunVec = SmallVec<[LevelRun; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type LevelRunVec = Vec<LevelRun>; + /// Output of `isolating_run_sequences` (steps X9-X10) #[derive(Debug, PartialEq)] pub struct IsolatingRunSequence { @@ -31,6 +38,11 @@ pub struct IsolatingRunSequence { pub eos: BidiClass, // End-of-sequence type. } +#[cfg(feature = "smallvec")] +pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type IsolatingRunSequenceVec = Vec<IsolatingRunSequence>; + /// Compute the set of isolating run sequences. /// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs @@ -43,8 +55,59 @@ pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], -) -> Vec<IsolatingRunSequence> { - let runs = level_runs(levels, original_classes); + runs: LevelRunVec, + has_isolate_controls: bool, + isolating_run_sequences: &mut IsolatingRunSequenceVec, +) { + // Per http://www.unicode.org/reports/tr9/#BD13: + // "In the absence of isolate initiators, each isolating run sequence in a paragraph + // consists of exactly one level run, and each level run constitutes a separate + // isolating run sequence." + // We can take a simplified path to handle this case. + if !has_isolate_controls { + isolating_run_sequences.reserve_exact(runs.len()); + for run in runs { + // Determine the `sos` and `eos` class for the sequence. + // <http://www.unicode.org/reports/tr9/#X10> + + let run_levels = &levels[run.clone()]; + let run_classes = &original_classes[run.clone()]; + let seq_level = run_levels[run_classes + .iter() + .position(|c| not_removed_by_x9(c)) + .unwrap_or(0)]; + + let end_level = run_levels[run_classes + .iter() + .rposition(|c| not_removed_by_x9(c)) + .unwrap_or(run.end - run.start - 1)]; + + // Get the level of the last non-removed char before the run. 
+ let pred_level = match original_classes[..run.start] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; + + // Get the level of the next non-removed char after the run. + let succ_level = match original_classes[run.end..] + .iter() + .position(not_removed_by_x9) + { + Some(idx) => levels[run.end + idx], + None => para_level, + }; + + isolating_run_sequences.push(IsolatingRunSequence { + runs: vec![run], + sos: max(seq_level, pred_level).bidi_class(), + eos: max(end_level, succ_level).bidi_class(), + }); + } + return; + } // Compute the set of isolating run sequences. // <http://www.unicode.org/reports/tr9/#BD13> @@ -52,10 +115,13 @@ pub fn isolating_run_sequences( // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. - let mut stack = vec![Vec::new()]; + #[cfg(feature = "smallvec")] + let mut stack: SmallVec<[Vec<Range<usize>>; 8]> = smallvec![vec![]]; + #[cfg(not(feature = "smallvec"))] + let mut stack = vec![vec![]]; for run in runs { - assert!(run.len() > 0); + assert!(!run.is_empty()); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; @@ -67,8 +133,7 @@ pub fn isolating_run_sequences( .iter() .copied() .rev() - .filter(not_removed_by_x9) - .next() + .find(not_removed_by_x9) .unwrap_or(start_class); let mut sequence = if start_class == PDI && stack.len() > 1 { @@ -81,7 +146,7 @@ pub fn isolating_run_sequences( sequence.push(run); - if let RLI | LRI | FSI = end_class { + if matches!(end_class, RLI | LRI | FSI) { // Resume this sequence after the isolate. stack.push(sequence); } else { @@ -89,90 +154,82 @@ pub fn isolating_run_sequences( sequences.push(sequence); } } - // Pop any remaning sequences off the stack. + // Pop any remaining sequences off the stack. sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); // Determine the `sos` and `eos` class for each sequence. 
// <http://www.unicode.org/reports/tr9/#X10> - sequences - .into_iter() - .map(|sequence: Vec<LevelRun>| { - assert!(!sequence.is_empty()); + for sequence in sequences { + assert!(!sequence.is_empty()); - let mut result = IsolatingRunSequence { - runs: sequence, - sos: L, - eos: L, - }; + let start_of_seq = sequence[0].start; + let runs_len = sequence.len(); + let end_of_seq = sequence[runs_len - 1].end; - let start_of_seq = result.runs[0].start; - let runs_len = result.runs.len(); - let end_of_seq = result.runs[runs_len - 1].end; - - // > (not counting characters removed by X9) - let seq_level = result - .iter_forwards_from(start_of_seq, 0) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[start_of_seq]); - - // XXXManishearth the spec talks of a start and end level, - // but for a given IRS the two should be equivalent, yes? - let end_level = result - .iter_backwards_from(end_of_seq, runs_len - 1) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[end_of_seq - 1]); - - #[cfg(test)] - for run in result.runs.clone() { - for idx in run { - if not_removed_by_x9(&original_classes[idx]) { - assert_eq!(seq_level, levels[idx]); - } - } + let mut result = IsolatingRunSequence { + runs: sequence, + sos: L, + eos: L, + }; + + // > (not counting characters removed by X9) + let seq_level = levels[result + .iter_forwards_from(start_of_seq, 0) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(start_of_seq)]; + + // XXXManishearth the spec talks of a start and end level, + // but for a given IRS the two should be equivalent, yes? 
+ let end_level = levels[result + .iter_backwards_from(end_of_seq, runs_len - 1) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(end_of_seq - 1)]; + + #[cfg(test)] + for idx in result.runs.clone().into_iter().flatten() { + if not_removed_by_x9(&original_classes[idx]) { + assert_eq!(seq_level, levels[idx]); } + } + + // Get the level of the last non-removed char before the runs. + let pred_level = match original_classes[..start_of_seq] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; - // Get the level of the last non-removed char before the runs. - let pred_level = match original_classes[..start_of_seq] + // Get the last non-removed character to check if it is an isolate initiator. + // The spec calls for an unmatched one, but matched isolate initiators + // will never be at the end of a level run (otherwise there would be more to the run). + // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. + let last_non_removed = original_classes[..end_of_seq] + .iter() + .copied() + .rev() + .find(not_removed_by_x9) + .unwrap_or(BN); + + // Get the level of the next non-removed char after the runs. + let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) { + para_level + } else { + match original_classes[end_of_seq..] .iter() - .rposition(not_removed_by_x9) + .position(not_removed_by_x9) { - Some(idx) => levels[idx], + Some(idx) => levels[end_of_seq + idx], None => para_level, - }; + } + }; - // Get the last non-removed character to check if it is an isolate initiator. - // The spec calls for an unmatched one, but matched isolate initiators - // will never be at the end of a level run (otherwise there would be more to the run). - // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. 
- let last_non_removed = original_classes[..end_of_seq] - .iter() - .copied() - .rev() - .find(not_removed_by_x9) - .unwrap_or(BN); - - // Get the level of the next non-removed char after the runs. - let succ_level = if let RLI | LRI | FSI = last_non_removed { - para_level - } else { - match original_classes[end_of_seq..] - .iter() - .position(not_removed_by_x9) - { - Some(idx) => levels[end_of_seq + idx], - None => para_level, - } - }; + result.sos = max(seq_level, pred_level).bidi_class(); + result.eos = max(end_level, succ_level).bidi_class(); - result.sos = max(seq_level, pred_level).bidi_class(); - result.eos = max(end_level, succ_level).bidi_class(); - result - }) - .collect() + isolating_run_sequences.push(result); + } } impl IsolatingRunSequence { @@ -219,6 +276,9 @@ impl IsolatingRunSequence { /// Finds the level runs in a paragraph. /// /// <http://www.unicode.org/reports/tr9/#BD7> +/// +/// This is only used by tests; normally level runs are identified during explicit::compute. 
+#[cfg(test)] fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> { assert_eq!(levels.len(), original_classes.len()); @@ -246,10 +306,7 @@ fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> /// /// <http://www.unicode.org/reports/tr9/#X9> pub fn removed_by_x9(class: BidiClass) -> bool { - match class { - RLE | LRE | RLO | LRO | PDF | BN => true, - _ => false, - } + matches!(class, RLE | LRE | RLO | LRO | PDF | BN) } // For use as a predicate for `position` / `rposition` @@ -281,7 +338,14 @@ mod tests { let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(), @@ -294,7 +358,14 @@ mod tests { let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(), @@ -307,7 +378,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let 
mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(), @@ -326,7 +404,14 @@ mod tests { let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 @@ -385,7 +470,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 diff --git a/third_party/rust/unicode-bidi/src/utf16.rs b/third_party/rust/unicode-bidi/src/utf16.rs index dcd9baf2be..11b386f91e 100644 --- a/third_party/rust/unicode-bidi/src/utf16.rs +++ b/third_party/rust/unicode-bidi/src/utf16.rs @@ -18,7 +18,9 @@ use crate::{ compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels, reorder_visual, visual_runs_for_line, }; -use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo}; +use crate::{ + BidiClass, BidiDataSource, 
Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags, +}; #[cfg(feature = "hardcoded-data")] use crate::HardcodedBidiData; @@ -83,7 +85,7 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. there are no RTL characters or bidi /// control codes present). - pure_ltr: Vec<bool>, + flags: Vec<ParagraphInfoFlags>, } impl<'text> InitialInfoExt<'text> { @@ -103,12 +105,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option<Level>, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::<ParagraphInfo>::new(); - let mut pure_ltr = Vec::<bool>::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::<ParagraphInfoFlags>::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -117,7 +119,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -177,20 +179,21 @@ impl<'text> BidiInfo<'text> { text: &'a [u16], default_para_level: Option<Level>, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. 
} = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::<Level>::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -411,7 +414,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::<Level>::with_capacity(text.len()); @@ -429,6 +432,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, &para_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes, @@ -551,12 +555,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text [u16], +fn reorder_line( + text: &[u16], line: Range<usize>, levels: Vec<Level>, runs: Vec<LevelRun>, -) -> Cow<'text, [u16]> { +) -> Cow<'_, [u16]> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -668,15 +672,15 @@ impl<'text> TextSource<'text> for [u16] { } #[inline] fn chars(&'text self) -> Self::CharIter { - Utf16CharIter::new(&self) + Utf16CharIter::new(self) } #[inline] fn char_indices(&'text self) 
-> Self::CharIndexIter { - Utf16CharIndexIter::new(&self) + Utf16CharIndexIter::new(self) } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf16IndexLenIter::new(&self) + Utf16IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { |