diff options
Diffstat (limited to 'third_party/rust/unicode-bidi/src/lib.rs')
-rw-r--r-- | third_party/rust/unicode-bidi/src/lib.rs | 129 |
1 files changed, 99 insertions, 30 deletions
diff --git a/third_party/rust/unicode-bidi/src/lib.rs b/third_party/rust/unicode-bidi/src/lib.rs index 1072b67fe0..489927588a 100644 --- a/third_party/rust/unicode-bidi/src/lib.rs +++ b/third_party/rust/unicode-bidi/src/lib.rs @@ -71,6 +71,8 @@ extern crate std; #[macro_use] extern crate alloc; +#[cfg(feature = "smallvec")] +extern crate smallvec; pub mod data_source; pub mod deprecated; @@ -86,7 +88,7 @@ mod prepare; pub use crate::char_data::{BidiClass, UNICODE_VERSION}; pub use crate::data_source::BidiDataSource; pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; -pub use crate::prepare::LevelRun; +pub use crate::prepare::{LevelRun, LevelRunVec}; #[cfg(feature = "hardcoded-data")] pub use crate::char_data::{bidi_class, HardcodedBidiData}; @@ -99,6 +101,8 @@ use core::cmp; use core::iter::repeat; use core::ops::Range; use core::str::CharIndices; +#[cfg(feature = "smallvec")] +use smallvec::SmallVec; use crate::format_chars as chars; use crate::BidiClass::*; @@ -244,8 +248,14 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. there are no RTL characters or bidi - /// control codes present). - pure_ltr: Vec<bool>, + /// control codes present), and whether any bidi isolation controls are present. + flags: Vec<ParagraphInfoFlags>, +} + +#[derive(PartialEq, Debug)] +struct ParagraphInfoFlags { + is_pure_ltr: bool, + has_isolate_controls: bool, } impl<'text> InitialInfoExt<'text> { @@ -265,12 +275,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option<Level>, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::<ParagraphInfo>::new(); - let mut pure_ltr = Vec::<bool>::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::<ParagraphInfoFlags>::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -279,7 +289,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -295,16 +305,19 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( data_source: &D, text: &'a T, default_para_level: Option<Level>, - mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<bool>)>, -) -> (Vec<BidiClass>, Level, bool) { + mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>, +) -> (Vec<BidiClass>, Level, bool, bool) { let mut original_classes = Vec::with_capacity(text.len()); // The stack contains the starting code unit index for each nested isolate we're inside. + #[cfg(feature = "smallvec")] + let mut isolate_stack = SmallVec::<[usize; 8]>::new(); + #[cfg(not(feature = "smallvec"))] let mut isolate_stack = Vec::new(); debug_assert!( - if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs { - paragraphs.is_empty() && pure_ltr.is_empty() + if let Some((ref paragraphs, ref flags)) = split_paragraphs { + paragraphs.is_empty() && flags.is_empty() } else { true } @@ -316,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // Per-paragraph flag: can subsequent processing be skipped? Set to false if any // RTL characters or bidi control characters are encountered in the paragraph. let mut is_pure_ltr = true; + // Set to true if any bidi isolation controls are present in the paragraph. + let mut has_isolate_controls = false; #[cfg(feature = "flame_it")] flame::start("compute_initial_info(): iter text.char_indices()"); @@ -334,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( match class { B => { - if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs { + if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs { // P1. Split the text into separate paragraphs. The paragraph separator is kept // with the previous paragraph. let para_end = i + len; @@ -343,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // P3. If no character is found in p2, set the paragraph level to zero. level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); // Reset state for the start of the next paragraph. para_start = para_end; // TODO: Support defaulting to direction of previous paragraph @@ -351,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // <http://www.unicode.org/reports/tr9/#HL1> para_level = default_para_level; is_pure_ltr = true; + has_isolate_controls = false; isolate_stack.clear(); } } @@ -387,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( RLI | LRI | FSI => { is_pure_ltr = false; + has_isolate_controls = true; isolate_stack.push(i); } @@ -398,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( } } - if let Some((paragraphs, pure_ltr)) = split_paragraphs { + if let Some((paragraphs, flags)) = split_paragraphs { if para_start < text.len() { paragraphs.push(ParagraphInfo { range: para_start..text.len(), level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); } - debug_assert_eq!(paragraphs.len(), pure_ltr.len()); + debug_assert_eq!(paragraphs.len(), flags.len()); } debug_assert_eq!(original_classes.len(), text.len()); @@ -417,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( original_classes, para_level.unwrap_or(LTR_LEVEL), is_pure_ltr, + has_isolate_controls, ) } @@ -475,20 +499,21 @@ impl<'text> BidiInfo<'text> { text: &'a str, default_para_level: Option<Level>, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. } = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::<Level>::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -713,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::<Level>::with_capacity(text.len()); @@ -731,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, ¶_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes, @@ -855,12 +881,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text str, +fn reorder_line( + text: &str, line: Range<usize>, levels: Vec<Level>, runs: Vec<LevelRun>, -) -> Cow<'text, str> { +) -> Cow<'_, str> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -1059,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> data_source: &D, para: &ParagraphInfo, is_pure_ltr: bool, + has_isolate_controls: bool, text: &'a T, original_classes: &[BidiClass], processing_classes: &mut [BidiClass], @@ -1072,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> let processing_classes = &mut processing_classes[para.range.clone()]; let levels = &mut levels[para.range.clone()]; + let mut level_runs = LevelRunVec::new(); explicit::compute( text, @@ -1079,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> original_classes, levels, processing_classes, + &mut level_runs, ); - let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); + let mut sequences = prepare::IsolatingRunSequenceVec::new(); + prepare::isolating_run_sequences( + para.level, + original_classes, + levels, + level_runs, + has_isolate_controls, + &mut sequences, + ); for sequence in &sequences { implicit::resolve_weak(text, sequence, processing_classes); implicit::resolve_neutral( @@ -1093,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> processing_classes, ); } + implicit::resolve_levels(processing_classes, levels); assign_levels_to_removed_chars(para.level, original_classes, levels); @@ -1122,20 +1160,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>( B | S => { assert_eq!(reset_to, None); reset_to = Some(i + T::char_len(c)); - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // Whitespace, isolate formatting WS | FSI | LRI | RLI | PDI => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> // same as above + set the level RLE | LRE | RLO | LRO | PDF | BN => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } // also set the level to previous @@ -1294,8 +1332,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut isolate_level = 0; for c in text.chars() { match data_source.bidi_class(c) { - LRI | RLI | FSI => isolate_level = isolate_level + 1, - PDI if isolate_level > 0 => isolate_level = isolate_level - 1, + LRI | RLI | FSI => isolate_level += 1, + PDI if isolate_level > 0 => isolate_level -= 1, L if isolate_level == 0 => return Direction::Ltr, R | AL if isolate_level == 0 => return Direction::Rtl, B if !use_full_text => break, @@ -1342,7 +1380,7 @@ impl<'text> TextSource<'text> for str { } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf8IndexLenIter::new(&self) + Utf8IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { @@ -1544,6 +1582,24 @@ mod tests { let tests = vec![ ( // text + "", + // base level + Some(RTL_LEVEL), + // levels + Level::vec(&[]), + // original_classes + vec![], + // paragraphs + vec![], + // levels_u16 + Level::vec(&[]), + // original_classes_u16 + vec![], + // paragraphs_u16 + vec![], + ), + ( + // text "abc123", // base level Some(LTR_LEVEL), @@ -1703,6 +1759,19 @@ mod tests { paragraphs: t.4.clone(), } ); + // If it was empty, also test that ParagraphBidiInfo handles it safely. + if t.4.len() == 0 { + assert_eq!( + ParagraphBidiInfo::new(t.0, t.1), + ParagraphBidiInfo { + text: t.0, + original_classes: t.3.clone(), + levels: t.2.clone(), + paragraph_level: RTL_LEVEL, + is_pure_ltr: true, + } + ) + } // If it was a single paragraph, also test ParagraphBidiInfo. if t.4.len() == 1 { assert_eq!( |