summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unicode-bidi/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/unicode-bidi/src/lib.rs')
-rw-r--r--third_party/rust/unicode-bidi/src/lib.rs129
1 files changed, 99 insertions, 30 deletions
diff --git a/third_party/rust/unicode-bidi/src/lib.rs b/third_party/rust/unicode-bidi/src/lib.rs
index 1072b67fe0..489927588a 100644
--- a/third_party/rust/unicode-bidi/src/lib.rs
+++ b/third_party/rust/unicode-bidi/src/lib.rs
@@ -71,6 +71,8 @@
extern crate std;
#[macro_use]
extern crate alloc;
+#[cfg(feature = "smallvec")]
+extern crate smallvec;
pub mod data_source;
pub mod deprecated;
@@ -86,7 +88,7 @@ mod prepare;
pub use crate::char_data::{BidiClass, UNICODE_VERSION};
pub use crate::data_source::BidiDataSource;
pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
-pub use crate::prepare::LevelRun;
+pub use crate::prepare::{LevelRun, LevelRunVec};
#[cfg(feature = "hardcoded-data")]
pub use crate::char_data::{bidi_class, HardcodedBidiData};
@@ -99,6 +101,8 @@ use core::cmp;
use core::iter::repeat;
use core::ops::Range;
use core::str::CharIndices;
+#[cfg(feature = "smallvec")]
+use smallvec::SmallVec;
use crate::format_chars as chars;
use crate::BidiClass::*;
@@ -244,8 +248,14 @@ struct InitialInfoExt<'text> {
/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
/// requires no further bidi processing (i.e. there are no RTL characters or bidi
- /// control codes present).
- pure_ltr: Vec<bool>,
+ /// control codes present), and whether any bidi isolation controls are present.
+ flags: Vec<ParagraphInfoFlags>,
+}
+
+#[derive(PartialEq, Debug)]
+struct ParagraphInfoFlags {
+ is_pure_ltr: bool,
+ has_isolate_controls: bool,
}
impl<'text> InitialInfoExt<'text> {
@@ -265,12 +275,12 @@ impl<'text> InitialInfoExt<'text> {
default_para_level: Option<Level>,
) -> InitialInfoExt<'a> {
let mut paragraphs = Vec::<ParagraphInfo>::new();
- let mut pure_ltr = Vec::<bool>::new();
- let (original_classes, _, _) = compute_initial_info(
+ let mut flags = Vec::<ParagraphInfoFlags>::new();
+ let (original_classes, _, _, _) = compute_initial_info(
data_source,
text,
default_para_level,
- Some((&mut paragraphs, &mut pure_ltr)),
+ Some((&mut paragraphs, &mut flags)),
);
InitialInfoExt {
@@ -279,7 +289,7 @@ impl<'text> InitialInfoExt<'text> {
original_classes,
paragraphs,
},
- pure_ltr,
+ flags,
}
}
}
@@ -295,16 +305,19 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
data_source: &D,
text: &'a T,
default_para_level: Option<Level>,
- mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<bool>)>,
-) -> (Vec<BidiClass>, Level, bool) {
+ mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
+) -> (Vec<BidiClass>, Level, bool, bool) {
let mut original_classes = Vec::with_capacity(text.len());
// The stack contains the starting code unit index for each nested isolate we're inside.
+ #[cfg(feature = "smallvec")]
+ let mut isolate_stack = SmallVec::<[usize; 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
let mut isolate_stack = Vec::new();
debug_assert!(
- if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs {
- paragraphs.is_empty() && pure_ltr.is_empty()
+ if let Some((ref paragraphs, ref flags)) = split_paragraphs {
+ paragraphs.is_empty() && flags.is_empty()
} else {
true
}
@@ -316,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// Per-paragraph flag: can subsequent processing be skipped? Set to false if any
// RTL characters or bidi control characters are encountered in the paragraph.
let mut is_pure_ltr = true;
+ // Set to true if any bidi isolation controls are present in the paragraph.
+ let mut has_isolate_controls = false;
#[cfg(feature = "flame_it")]
flame::start("compute_initial_info(): iter text.char_indices()");
@@ -334,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
match class {
B => {
- if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs {
+ if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
// P1. Split the text into separate paragraphs. The paragraph separator is kept
// with the previous paragraph.
let para_end = i + len;
@@ -343,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// P3. If no character is found in p2, set the paragraph level to zero.
level: para_level.unwrap_or(LTR_LEVEL),
});
- pure_ltr.push(is_pure_ltr);
+ flags.push(ParagraphInfoFlags {
+ is_pure_ltr,
+ has_isolate_controls,
+ });
// Reset state for the start of the next paragraph.
para_start = para_end;
// TODO: Support defaulting to direction of previous paragraph
@@ -351,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// <http://www.unicode.org/reports/tr9/#HL1>
para_level = default_para_level;
is_pure_ltr = true;
+ has_isolate_controls = false;
isolate_stack.clear();
}
}
@@ -387,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
RLI | LRI | FSI => {
is_pure_ltr = false;
+ has_isolate_controls = true;
isolate_stack.push(i);
}
@@ -398,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
}
}
- if let Some((paragraphs, pure_ltr)) = split_paragraphs {
+ if let Some((paragraphs, flags)) = split_paragraphs {
if para_start < text.len() {
paragraphs.push(ParagraphInfo {
range: para_start..text.len(),
level: para_level.unwrap_or(LTR_LEVEL),
});
- pure_ltr.push(is_pure_ltr);
+ flags.push(ParagraphInfoFlags {
+ is_pure_ltr,
+ has_isolate_controls,
+ });
}
- debug_assert_eq!(paragraphs.len(), pure_ltr.len());
+ debug_assert_eq!(paragraphs.len(), flags.len());
}
debug_assert_eq!(original_classes.len(), text.len());
@@ -417,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
original_classes,
para_level.unwrap_or(LTR_LEVEL),
is_pure_ltr,
+ has_isolate_controls,
)
}
@@ -475,20 +499,21 @@ impl<'text> BidiInfo<'text> {
text: &'a str,
default_para_level: Option<Level>,
) -> BidiInfo<'a> {
- let InitialInfoExt { base, pure_ltr, .. } =
+ let InitialInfoExt { base, flags, .. } =
InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
let mut levels = Vec::<Level>::with_capacity(text.len());
let mut processing_classes = base.original_classes.clone();
- for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
+ for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
let text = &text[para.range.clone()];
let original_classes = &base.original_classes[para.range.clone()];
compute_bidi_info_for_para(
data_source,
para,
- *is_pure_ltr,
+ flags.is_pure_ltr,
+ flags.has_isolate_controls,
text,
original_classes,
&mut processing_classes,
@@ -713,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> {
) -> ParagraphBidiInfo<'a> {
// Here we could create a ParagraphInitialInfo struct to parallel the one
// used by BidiInfo, but there doesn't seem any compelling reason for it.
- let (original_classes, paragraph_level, is_pure_ltr) =
+ let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
compute_initial_info(data_source, text, default_para_level, None);
let mut levels = Vec::<Level>::with_capacity(text.len());
@@ -731,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> {
data_source,
&para_info,
is_pure_ltr,
+ has_isolate_controls,
text,
&original_classes,
&mut processing_classes,
@@ -855,12 +881,12 @@ impl<'text> ParagraphBidiInfo<'text> {
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
-fn reorder_line<'text>(
- text: &'text str,
+fn reorder_line(
+ text: &str,
line: Range<usize>,
levels: Vec<Level>,
runs: Vec<LevelRun>,
-) -> Cow<'text, str> {
+) -> Cow<'_, str> {
// If all isolating run sequences are LTR, no reordering is needed
if runs.iter().all(|run| levels[run.start].is_ltr()) {
return text[line].into();
@@ -1059,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
data_source: &D,
para: &ParagraphInfo,
is_pure_ltr: bool,
+ has_isolate_controls: bool,
text: &'a T,
original_classes: &[BidiClass],
processing_classes: &mut [BidiClass],
@@ -1072,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
let processing_classes = &mut processing_classes[para.range.clone()];
let levels = &mut levels[para.range.clone()];
+ let mut level_runs = LevelRunVec::new();
explicit::compute(
text,
@@ -1079,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
original_classes,
levels,
processing_classes,
+ &mut level_runs,
);
- let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels);
+ let mut sequences = prepare::IsolatingRunSequenceVec::new();
+ prepare::isolating_run_sequences(
+ para.level,
+ original_classes,
+ levels,
+ level_runs,
+ has_isolate_controls,
+ &mut sequences,
+ );
for sequence in &sequences {
implicit::resolve_weak(text, sequence, processing_classes);
implicit::resolve_neutral(
@@ -1093,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
processing_classes,
);
}
+
implicit::resolve_levels(processing_classes, levels);
assign_levels_to_removed_chars(para.level, original_classes, levels);
@@ -1122,20 +1160,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
B | S => {
assert_eq!(reset_to, None);
reset_to = Some(i + T::char_len(c));
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
}
// Whitespace, isolate formatting
WS | FSI | LRI | RLI | PDI => {
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
}
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// same as above + set the level
RLE | LRE | RLO | LRO | PDF | BN => {
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
// also set the level to previous
@@ -1294,8 +1332,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut isolate_level = 0;
for c in text.chars() {
match data_source.bidi_class(c) {
- LRI | RLI | FSI => isolate_level = isolate_level + 1,
- PDI if isolate_level > 0 => isolate_level = isolate_level - 1,
+ LRI | RLI | FSI => isolate_level += 1,
+ PDI if isolate_level > 0 => isolate_level -= 1,
L if isolate_level == 0 => return Direction::Ltr,
R | AL if isolate_level == 0 => return Direction::Rtl,
B if !use_full_text => break,
@@ -1342,7 +1380,7 @@ impl<'text> TextSource<'text> for str {
}
#[inline]
fn indices_lengths(&'text self) -> Self::IndexLenIter {
- Utf8IndexLenIter::new(&self)
+ Utf8IndexLenIter::new(self)
}
#[inline]
fn char_len(ch: char) -> usize {
@@ -1544,6 +1582,24 @@ mod tests {
let tests = vec![
(
// text
+ "",
+ // base level
+ Some(RTL_LEVEL),
+ // levels
+ Level::vec(&[]),
+ // original_classes
+ vec![],
+ // paragraphs
+ vec![],
+ // levels_u16
+ Level::vec(&[]),
+ // original_classes_u16
+ vec![],
+ // paragraphs_u16
+ vec![],
+ ),
+ (
+ // text
"abc123",
// base level
Some(LTR_LEVEL),
@@ -1703,6 +1759,19 @@ mod tests {
paragraphs: t.4.clone(),
}
);
+ // If it was empty, also test that ParagraphBidiInfo handles it safely.
+ if t.4.len() == 0 {
+ assert_eq!(
+ ParagraphBidiInfo::new(t.0, t.1),
+ ParagraphBidiInfo {
+ text: t.0,
+ original_classes: t.3.clone(),
+ levels: t.2.clone(),
+ paragraph_level: RTL_LEVEL,
+ is_pure_ltr: true,
+ }
+ )
+ }
// If it was a single paragraph, also test ParagraphBidiInfo.
if t.4.len() == 1 {
assert_eq!(