summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unicode-bidi/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:13:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:13:27 +0000
commit40a355a42d4a9444dc753c04c6608dade2f06a23 (patch)
tree871fc667d2de662f171103ce5ec067014ef85e61 /third_party/rust/unicode-bidi/src
parentAdding upstream version 124.0.1. (diff)
downloadfirefox-40a355a42d4a9444dc753c04c6608dade2f06a23.tar.xz
firefox-40a355a42d4a9444dc753c04c6608dade2f06a23.zip
Adding upstream version 125.0.1.upstream/125.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/unicode-bidi/src')
-rw-r--r--third_party/rust/unicode-bidi/src/char_data/mod.rs5
-rw-r--r--third_party/rust/unicode-bidi/src/char_data/tables.rs4
-rw-r--r--third_party/rust/unicode-bidi/src/deprecated.rs9
-rw-r--r--third_party/rust/unicode-bidi/src/explicit.rs129
-rw-r--r--third_party/rust/unicode-bidi/src/implicit.rs93
-rw-r--r--third_party/rust/unicode-bidi/src/level.rs15
-rw-r--r--third_party/rust/unicode-bidi/src/lib.rs129
-rw-r--r--third_party/rust/unicode-bidi/src/prepare.rs266
-rw-r--r--third_party/rust/unicode-bidi/src/utf16.rs36
9 files changed, 443 insertions, 243 deletions
diff --git a/third_party/rust/unicode-bidi/src/char_data/mod.rs b/third_party/rust/unicode-bidi/src/char_data/mod.rs
index 4edf5b8f4c..543b0ed8fd 100644
--- a/third_party/rust/unicode-bidi/src/char_data/mod.rs
+++ b/third_party/rust/unicode-bidi/src/char_data/mod.rs
@@ -59,10 +59,7 @@ pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpening
}
pub fn is_rtl(bidi_class: BidiClass) -> bool {
- match bidi_class {
- RLE | RLO | RLI => true,
- _ => false,
- }
+ matches!(bidi_class, RLE | RLO | RLI)
}
#[cfg(feature = "hardcoded-data")]
diff --git a/third_party/rust/unicode-bidi/src/char_data/tables.rs b/third_party/rust/unicode-bidi/src/char_data/tables.rs
index ecdcf496d1..f10265d214 100644
--- a/third_party/rust/unicode-bidi/src/char_data/tables.rs
+++ b/third_party/rust/unicode-bidi/src/char_data/tables.rs
@@ -45,7 +45,7 @@ pub enum BidiClass {
use self::BidiClass::*;
#[cfg(feature = "hardcoded-data")]
-pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[
+pub const bidi_class_table: &[(char, char, BidiClass)] = &[
('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S),
('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B),
('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}',
@@ -516,7 +516,7 @@ pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[
'\u{e01ef}', NSM), ('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L)
];
-pub const bidi_pairs_table: &'static [(char, char, Option<char>)] = &[
+pub const bidi_pairs_table: &[(char, char, Option<char>)] = &[
('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}',
'\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}',
'\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}',
diff --git a/third_party/rust/unicode-bidi/src/deprecated.rs b/third_party/rust/unicode-bidi/src/deprecated.rs
index 74a24f5b8b..c903663e99 100644
--- a/third_party/rust/unicode-bidi/src/deprecated.rs
+++ b/third_party/rust/unicode-bidi/src/deprecated.rs
@@ -9,8 +9,6 @@
//! This module holds deprecated assets only.
-use alloc::vec::Vec;
-
use super::*;
/// Find the level runs within a line and return them in visual order.
@@ -71,10 +69,8 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
// Found the start of a sequence. Now find the end.
let mut seq_end = seq_start + 1;
- while seq_end < run_count {
- if levels[runs[seq_end].start] < max_level {
- break;
- }
+
+ while seq_end < run_count && levels[runs[seq_end].start] >= max_level {
seq_end += 1;
}
@@ -83,6 +79,7 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
seq_start = seq_end;
}
+
max_level
.lower(1)
.expect("Lowering embedding level below zero");
diff --git a/third_party/rust/unicode-bidi/src/explicit.rs b/third_party/rust/unicode-bidi/src/explicit.rs
index d4ad897b54..5760ab8ece 100644
--- a/third_party/rust/unicode-bidi/src/explicit.rs
+++ b/third_party/rust/unicode-bidi/src/explicit.rs
@@ -11,19 +11,25 @@
//!
//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
-use alloc::vec::Vec;
+#[cfg(feature = "smallvec")]
+use smallvec::{smallvec, SmallVec};
use super::char_data::{
is_rtl,
BidiClass::{self, *},
};
use super::level::Level;
+use super::prepare::removed_by_x9;
+use super::LevelRunVec;
use super::TextSource;
-/// Compute explicit embedding levels for one paragraph of text (X1-X8).
+/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify
+/// level runs (BD7) for use when determining Isolating Run Sequences (X10).
///
/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
/// for each char in `text`.
+///
+/// `runs` returns the list of level runs (BD7) of the text.
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn compute<'a, T: TextSource<'a> + ?Sized>(
text: &'a T,
@@ -31,35 +37,44 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
original_classes: &[BidiClass],
levels: &mut [Level],
processing_classes: &mut [BidiClass],
+ runs: &mut LevelRunVec,
) {
assert_eq!(text.len(), original_classes.len());
// <http://www.unicode.org/reports/tr9/#X1>
- let mut stack = DirectionalStatusStack::new();
- stack.push(para_level, OverrideStatus::Neutral);
+ #[cfg(feature = "smallvec")]
+ let mut stack: SmallVec<[Status; 8]> = smallvec![Status {
+ level: para_level,
+ status: OverrideStatus::Neutral,
+ }];
+ #[cfg(not(feature = "smallvec"))]
+ let mut stack = vec![Status {
+ level: para_level,
+ status: OverrideStatus::Neutral,
+ }];
let mut overflow_isolate_count = 0u32;
let mut overflow_embedding_count = 0u32;
let mut valid_isolate_count = 0u32;
+ let mut current_run_level = Level::ltr();
+ let mut current_run_start = 0;
+
for (i, len) in text.indices_lengths() {
+ let last = stack.last().unwrap();
+
match original_classes[i] {
// Rules X2-X5c
RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
- let last_level = stack.last().level;
-
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
- levels[i] = last_level;
+ levels[i] = last.level;
// X5a-X5c: Isolate initiators get the level of the last entry on the stack.
- let is_isolate = match original_classes[i] {
- RLI | LRI | FSI => true,
- _ => false,
- };
+ let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
if is_isolate {
// Redundant due to "Retaining explicit formatting characters" step.
- // levels[i] = last_level;
- match stack.last().status {
+ // levels[i] = last.level;
+ match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
@@ -67,22 +82,25 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
}
let new_level = if is_rtl(original_classes[i]) {
- last_level.new_explicit_next_rtl()
+ last.level.new_explicit_next_rtl()
} else {
- last_level.new_explicit_next_ltr()
+ last.level.new_explicit_next_ltr()
};
+
if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
{
let new_level = new_level.unwrap();
- stack.push(
- new_level,
- match original_classes[i] {
+
+ stack.push(Status {
+ level: new_level,
+ status: match original_classes[i] {
RLO => OverrideStatus::RTL,
LRO => OverrideStatus::LTR,
RLI | LRI | FSI => OverrideStatus::Isolate,
_ => OverrideStatus::Neutral,
},
- );
+ });
+
if is_isolate {
valid_isolate_count += 1;
} else {
@@ -110,21 +128,21 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
overflow_isolate_count -= 1;
} else if valid_isolate_count > 0 {
overflow_embedding_count = 0;
- loop {
- // Pop everything up to and including the last Isolate status.
- match stack.vec.pop() {
- None
- | Some(Status {
- status: OverrideStatus::Isolate,
- ..
- }) => break,
- _ => continue,
- }
- }
+
+ while !matches!(
+ stack.pop(),
+ None | Some(Status {
+ status: OverrideStatus::Isolate,
+ ..
+ })
+ ) {}
+
valid_isolate_count -= 1;
}
- let last = stack.last();
+
+ let last = stack.last().unwrap();
levels[i] = last.level;
+
match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
@@ -138,11 +156,12 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
// do nothing
} else if overflow_embedding_count > 0 {
overflow_embedding_count -= 1;
- } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
- stack.vec.pop();
+ } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 {
+ stack.pop();
}
+
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
- levels[i] = stack.last().level;
+ levels[i] = stack.last().unwrap().level;
// X9 part of retaining explicit formatting characters.
processing_classes[i] = BN;
}
@@ -153,8 +172,8 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
// <http://www.unicode.org/reports/tr9/#X6>
_ => {
- let last = stack.last();
levels[i] = last.level;
+
// This condition is not in the spec, but I am pretty sure that is a spec bug.
// https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
if original_classes[i] != BN {
@@ -172,6 +191,26 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
levels[i + j] = levels[i];
processing_classes[i + j] = processing_classes[i];
}
+
+ // Identify level runs to be passed to prepare::isolating_run_sequences().
+ if i == 0 {
+ // Initialize for the first (or only) run.
+ current_run_level = levels[i];
+ } else {
+ // Check if we need to start a new level run.
+ // <https://www.unicode.org/reports/tr9/#BD7>
+ if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
+ // End the last run and start a new one.
+ runs.push(current_run_start..i);
+ current_run_level = levels[i];
+ current_run_start = i;
+ }
+ }
+ }
+
+ // Append the trailing level run, if non-empty.
+ if levels.len() > current_run_start {
+ runs.push(current_run_start..levels.len());
}
}
@@ -188,23 +227,3 @@ enum OverrideStatus {
LTR,
Isolate,
}
-
-struct DirectionalStatusStack {
- vec: Vec<Status>,
-}
-
-impl DirectionalStatusStack {
- fn new() -> Self {
- DirectionalStatusStack {
- vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2),
- }
- }
-
- fn push(&mut self, level: Level, status: OverrideStatus) {
- self.vec.push(Status { level, status });
- }
-
- fn last(&self) -> &Status {
- self.vec.last().unwrap()
- }
-}
diff --git a/third_party/rust/unicode-bidi/src/implicit.rs b/third_party/rust/unicode-bidi/src/implicit.rs
index 0311053c0a..334afec049 100644
--- a/third_party/rust/unicode-bidi/src/implicit.rs
+++ b/third_party/rust/unicode-bidi/src/implicit.rs
@@ -9,8 +9,11 @@
//! 3.3.4 - 3.3.6. Resolve implicit levels and types.
+#[cfg(not(feature = "smallvec"))]
use alloc::vec::Vec;
use core::cmp::max;
+#[cfg(feature = "smallvec")]
+use smallvec::SmallVec;
use super::char_data::BidiClass::{self, *};
use super::level::Level;
@@ -39,7 +42,13 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
// The previous class for the purposes of rule W1, not tracking changes from any other rules.
let mut prev_class_before_w1 = sequence.sos;
let mut last_strong_is_al = false;
+ #[cfg(feature = "smallvec")]
+ let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5
+ #[cfg(not(feature = "smallvec"))]
let mut et_run_indices = Vec::new(); // for W5
+ #[cfg(feature = "smallvec")]
+ let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
+ #[cfg(not(feature = "smallvec"))]
let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
for (run_index, level_run) in sequence.runs.iter().enumerate() {
@@ -177,7 +186,7 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
_ => {
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// If there was a BN run before this, that's now a part of this ET run.
- et_run_indices.extend(&bn_run_indices);
+ et_run_indices.extend(bn_run_indices.clone());
// In case this is followed by an EN.
et_run_indices.push(i);
@@ -224,26 +233,29 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
// W7. If the previous strong char was L, change EN to L.
let mut last_strong_is_l = sequence.sos == L;
- for run in &sequence.runs {
- for i in run.clone() {
- match processing_classes[i] {
- EN if last_strong_is_l => {
- processing_classes[i] = L;
- }
- L => {
- last_strong_is_l = true;
- }
- R | AL => {
- last_strong_is_l = false;
- }
- // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
- // Already scanning past BN here.
- _ => {}
+ for i in sequence.runs.iter().cloned().flatten() {
+ match processing_classes[i] {
+ EN if last_strong_is_l => {
+ processing_classes[i] = L;
}
+ L => {
+ last_strong_is_l = true;
+ }
+ R | AL => {
+ last_strong_is_l = false;
+ }
+ // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
+ // Already scanning past BN here.
+ _ => {}
}
}
}
+#[cfg(feature = "smallvec")]
+type BracketPairVec = SmallVec<[BracketPair; 8]>;
+#[cfg(not(feature = "smallvec"))]
+type BracketPairVec = Vec<BracketPair>;
+
/// 3.3.5 Resolving Neutral Types
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types>
@@ -267,7 +279,14 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// > Identify the bracket pairs in the current isolating run sequence according to BD16.
// We use processing_classes, not original_classes, due to BD14/BD15
- let bracket_pairs = identify_bracket_pairs(text, data_source, sequence, processing_classes);
+ let mut bracket_pairs = BracketPairVec::new();
+ identify_bracket_pairs(
+ text,
+ data_source,
+ sequence,
+ processing_classes,
+ &mut bracket_pairs,
+ );
// > For each bracket-pair element in the list of pairs of text positions
//
@@ -308,7 +327,7 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
found_e = true;
} else if class == not_e {
found_not_e = true;
- } else if class == BidiClass::EN || class == BidiClass::AN {
+ } else if matches!(class, BidiClass::EN | BidiClass::AN) {
// > Within this scope, bidirectional types EN and AN are treated as R.
if e == BidiClass::L {
found_not_e = true;
@@ -337,15 +356,15 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
.iter_backwards_from(pair.start, pair.start_run)
.map(|i| processing_classes[i])
.find(|class| {
- *class == BidiClass::L
- || *class == BidiClass::R
- || *class == BidiClass::EN
- || *class == BidiClass::AN
+ matches!(
+ class,
+ BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN
+ )
})
.unwrap_or(sequence.sos);
// > Within this scope, bidirectional types EN and AN are treated as R.
- if previous_strong == BidiClass::EN || previous_strong == BidiClass::AN {
+ if matches!(previous_strong, BidiClass::EN | BidiClass::AN) {
previous_strong = BidiClass::R;
}
@@ -413,6 +432,9 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut prev_class = sequence.sos;
while let Some(mut i) = indices.next() {
// Process sequences of NI characters.
+ #[cfg(feature = "smallvec")]
+ let mut ni_run = SmallVec::<[usize; 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
let mut ni_run = Vec::new();
// The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
if is_NI(processing_classes[i]) || processing_classes[i] == BN {
@@ -484,9 +506,12 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
data_source: &D,
run_sequence: &IsolatingRunSequence,
original_classes: &[BidiClass],
-) -> Vec<BracketPair> {
- let mut ret = vec![];
- let mut stack = vec![];
+ bracket_pairs: &mut BracketPairVec,
+) {
+ #[cfg(feature = "smallvec")]
+ let mut stack = SmallVec::<[(char, usize, usize); 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
+ let mut stack = Vec::new();
for (run_index, level_run) in run_sequence.runs.iter().enumerate() {
for (i, ch) in text.subrange(level_run.clone()).char_indices() {
@@ -532,7 +557,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
start_run: element.2,
end_run: run_index,
};
- ret.push(pair);
+ bracket_pairs.push(pair);
// > Pop the stack through the current stack element inclusively.
stack.truncate(stack_index);
@@ -545,8 +570,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
}
// > Sort the list of pairs of text positions in ascending order based on
// > the text position of the opening paired bracket.
- ret.sort_by_key(|r| r.start);
- ret
+ bracket_pairs.sort_by_key(|r| r.start);
}
/// 3.3.6 Resolving Implicit Levels
@@ -555,11 +579,11 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels>
#[cfg_attr(feature = "flame_it", flamer::flame)]
-pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level {
+pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level {
let mut max_level = Level::ltr();
- assert_eq!(original_classes.len(), levels.len());
+ assert_eq!(processing_classes.len(), levels.len());
for i in 0..levels.len() {
- match (levels[i].is_rtl(), original_classes[i]) {
+ match (levels[i].is_rtl(), processing_classes[i]) {
(false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"),
(false, R) | (true, L) | (true, EN) | (true, AN) => {
levels[i].raise(1).expect("Level number error")
@@ -578,8 +602,5 @@ pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> L
/// <http://www.unicode.org/reports/tr9/#NI>
#[allow(non_snake_case)]
fn is_NI(class: BidiClass) -> bool {
- match class {
- B | S | WS | ON | FSI | LRI | RLI | PDI => true,
- _ => false,
- }
+ matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI)
}
diff --git a/third_party/rust/unicode-bidi/src/level.rs b/third_party/rust/unicode-bidi/src/level.rs
index ef4f6d9e40..5ece0251a5 100644
--- a/third_party/rust/unicode-bidi/src/level.rs
+++ b/third_party/rust/unicode-bidi/src/level.rs
@@ -13,9 +13,10 @@
//!
//! <http://www.unicode.org/reports/tr9/#BD2>
-use alloc::string::{String, ToString};
-use alloc::vec::Vec;
-use core::convert::{From, Into};
+use alloc::{
+ string::{String, ToString},
+ vec::Vec,
+};
use core::slice;
use super::char_data::BidiClass;
@@ -219,11 +220,11 @@ pub fn has_rtl(levels: &[Level]) -> bool {
levels.iter().any(|&lvl| lvl.is_rtl())
}
-impl Into<u8> for Level {
+impl From<Level> for u8 {
/// Convert to the level number
#[inline]
- fn into(self) -> u8 {
- self.number()
+ fn from(val: Level) -> Self {
+ val.number()
}
}
@@ -244,7 +245,7 @@ impl<'a> PartialEq<&'a str> for Level {
}
/// Used for matching levels in conformance tests
-impl<'a> PartialEq<String> for Level {
+impl PartialEq<String> for Level {
#[inline]
fn eq(&self, s: &String) -> bool {
self == &s.as_str()
diff --git a/third_party/rust/unicode-bidi/src/lib.rs b/third_party/rust/unicode-bidi/src/lib.rs
index 1072b67fe0..489927588a 100644
--- a/third_party/rust/unicode-bidi/src/lib.rs
+++ b/third_party/rust/unicode-bidi/src/lib.rs
@@ -71,6 +71,8 @@
extern crate std;
#[macro_use]
extern crate alloc;
+#[cfg(feature = "smallvec")]
+extern crate smallvec;
pub mod data_source;
pub mod deprecated;
@@ -86,7 +88,7 @@ mod prepare;
pub use crate::char_data::{BidiClass, UNICODE_VERSION};
pub use crate::data_source::BidiDataSource;
pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
-pub use crate::prepare::LevelRun;
+pub use crate::prepare::{LevelRun, LevelRunVec};
#[cfg(feature = "hardcoded-data")]
pub use crate::char_data::{bidi_class, HardcodedBidiData};
@@ -99,6 +101,8 @@ use core::cmp;
use core::iter::repeat;
use core::ops::Range;
use core::str::CharIndices;
+#[cfg(feature = "smallvec")]
+use smallvec::SmallVec;
use crate::format_chars as chars;
use crate::BidiClass::*;
@@ -244,8 +248,14 @@ struct InitialInfoExt<'text> {
/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
/// requires no further bidi processing (i.e. there are no RTL characters or bidi
- /// control codes present).
- pure_ltr: Vec<bool>,
+ /// control codes present), and whether any bidi isolation controls are present.
+ flags: Vec<ParagraphInfoFlags>,
+}
+
+#[derive(PartialEq, Debug)]
+struct ParagraphInfoFlags {
+ is_pure_ltr: bool,
+ has_isolate_controls: bool,
}
impl<'text> InitialInfoExt<'text> {
@@ -265,12 +275,12 @@ impl<'text> InitialInfoExt<'text> {
default_para_level: Option<Level>,
) -> InitialInfoExt<'a> {
let mut paragraphs = Vec::<ParagraphInfo>::new();
- let mut pure_ltr = Vec::<bool>::new();
- let (original_classes, _, _) = compute_initial_info(
+ let mut flags = Vec::<ParagraphInfoFlags>::new();
+ let (original_classes, _, _, _) = compute_initial_info(
data_source,
text,
default_para_level,
- Some((&mut paragraphs, &mut pure_ltr)),
+ Some((&mut paragraphs, &mut flags)),
);
InitialInfoExt {
@@ -279,7 +289,7 @@ impl<'text> InitialInfoExt<'text> {
original_classes,
paragraphs,
},
- pure_ltr,
+ flags,
}
}
}
@@ -295,16 +305,19 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
data_source: &D,
text: &'a T,
default_para_level: Option<Level>,
- mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<bool>)>,
-) -> (Vec<BidiClass>, Level, bool) {
+ mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
+) -> (Vec<BidiClass>, Level, bool, bool) {
let mut original_classes = Vec::with_capacity(text.len());
// The stack contains the starting code unit index for each nested isolate we're inside.
+ #[cfg(feature = "smallvec")]
+ let mut isolate_stack = SmallVec::<[usize; 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
let mut isolate_stack = Vec::new();
debug_assert!(
- if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs {
- paragraphs.is_empty() && pure_ltr.is_empty()
+ if let Some((ref paragraphs, ref flags)) = split_paragraphs {
+ paragraphs.is_empty() && flags.is_empty()
} else {
true
}
@@ -316,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// Per-paragraph flag: can subsequent processing be skipped? Set to false if any
// RTL characters or bidi control characters are encountered in the paragraph.
let mut is_pure_ltr = true;
+ // Set to true if any bidi isolation controls are present in the paragraph.
+ let mut has_isolate_controls = false;
#[cfg(feature = "flame_it")]
flame::start("compute_initial_info(): iter text.char_indices()");
@@ -334,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
match class {
B => {
- if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs {
+ if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
// P1. Split the text into separate paragraphs. The paragraph separator is kept
// with the previous paragraph.
let para_end = i + len;
@@ -343,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// P3. If no character is found in p2, set the paragraph level to zero.
level: para_level.unwrap_or(LTR_LEVEL),
});
- pure_ltr.push(is_pure_ltr);
+ flags.push(ParagraphInfoFlags {
+ is_pure_ltr,
+ has_isolate_controls,
+ });
// Reset state for the start of the next paragraph.
para_start = para_end;
// TODO: Support defaulting to direction of previous paragraph
@@ -351,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// <http://www.unicode.org/reports/tr9/#HL1>
para_level = default_para_level;
is_pure_ltr = true;
+ has_isolate_controls = false;
isolate_stack.clear();
}
}
@@ -387,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
RLI | LRI | FSI => {
is_pure_ltr = false;
+ has_isolate_controls = true;
isolate_stack.push(i);
}
@@ -398,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
}
}
- if let Some((paragraphs, pure_ltr)) = split_paragraphs {
+ if let Some((paragraphs, flags)) = split_paragraphs {
if para_start < text.len() {
paragraphs.push(ParagraphInfo {
range: para_start..text.len(),
level: para_level.unwrap_or(LTR_LEVEL),
});
- pure_ltr.push(is_pure_ltr);
+ flags.push(ParagraphInfoFlags {
+ is_pure_ltr,
+ has_isolate_controls,
+ });
}
- debug_assert_eq!(paragraphs.len(), pure_ltr.len());
+ debug_assert_eq!(paragraphs.len(), flags.len());
}
debug_assert_eq!(original_classes.len(), text.len());
@@ -417,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
original_classes,
para_level.unwrap_or(LTR_LEVEL),
is_pure_ltr,
+ has_isolate_controls,
)
}
@@ -475,20 +499,21 @@ impl<'text> BidiInfo<'text> {
text: &'a str,
default_para_level: Option<Level>,
) -> BidiInfo<'a> {
- let InitialInfoExt { base, pure_ltr, .. } =
+ let InitialInfoExt { base, flags, .. } =
InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
let mut levels = Vec::<Level>::with_capacity(text.len());
let mut processing_classes = base.original_classes.clone();
- for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
+ for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
let text = &text[para.range.clone()];
let original_classes = &base.original_classes[para.range.clone()];
compute_bidi_info_for_para(
data_source,
para,
- *is_pure_ltr,
+ flags.is_pure_ltr,
+ flags.has_isolate_controls,
text,
original_classes,
&mut processing_classes,
@@ -713,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> {
) -> ParagraphBidiInfo<'a> {
// Here we could create a ParagraphInitialInfo struct to parallel the one
// used by BidiInfo, but there doesn't seem any compelling reason for it.
- let (original_classes, paragraph_level, is_pure_ltr) =
+ let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
compute_initial_info(data_source, text, default_para_level, None);
let mut levels = Vec::<Level>::with_capacity(text.len());
@@ -731,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> {
data_source,
&para_info,
is_pure_ltr,
+ has_isolate_controls,
text,
&original_classes,
&mut processing_classes,
@@ -855,12 +881,12 @@ impl<'text> ParagraphBidiInfo<'text> {
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
-fn reorder_line<'text>(
- text: &'text str,
+fn reorder_line(
+ text: &str,
line: Range<usize>,
levels: Vec<Level>,
runs: Vec<LevelRun>,
-) -> Cow<'text, str> {
+) -> Cow<'_, str> {
// If all isolating run sequences are LTR, no reordering is needed
if runs.iter().all(|run| levels[run.start].is_ltr()) {
return text[line].into();
@@ -1059,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
data_source: &D,
para: &ParagraphInfo,
is_pure_ltr: bool,
+ has_isolate_controls: bool,
text: &'a T,
original_classes: &[BidiClass],
processing_classes: &mut [BidiClass],
@@ -1072,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
let processing_classes = &mut processing_classes[para.range.clone()];
let levels = &mut levels[para.range.clone()];
+ let mut level_runs = LevelRunVec::new();
explicit::compute(
text,
@@ -1079,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
original_classes,
levels,
processing_classes,
+ &mut level_runs,
);
- let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels);
+ let mut sequences = prepare::IsolatingRunSequenceVec::new();
+ prepare::isolating_run_sequences(
+ para.level,
+ original_classes,
+ levels,
+ level_runs,
+ has_isolate_controls,
+ &mut sequences,
+ );
for sequence in &sequences {
implicit::resolve_weak(text, sequence, processing_classes);
implicit::resolve_neutral(
@@ -1093,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
processing_classes,
);
}
+
implicit::resolve_levels(processing_classes, levels);
assign_levels_to_removed_chars(para.level, original_classes, levels);
@@ -1122,20 +1160,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
B | S => {
assert_eq!(reset_to, None);
reset_to = Some(i + T::char_len(c));
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
}
// Whitespace, isolate formatting
WS | FSI | LRI | RLI | PDI => {
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
}
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// same as above + set the level
RLE | LRE | RLO | LRO | PDF | BN => {
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
// also set the level to previous
@@ -1294,8 +1332,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut isolate_level = 0;
for c in text.chars() {
match data_source.bidi_class(c) {
- LRI | RLI | FSI => isolate_level = isolate_level + 1,
- PDI if isolate_level > 0 => isolate_level = isolate_level - 1,
+ LRI | RLI | FSI => isolate_level += 1,
+ PDI if isolate_level > 0 => isolate_level -= 1,
L if isolate_level == 0 => return Direction::Ltr,
R | AL if isolate_level == 0 => return Direction::Rtl,
B if !use_full_text => break,
@@ -1342,7 +1380,7 @@ impl<'text> TextSource<'text> for str {
}
#[inline]
fn indices_lengths(&'text self) -> Self::IndexLenIter {
- Utf8IndexLenIter::new(&self)
+ Utf8IndexLenIter::new(self)
}
#[inline]
fn char_len(ch: char) -> usize {
@@ -1544,6 +1582,24 @@ mod tests {
let tests = vec![
(
// text
+ "",
+ // base level
+ Some(RTL_LEVEL),
+ // levels
+ Level::vec(&[]),
+ // original_classes
+ vec![],
+ // paragraphs
+ vec![],
+ // levels_u16
+ Level::vec(&[]),
+ // original_classes_u16
+ vec![],
+ // paragraphs_u16
+ vec![],
+ ),
+ (
+ // text
"abc123",
// base level
Some(LTR_LEVEL),
@@ -1703,6 +1759,19 @@ mod tests {
paragraphs: t.4.clone(),
}
);
+ // If it was empty, also test that ParagraphBidiInfo handles it safely.
+ if t.4.len() == 0 {
+ assert_eq!(
+ ParagraphBidiInfo::new(t.0, t.1),
+ ParagraphBidiInfo {
+ text: t.0,
+ original_classes: t.3.clone(),
+ levels: t.2.clone(),
+ paragraph_level: RTL_LEVEL,
+ is_pure_ltr: true,
+ }
+ )
+ }
// If it was a single paragraph, also test ParagraphBidiInfo.
if t.4.len() == 1 {
assert_eq!(
diff --git a/third_party/rust/unicode-bidi/src/prepare.rs b/third_party/rust/unicode-bidi/src/prepare.rs
index 9234e1aa61..f7b35ad689 100644
--- a/third_party/rust/unicode-bidi/src/prepare.rs
+++ b/third_party/rust/unicode-bidi/src/prepare.rs
@@ -14,6 +14,8 @@
use alloc::vec::Vec;
use core::cmp::max;
use core::ops::Range;
+#[cfg(feature = "smallvec")]
+use smallvec::{smallvec, SmallVec};
use super::level::Level;
use super::BidiClass::{self, *};
@@ -23,6 +25,11 @@ use super::BidiClass::{self, *};
/// Represented as a range of byte indices.
pub type LevelRun = Range<usize>;
+#[cfg(feature = "smallvec")]
+pub type LevelRunVec = SmallVec<[LevelRun; 8]>;
+#[cfg(not(feature = "smallvec"))]
+pub type LevelRunVec = Vec<LevelRun>;
+
/// Output of `isolating_run_sequences` (steps X9-X10)
#[derive(Debug, PartialEq)]
pub struct IsolatingRunSequence {
@@ -31,6 +38,11 @@ pub struct IsolatingRunSequence {
pub eos: BidiClass, // End-of-sequence type.
}
+#[cfg(feature = "smallvec")]
+pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>;
+#[cfg(not(feature = "smallvec"))]
+pub type IsolatingRunSequenceVec = Vec<IsolatingRunSequence>;
+
/// Compute the set of isolating run sequences.
///
/// An isolating run sequence is a maximal sequence of level runs such that for all level runs
@@ -43,8 +55,59 @@ pub fn isolating_run_sequences(
para_level: Level,
original_classes: &[BidiClass],
levels: &[Level],
-) -> Vec<IsolatingRunSequence> {
- let runs = level_runs(levels, original_classes);
+ runs: LevelRunVec,
+ has_isolate_controls: bool,
+ isolating_run_sequences: &mut IsolatingRunSequenceVec,
+) {
+ // Per http://www.unicode.org/reports/tr9/#BD13:
+ // "In the absence of isolate initiators, each isolating run sequence in a paragraph
+ // consists of exactly one level run, and each level run constitutes a separate
+ // isolating run sequence."
+ // We can take a simplified path to handle this case.
+ if !has_isolate_controls {
+ isolating_run_sequences.reserve_exact(runs.len());
+ for run in runs {
+ // Determine the `sos` and `eos` class for the sequence.
+ // <http://www.unicode.org/reports/tr9/#X10>
+
+ let run_levels = &levels[run.clone()];
+ let run_classes = &original_classes[run.clone()];
+ let seq_level = run_levels[run_classes
+ .iter()
+ .position(|c| not_removed_by_x9(c))
+ .unwrap_or(0)];
+
+ let end_level = run_levels[run_classes
+ .iter()
+ .rposition(|c| not_removed_by_x9(c))
+ .unwrap_or(run.end - run.start - 1)];
+
+ // Get the level of the last non-removed char before the run.
+ let pred_level = match original_classes[..run.start]
+ .iter()
+ .rposition(not_removed_by_x9)
+ {
+ Some(idx) => levels[idx],
+ None => para_level,
+ };
+
+ // Get the level of the next non-removed char after the run.
+ let succ_level = match original_classes[run.end..]
+ .iter()
+ .position(not_removed_by_x9)
+ {
+ Some(idx) => levels[run.end + idx],
+ None => para_level,
+ };
+
+ isolating_run_sequences.push(IsolatingRunSequence {
+ runs: vec![run],
+ sos: max(seq_level, pred_level).bidi_class(),
+ eos: max(end_level, succ_level).bidi_class(),
+ });
+ }
+ return;
+ }
// Compute the set of isolating run sequences.
// <http://www.unicode.org/reports/tr9/#BD13>
@@ -52,10 +115,13 @@ pub fn isolating_run_sequences(
// When we encounter an isolate initiator, we push the current sequence onto the
// stack so we can resume it after the matching PDI.
- let mut stack = vec![Vec::new()];
+ #[cfg(feature = "smallvec")]
+ let mut stack: SmallVec<[Vec<Range<usize>>; 8]> = smallvec![vec![]];
+ #[cfg(not(feature = "smallvec"))]
+ let mut stack = vec![vec![]];
for run in runs {
- assert!(run.len() > 0);
+ assert!(!run.is_empty());
assert!(!stack.is_empty());
let start_class = original_classes[run.start];
@@ -67,8 +133,7 @@ pub fn isolating_run_sequences(
.iter()
.copied()
.rev()
- .filter(not_removed_by_x9)
- .next()
+ .find(not_removed_by_x9)
.unwrap_or(start_class);
let mut sequence = if start_class == PDI && stack.len() > 1 {
@@ -81,7 +146,7 @@ pub fn isolating_run_sequences(
sequence.push(run);
- if let RLI | LRI | FSI = end_class {
+ if matches!(end_class, RLI | LRI | FSI) {
// Resume this sequence after the isolate.
stack.push(sequence);
} else {
@@ -89,90 +154,82 @@ pub fn isolating_run_sequences(
sequences.push(sequence);
}
}
- // Pop any remaning sequences off the stack.
+ // Pop any remaining sequences off the stack.
sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
// Determine the `sos` and `eos` class for each sequence.
// <http://www.unicode.org/reports/tr9/#X10>
- sequences
- .into_iter()
- .map(|sequence: Vec<LevelRun>| {
- assert!(!sequence.is_empty());
+ for sequence in sequences {
+ assert!(!sequence.is_empty());
- let mut result = IsolatingRunSequence {
- runs: sequence,
- sos: L,
- eos: L,
- };
+ let start_of_seq = sequence[0].start;
+ let runs_len = sequence.len();
+ let end_of_seq = sequence[runs_len - 1].end;
- let start_of_seq = result.runs[0].start;
- let runs_len = result.runs.len();
- let end_of_seq = result.runs[runs_len - 1].end;
-
- // > (not counting characters removed by X9)
- let seq_level = result
- .iter_forwards_from(start_of_seq, 0)
- .filter(|i| not_removed_by_x9(&original_classes[*i]))
- .map(|i| levels[i])
- .next()
- .unwrap_or(levels[start_of_seq]);
-
- // XXXManishearth the spec talks of a start and end level,
- // but for a given IRS the two should be equivalent, yes?
- let end_level = result
- .iter_backwards_from(end_of_seq, runs_len - 1)
- .filter(|i| not_removed_by_x9(&original_classes[*i]))
- .map(|i| levels[i])
- .next()
- .unwrap_or(levels[end_of_seq - 1]);
-
- #[cfg(test)]
- for run in result.runs.clone() {
- for idx in run {
- if not_removed_by_x9(&original_classes[idx]) {
- assert_eq!(seq_level, levels[idx]);
- }
- }
+ let mut result = IsolatingRunSequence {
+ runs: sequence,
+ sos: L,
+ eos: L,
+ };
+
+ // > (not counting characters removed by X9)
+ let seq_level = levels[result
+ .iter_forwards_from(start_of_seq, 0)
+ .find(|i| not_removed_by_x9(&original_classes[*i]))
+ .unwrap_or(start_of_seq)];
+
+ // XXXManishearth the spec talks of a start and end level,
+ // but for a given IRS the two should be equivalent, yes?
+ let end_level = levels[result
+ .iter_backwards_from(end_of_seq, runs_len - 1)
+ .find(|i| not_removed_by_x9(&original_classes[*i]))
+ .unwrap_or(end_of_seq - 1)];
+
+ #[cfg(test)]
+ for idx in result.runs.clone().into_iter().flatten() {
+ if not_removed_by_x9(&original_classes[idx]) {
+ assert_eq!(seq_level, levels[idx]);
}
+ }
+
+ // Get the level of the last non-removed char before the runs.
+ let pred_level = match original_classes[..start_of_seq]
+ .iter()
+ .rposition(not_removed_by_x9)
+ {
+ Some(idx) => levels[idx],
+ None => para_level,
+ };
- // Get the level of the last non-removed char before the runs.
- let pred_level = match original_classes[..start_of_seq]
+ // Get the last non-removed character to check if it is an isolate initiator.
+ // The spec calls for an unmatched one, but matched isolate initiators
+ // will never be at the end of a level run (otherwise there would be more to the run).
+ // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check.
+ let last_non_removed = original_classes[..end_of_seq]
+ .iter()
+ .copied()
+ .rev()
+ .find(not_removed_by_x9)
+ .unwrap_or(BN);
+
+ // Get the level of the next non-removed char after the runs.
+ let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) {
+ para_level
+ } else {
+ match original_classes[end_of_seq..]
.iter()
- .rposition(not_removed_by_x9)
+ .position(not_removed_by_x9)
{
- Some(idx) => levels[idx],
+ Some(idx) => levels[end_of_seq + idx],
None => para_level,
- };
+ }
+ };
- // Get the last non-removed character to check if it is an isolate initiator.
- // The spec calls for an unmatched one, but matched isolate initiators
- // will never be at the end of a level run (otherwise there would be more to the run).
- // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check.
- let last_non_removed = original_classes[..end_of_seq]
- .iter()
- .copied()
- .rev()
- .find(not_removed_by_x9)
- .unwrap_or(BN);
-
- // Get the level of the next non-removed char after the runs.
- let succ_level = if let RLI | LRI | FSI = last_non_removed {
- para_level
- } else {
- match original_classes[end_of_seq..]
- .iter()
- .position(not_removed_by_x9)
- {
- Some(idx) => levels[end_of_seq + idx],
- None => para_level,
- }
- };
+ result.sos = max(seq_level, pred_level).bidi_class();
+ result.eos = max(end_level, succ_level).bidi_class();
- result.sos = max(seq_level, pred_level).bidi_class();
- result.eos = max(end_level, succ_level).bidi_class();
- result
- })
- .collect()
+ isolating_run_sequences.push(result);
+ }
}
impl IsolatingRunSequence {
@@ -219,6 +276,9 @@ impl IsolatingRunSequence {
/// Finds the level runs in a paragraph.
///
/// <http://www.unicode.org/reports/tr9/#BD7>
+///
+/// This is only used by tests; normally level runs are identified during explicit::compute.
+#[cfg(test)]
fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
assert_eq!(levels.len(), original_classes.len());
@@ -246,10 +306,7 @@ fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun>
///
/// <http://www.unicode.org/reports/tr9/#X9>
pub fn removed_by_x9(class: BidiClass) -> bool {
- match class {
- RLE | LRE | RLO | LRO | PDF | BN => true,
- _ => false,
- }
+ matches!(class, RLE | LRE | RLO | LRO | PDF | BN)
}
// For use as a predicate for `position` / `rposition`
@@ -281,7 +338,14 @@ mod tests {
let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ false,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
@@ -294,7 +358,14 @@ mod tests {
let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ true,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
@@ -307,7 +378,14 @@ mod tests {
let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ true,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
@@ -326,7 +404,14 @@ mod tests {
let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L];
let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ false,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
// text1
@@ -385,7 +470,14 @@ mod tests {
let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L];
let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ true,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
// text1·RLI·PDI·RLI·PDI·text6
diff --git a/third_party/rust/unicode-bidi/src/utf16.rs b/third_party/rust/unicode-bidi/src/utf16.rs
index dcd9baf2be..11b386f91e 100644
--- a/third_party/rust/unicode-bidi/src/utf16.rs
+++ b/third_party/rust/unicode-bidi/src/utf16.rs
@@ -18,7 +18,9 @@ use crate::{
compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
reorder_visual, visual_runs_for_line,
};
-use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo};
+use crate::{
+ BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags,
+};
#[cfg(feature = "hardcoded-data")]
use crate::HardcodedBidiData;
@@ -83,7 +85,7 @@ struct InitialInfoExt<'text> {
/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
/// requires no further bidi processing (i.e. there are no RTL characters or bidi
/// control codes present).
- pure_ltr: Vec<bool>,
+ flags: Vec<ParagraphInfoFlags>,
}
impl<'text> InitialInfoExt<'text> {
@@ -103,12 +105,12 @@ impl<'text> InitialInfoExt<'text> {
default_para_level: Option<Level>,
) -> InitialInfoExt<'a> {
let mut paragraphs = Vec::<ParagraphInfo>::new();
- let mut pure_ltr = Vec::<bool>::new();
- let (original_classes, _, _) = compute_initial_info(
+ let mut flags = Vec::<ParagraphInfoFlags>::new();
+ let (original_classes, _, _, _) = compute_initial_info(
data_source,
text,
default_para_level,
- Some((&mut paragraphs, &mut pure_ltr)),
+ Some((&mut paragraphs, &mut flags)),
);
InitialInfoExt {
@@ -117,7 +119,7 @@ impl<'text> InitialInfoExt<'text> {
original_classes,
paragraphs,
},
- pure_ltr,
+ flags,
}
}
}
@@ -177,20 +179,21 @@ impl<'text> BidiInfo<'text> {
text: &'a [u16],
default_para_level: Option<Level>,
) -> BidiInfo<'a> {
- let InitialInfoExt { base, pure_ltr, .. } =
+ let InitialInfoExt { base, flags, .. } =
InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
let mut levels = Vec::<Level>::with_capacity(text.len());
let mut processing_classes = base.original_classes.clone();
- for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
+ for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
let text = &text[para.range.clone()];
let original_classes = &base.original_classes[para.range.clone()];
compute_bidi_info_for_para(
data_source,
para,
- *is_pure_ltr,
+ flags.is_pure_ltr,
+ flags.has_isolate_controls,
text,
original_classes,
&mut processing_classes,
@@ -411,7 +414,7 @@ impl<'text> ParagraphBidiInfo<'text> {
) -> ParagraphBidiInfo<'a> {
// Here we could create a ParagraphInitialInfo struct to parallel the one
// used by BidiInfo, but there doesn't seem any compelling reason for it.
- let (original_classes, paragraph_level, is_pure_ltr) =
+ let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
compute_initial_info(data_source, text, default_para_level, None);
let mut levels = Vec::<Level>::with_capacity(text.len());
@@ -429,6 +432,7 @@ impl<'text> ParagraphBidiInfo<'text> {
data_source,
&para_info,
is_pure_ltr,
+ has_isolate_controls,
text,
&original_classes,
&mut processing_classes,
@@ -551,12 +555,12 @@ impl<'text> ParagraphBidiInfo<'text> {
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
-fn reorder_line<'text>(
- text: &'text [u16],
+fn reorder_line(
+ text: &[u16],
line: Range<usize>,
levels: Vec<Level>,
runs: Vec<LevelRun>,
-) -> Cow<'text, [u16]> {
+) -> Cow<'_, [u16]> {
// If all isolating run sequences are LTR, no reordering is needed
if runs.iter().all(|run| levels[run.start].is_ltr()) {
return text[line].into();
@@ -668,15 +672,15 @@ impl<'text> TextSource<'text> for [u16] {
}
#[inline]
fn chars(&'text self) -> Self::CharIter {
- Utf16CharIter::new(&self)
+ Utf16CharIter::new(self)
}
#[inline]
fn char_indices(&'text self) -> Self::CharIndexIter {
- Utf16CharIndexIter::new(&self)
+ Utf16CharIndexIter::new(self)
}
#[inline]
fn indices_lengths(&'text self) -> Self::IndexLenIter {
- Utf16IndexLenIter::new(&self)
+ Utf16IndexLenIter::new(self)
}
#[inline]
fn char_len(ch: char) -> usize {