diff options
Diffstat (limited to 'third_party/rust/unicode-bidi/src/prepare.rs')
-rw-r--r-- | third_party/rust/unicode-bidi/src/prepare.rs | 266 |
1 files changed, 179 insertions, 87 deletions
diff --git a/third_party/rust/unicode-bidi/src/prepare.rs b/third_party/rust/unicode-bidi/src/prepare.rs index 9234e1aa61..f7b35ad689 100644 --- a/third_party/rust/unicode-bidi/src/prepare.rs +++ b/third_party/rust/unicode-bidi/src/prepare.rs @@ -14,6 +14,8 @@ use alloc::vec::Vec; use core::cmp::max; use core::ops::Range; +#[cfg(feature = "smallvec")] +use smallvec::{smallvec, SmallVec}; use super::level::Level; use super::BidiClass::{self, *}; @@ -23,6 +25,11 @@ use super::BidiClass::{self, *}; /// Represented as a range of byte indices. pub type LevelRun = Range<usize>; +#[cfg(feature = "smallvec")] +pub type LevelRunVec = SmallVec<[LevelRun; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type LevelRunVec = Vec<LevelRun>; + /// Output of `isolating_run_sequences` (steps X9-X10) #[derive(Debug, PartialEq)] pub struct IsolatingRunSequence { @@ -31,6 +38,11 @@ pub struct IsolatingRunSequence { pub eos: BidiClass, // End-of-sequence type. } +#[cfg(feature = "smallvec")] +pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type IsolatingRunSequenceVec = Vec<IsolatingRunSequence>; + /// Compute the set of isolating run sequences. /// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs @@ -43,8 +55,59 @@ pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], -) -> Vec<IsolatingRunSequence> { - let runs = level_runs(levels, original_classes); + runs: LevelRunVec, + has_isolate_controls: bool, + isolating_run_sequences: &mut IsolatingRunSequenceVec, +) { + // Per http://www.unicode.org/reports/tr9/#BD13: + // "In the absence of isolate initiators, each isolating run sequence in a paragraph + // consists of exactly one level run, and each level run constitutes a separate + // isolating run sequence." + // We can take a simplified path to handle this case. + if !has_isolate_controls { + isolating_run_sequences.reserve_exact(runs.len()); + for run in runs { + // Determine the `sos` and `eos` class for the sequence. + // <http://www.unicode.org/reports/tr9/#X10> + + let run_levels = &levels[run.clone()]; + let run_classes = &original_classes[run.clone()]; + let seq_level = run_levels[run_classes + .iter() + .position(|c| not_removed_by_x9(c)) + .unwrap_or(0)]; + + let end_level = run_levels[run_classes + .iter() + .rposition(|c| not_removed_by_x9(c)) + .unwrap_or(run.end - run.start - 1)]; + + // Get the level of the last non-removed char before the run. + let pred_level = match original_classes[..run.start] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; + + // Get the level of the next non-removed char after the run. + let succ_level = match original_classes[run.end..] + .iter() + .position(not_removed_by_x9) + { + Some(idx) => levels[run.end + idx], + None => para_level, + }; + + isolating_run_sequences.push(IsolatingRunSequence { + runs: vec![run], + sos: max(seq_level, pred_level).bidi_class(), + eos: max(end_level, succ_level).bidi_class(), + }); + } + return; + } // Compute the set of isolating run sequences. // <http://www.unicode.org/reports/tr9/#BD13> @@ -52,10 +115,13 @@ pub fn isolating_run_sequences( // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. - let mut stack = vec![Vec::new()]; + #[cfg(feature = "smallvec")] + let mut stack: SmallVec<[Vec<Range<usize>>; 8]> = smallvec![vec![]]; + #[cfg(not(feature = "smallvec"))] + let mut stack = vec![vec![]]; for run in runs { - assert!(run.len() > 0); + assert!(!run.is_empty()); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; @@ -67,8 +133,7 @@ pub fn isolating_run_sequences( .iter() .copied() .rev() - .filter(not_removed_by_x9) - .next() + .find(not_removed_by_x9) .unwrap_or(start_class); let mut sequence = if start_class == PDI && stack.len() > 1 { @@ -81,7 +146,7 @@ pub fn isolating_run_sequences( sequence.push(run); - if let RLI | LRI | FSI = end_class { + if matches!(end_class, RLI | LRI | FSI) { // Resume this sequence after the isolate. stack.push(sequence); } else { @@ -89,90 +154,82 @@ pub fn isolating_run_sequences( sequences.push(sequence); } } - // Pop any remaning sequences off the stack. + // Pop any remaining sequences off the stack. sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); // Determine the `sos` and `eos` class for each sequence. // <http://www.unicode.org/reports/tr9/#X10> - sequences - .into_iter() - .map(|sequence: Vec<LevelRun>| { - assert!(!sequence.is_empty()); + for sequence in sequences { + assert!(!sequence.is_empty()); - let mut result = IsolatingRunSequence { - runs: sequence, - sos: L, - eos: L, - }; + let start_of_seq = sequence[0].start; + let runs_len = sequence.len(); + let end_of_seq = sequence[runs_len - 1].end; - let start_of_seq = result.runs[0].start; - let runs_len = result.runs.len(); - let end_of_seq = result.runs[runs_len - 1].end; - - // > (not counting characters removed by X9) - let seq_level = result - .iter_forwards_from(start_of_seq, 0) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[start_of_seq]); - - // XXXManishearth the spec talks of a start and end level, - // but for a given IRS the two should be equivalent, yes? - let end_level = result - .iter_backwards_from(end_of_seq, runs_len - 1) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[end_of_seq - 1]); - - #[cfg(test)] - for run in result.runs.clone() { - for idx in run { - if not_removed_by_x9(&original_classes[idx]) { - assert_eq!(seq_level, levels[idx]); - } - } + let mut result = IsolatingRunSequence { + runs: sequence, + sos: L, + eos: L, + }; + + // > (not counting characters removed by X9) + let seq_level = levels[result + .iter_forwards_from(start_of_seq, 0) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(start_of_seq)]; + + // XXXManishearth the spec talks of a start and end level, + // but for a given IRS the two should be equivalent, yes? + let end_level = levels[result + .iter_backwards_from(end_of_seq, runs_len - 1) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(end_of_seq - 1)]; + + #[cfg(test)] + for idx in result.runs.clone().into_iter().flatten() { + if not_removed_by_x9(&original_classes[idx]) { + assert_eq!(seq_level, levels[idx]); } + } + + // Get the level of the last non-removed char before the runs. + let pred_level = match original_classes[..start_of_seq] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; - // Get the level of the last non-removed char before the runs. - let pred_level = match original_classes[..start_of_seq] + // Get the last non-removed character to check if it is an isolate initiator. + // The spec calls for an unmatched one, but matched isolate initiators + // will never be at the end of a level run (otherwise there would be more to the run). + // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. + let last_non_removed = original_classes[..end_of_seq] + .iter() + .copied() + .rev() + .find(not_removed_by_x9) + .unwrap_or(BN); + + // Get the level of the next non-removed char after the runs. + let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) { + para_level + } else { + match original_classes[end_of_seq..] .iter() - .rposition(not_removed_by_x9) + .position(not_removed_by_x9) { - Some(idx) => levels[idx], + Some(idx) => levels[end_of_seq + idx], None => para_level, - }; + } + }; - // Get the last non-removed character to check if it is an isolate initiator. - // The spec calls for an unmatched one, but matched isolate initiators - // will never be at the end of a level run (otherwise there would be more to the run). - // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. - let last_non_removed = original_classes[..end_of_seq] - .iter() - .copied() - .rev() - .find(not_removed_by_x9) - .unwrap_or(BN); - - // Get the level of the next non-removed char after the runs. - let succ_level = if let RLI | LRI | FSI = last_non_removed { - para_level - } else { - match original_classes[end_of_seq..] - .iter() - .position(not_removed_by_x9) - { - Some(idx) => levels[end_of_seq + idx], - None => para_level, - } - }; + result.sos = max(seq_level, pred_level).bidi_class(); + result.eos = max(end_level, succ_level).bidi_class(); - result.sos = max(seq_level, pred_level).bidi_class(); - result.eos = max(end_level, succ_level).bidi_class(); - result - }) - .collect() + isolating_run_sequences.push(result); + } } impl IsolatingRunSequence { @@ -219,6 +276,9 @@ impl IsolatingRunSequence { /// Finds the level runs in a paragraph. /// /// <http://www.unicode.org/reports/tr9/#BD7> +/// +/// This is only used by tests; normally level runs are identified during explicit::compute. +#[cfg(test)] fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> { assert_eq!(levels.len(), original_classes.len()); @@ -246,10 +306,7 @@ fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> /// /// <http://www.unicode.org/reports/tr9/#X9> pub fn removed_by_x9(class: BidiClass) -> bool { - match class { - RLE | LRE | RLO | LRO | PDF | BN => true, - _ => false, - } + matches!(class, RLE | LRE | RLO | LRO | PDF | BN) } // For use as a predicate for `position` / `rposition` @@ -281,7 +338,14 @@ mod tests { let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(), @@ -294,7 +358,14 @@ mod tests { let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(), @@ -307,7 +378,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(), @@ -326,7 +404,14 @@ mod tests { let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 @@ -385,7 +470,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 |