// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 3.3.3 Preparations for Implicit Processing //! //! use alloc::vec::Vec; use core::cmp::max; use core::ops::Range; use super::level::Level; use super::BidiClass::{self, *}; /// A maximal substring of characters with the same embedding level. /// /// Represented as a range of byte indices. pub type LevelRun = Range; /// Output of `isolating_run_sequences` (steps X9-X10) #[derive(Debug, PartialEq)] pub struct IsolatingRunSequence { pub runs: Vec, pub sos: BidiClass, // Start-of-sequence type. pub eos: BidiClass, // End-of-sequence type. } /// Compute the set of isolating run sequences. /// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs /// except the last one in the sequence, the last character of the run is an isolate initiator /// whose matching PDI is the first character of the next level run in the sequence. /// /// Note: This function does *not* return the sequences in order by their first characters. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], ) -> Vec { let runs = level_runs(levels, original_classes); // Compute the set of isolating run sequences. // let mut sequences = Vec::with_capacity(runs.len()); // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. let mut stack = vec![Vec::new()]; for run in runs { assert!(run.len() > 0); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; let end_class = original_classes[run.end - 1]; let mut sequence = if start_class == PDI && stack.len() > 1 { // Continue a previous sequence interrupted by an isolate. stack.pop().unwrap() } else { // Start a new sequence. Vec::new() }; sequence.push(run); if let RLI | LRI | FSI = end_class { // Resume this sequence after the isolate. stack.push(sequence); } else { // This sequence is finished. sequences.push(sequence); } } // Pop any remaning sequences off the stack. sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); // Determine the `sos` and `eos` class for each sequence. // sequences .into_iter() .map(|sequence: Vec| { assert!(!sequence.is_empty()); let start_of_seq = sequence[0].start; let end_of_seq = sequence[sequence.len() - 1].end; let seq_level = levels[start_of_seq]; #[cfg(test)] for run in sequence.clone() { for idx in run { if not_removed_by_x9(&original_classes[idx]) { assert_eq!(seq_level, levels[idx]); } } } // Get the level of the last non-removed char before the runs. let pred_level = match original_classes[..start_of_seq] .iter() .rposition(not_removed_by_x9) { Some(idx) => levels[idx], None => para_level, }; // Get the level of the next non-removed char after the runs. let succ_level = if let RLI | LRI | FSI = original_classes[end_of_seq - 1] { para_level } else { match original_classes[end_of_seq..] .iter() .position(not_removed_by_x9) { Some(idx) => levels[end_of_seq + idx], None => para_level, } }; IsolatingRunSequence { runs: sequence, sos: max(seq_level, pred_level).bidi_class(), eos: max(seq_level, succ_level).bidi_class(), } }) .collect() } /// Finds the level runs in a paragraph. /// /// fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec { assert_eq!(levels.len(), original_classes.len()); let mut runs = Vec::new(); if levels.is_empty() { return runs; } let mut current_run_level = levels[0]; let mut current_run_start = 0; for i in 1..levels.len() { if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { // End the last run and start a new one. runs.push(current_run_start..i); current_run_level = levels[i]; current_run_start = i; } } runs.push(current_run_start..levels.len()); runs } /// Should this character be ignored in steps after X9? /// /// pub fn removed_by_x9(class: BidiClass) -> bool { match class { RLE | LRE | RLO | LRO | PDF | BN => true, _ => false, } } // For use as a predicate for `position` / `rposition` pub fn not_removed_by_x9(class: &BidiClass) -> bool { !removed_by_x9(*class) } #[cfg(test)] mod tests { use super::*; #[test] fn test_level_runs() { assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]); assert_eq!( level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]), &[0..3, 3..5, 5..6, 6..8] ); } // From #[rustfmt::skip] #[test] fn test_isolating_run_sequences() { // == Example 1 == // text1·RLE·text2·PDF·RLE·text3·PDF·text4 // index 0 1 2 3 4 5 6 7 let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2], vec![2..7], vec![7..8]] ); // == Example 2 == // text1·RLI·text2·PDI·RLI·text3·PDI·text4 // index 0 1 2 3 4 5 6 7 let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]] ); // == Example 3 == // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7 // index 0 1 2 3 4 5 6 7 8 9 10 11 12 let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]] ); } // From #[rustfmt::skip] #[test] fn test_isolating_run_sequences_sos_and_eos() { // == Example 1 == // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6 // index 0 1 2 3 4 5 6 7 8 9 10 11 let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 assert_eq!( &sequences[0], &IsolatingRunSequence { runs: vec![0..2], sos: L, eos: R, } ); // text2 assert_eq!( &sequences[1], &IsolatingRunSequence { runs: vec![2..4], sos: R, eos: L, } ); // text3 assert_eq!( &sequences[2], &IsolatingRunSequence { runs: vec![4..6], sos: L, eos: L, } ); // text4 text5 assert_eq!( &sequences[3], &IsolatingRunSequence { runs: vec![6..11], sos: L, eos: R, } ); // text6 assert_eq!( &sequences[4], &IsolatingRunSequence { runs: vec![11..12], sos: R, eos: L, } ); // == Example 2 == // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6 // index 0 1 2 3 4 5 6 7 8 9 10 11 let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 assert_eq!( &sequences[0], &IsolatingRunSequence { runs: vec![0..2, 7..9, 10..12], sos: L, eos: L, } ); // text2·LRI·PDI·text4 assert_eq!( &sequences[1], &IsolatingRunSequence { runs: vec![2..4, 5..7], sos: R, eos: R, } ); // text3 assert_eq!( &sequences[2], &IsolatingRunSequence { runs: vec![4..5], sos: L, eos: L, } ); // text5 assert_eq!( &sequences[3], &IsolatingRunSequence { runs: vec![9..10], sos: R, eos: R, } ); } #[test] fn test_removed_by_x9() { let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN]; let not_classes = &[L, RLI, AL, LRI, PDI]; for x in rem_classes { assert_eq!(removed_by_x9(*x), true); } for x in not_classes { assert_eq!(removed_by_x9(*x), false); } } #[test] fn test_not_removed_by_x9() { let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI]; for x in non_x9_classes { assert_eq!(not_removed_by_x9(&x), true); } } }