summaryrefslogtreecommitdiffstats
path: root/third_party/rust/unicode-bidi
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/unicode-bidi')
-rw-r--r--third_party/rust/unicode-bidi/.appveyor.yml19
-rw-r--r--third_party/rust/unicode-bidi/.cargo-checksum.json2
-rw-r--r--third_party/rust/unicode-bidi/.github/workflows/main.yml49
-rw-r--r--third_party/rust/unicode-bidi/.rustfmt.toml1
-rw-r--r--third_party/rust/unicode-bidi/Cargo.lock175
-rw-r--r--third_party/rust/unicode-bidi/Cargo.toml10
-rw-r--r--third_party/rust/unicode-bidi/src/char_data/mod.rs5
-rw-r--r--third_party/rust/unicode-bidi/src/char_data/tables.rs4
-rw-r--r--third_party/rust/unicode-bidi/src/deprecated.rs9
-rw-r--r--third_party/rust/unicode-bidi/src/explicit.rs129
-rw-r--r--third_party/rust/unicode-bidi/src/implicit.rs93
-rw-r--r--third_party/rust/unicode-bidi/src/level.rs15
-rw-r--r--third_party/rust/unicode-bidi/src/lib.rs129
-rw-r--r--third_party/rust/unicode-bidi/src/prepare.rs266
-rw-r--r--third_party/rust/unicode-bidi/src/utf16.rs36
15 files changed, 696 insertions, 246 deletions
diff --git a/third_party/rust/unicode-bidi/.appveyor.yml b/third_party/rust/unicode-bidi/.appveyor.yml
new file mode 100644
index 0000000000..1bd43a1145
--- /dev/null
+++ b/third_party/rust/unicode-bidi/.appveyor.yml
@@ -0,0 +1,19 @@
+install:
+ - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
+ - rustup-init -yv --default-toolchain nightly
+ - set PATH=%PATH%;%USERPROFILE%\.cargo\bin
+ - rustc -V
+ - cargo -V
+ - git submodule update --init --recursive
+
+build: false
+
+environment:
+ RUST_BACKTRACE: full
+
+test_script:
+ - cargo build --verbose --all
+ - cargo doc --verbose --all --no-deps
+
+ - cargo test --verbose --all
+ - cargo test --verbose --all --features serde
diff --git a/third_party/rust/unicode-bidi/.cargo-checksum.json b/third_party/rust/unicode-bidi/.cargo-checksum.json
index 3628dc8f6f..fa0b5995c0 100644
--- a/third_party/rust/unicode-bidi/.cargo-checksum.json
+++ b/third_party/rust/unicode-bidi/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"AUTHORS":"1ff3a7c8519b29544bb28ba9b1e7502df0cb764051fb9a1172e60006aa2b8dcc","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.toml":"9cd0be282dbaeacf5d1fdf07096114c7b3f16f275755f30a0d2e873ab1fbc150","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"81d3dc6d894a68894d982760b0a907f9dcbb0da179a8063ed9de1d2257518957","src/char_data/mod.rs":"0622df8ce4b4de60aea7e4787635d3187f79f7a3f9001e3d209f58fd07d03887","src/char_data/tables.rs":"50faf4eef73c831a38b735309ff3415e9f65992a0474ff5c055138f91c91ee16","src/data_source.rs":"36fa0785e51c549c1f72f09040cfe515b848d1b23fb30d469770a6b4b17b49df","src/deprecated.rs":"46c5a8bb1e6aa6193eec8269891d4cbbb5fd92214eb55eac7ea5e7ca193386aa","src/explicit.rs":"afa7599674fc8daad2939e5987ec5d937ed9fdbb78b8492b1e137db88d0a3af7","src/format_chars.rs":"678399fec3f4bfaf4093f38cfdb8956288313386dc3511dab9fb58164e8dc01b","src/implicit.rs":"e96484178d1bab97439b2c0cf4b3a8d6ee18459b9de64499aa07607aa304be0c","src/level.rs":"921fb7b8960f6cc703a51936075302db513927b8c5d5d760536b6ff70ddab0dd","src/lib.rs":"ca09c7dedc56ec139fa92fec26c3983346a3b6456992acdfbfe18b324174e0d8","src/prepare.rs":"c4aaad603f5c1be5c4364265aac7590335dc234288a4151e0f30bcefe532e14d","src/utf16.rs":"30d31c4d8c814315b48b56a2dfb755b8b442dde23f76429c6df1af754999fe3b"},"package":"08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"} \ No newline at end of file
+{"files":{".appveyor.yml":"15bdeea0e836ac2ccbb259cde1509a0673a73300e90e970f3e533b189234b6fd",".github/workflows/main.yml":"e0bee93284a8b39c9d419038bfa72a6389ebdae39ce55c40624e764ac1c98a9e",".rustfmt.toml":"168c973274f3f5946e90cac6ae0f017d0832a5c830872d9d3b9b387ad6c1a81e","AUTHORS":"1ff3a7c8519b29544bb28ba9b1e7502df0cb764051fb9a1172e60006aa2b8dcc","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.lock":"8842f03d0fcea88aa1546244d0455834732603175b293218f8e9a9f44c297b7c","Cargo.toml":"099454ebee9b081080e1521eccbe447db30b17ac36e9e655ed1d0d1e20e657fb","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"81d3dc6d894a68894d982760b0a907f9dcbb0da179a8063ed9de1d2257518957","src/char_data/mod.rs":"8cbdcaacddb3dd9b70d615693fa73d0e7dca6332102a95f0d3ce447df7645284","src/char_data/tables.rs":"8adf126131f573a3b6d2c35849c1cc13c831c9b55c4d3fcb5a3961d8ed7a0d44","src/data_source.rs":"36fa0785e51c549c1f72f09040cfe515b848d1b23fb30d469770a6b4b17b49df","src/deprecated.rs":"f94c0e75dec7e70cb9802e26b7f82fe618dcdd50e9973927bacd4eccc6899c62","src/explicit.rs":"86c3c55bf2cc90aab1411aac6cf05de505ca74e44a76fe829572dd7dc4dd2aa3","src/format_chars.rs":"678399fec3f4bfaf4093f38cfdb8956288313386dc3511dab9fb58164e8dc01b","src/implicit.rs":"8d5b003464aee3f333785c6170a884945251f39601e4ea658e669a2ad575d588","src/level.rs":"ce1eaa9940f1b90bc59aba296488b8cd128aefeb4b6b2e3ecc34da26c569150b","src/lib.rs":"9dff9c105f481a03823de6ad9a0a11733af019649ae211644061d5a525670244","src/prepare.rs":"aeb8b88cfb2d2e6b74473f5903205dd3683d57abcc8801de7b9fdea6a432a0fe","src/utf16.rs":"12ee177127a0b5b0350a1fcc1edf7387c26b51ec5654f724629aab723881c313"},"package":null} \ No newline at end of file
diff --git a/third_party/rust/unicode-bidi/.github/workflows/main.yml b/third_party/rust/unicode-bidi/.github/workflows/main.yml
new file mode 100644
index 0000000000..303bac8b97
--- /dev/null
+++ b/third_party/rust/unicode-bidi/.github/workflows/main.yml
@@ -0,0 +1,49 @@
+name: CI
+
+on:
+ push:
+ branches: ['master', 'auto']
+ pull_request:
+
+jobs:
+ Test:
+ strategy:
+ matrix:
+ os: [ubuntu-latest]
+ rust: [1.47.0, stable, beta, nightly]
+ runs-on: ${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@master
+ with:
+ toolchain: ${{ matrix.rust }}
+ - name: Unpin dependencies except on MSRV
+ if: matrix.rust != '1.47.0'
+ run: cargo update
+ - run: cargo build --all-targets
+ - run: cargo test
+ - run: cargo test --features "serde"
+ - run: cargo test --no-default-features
+ - run: cargo test --no-default-features --features=hardcoded-data
+ Fmt:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@stable
+ with:
+ components: rustfmt
+ - run: cargo fmt --check
+
+ build_result:
+ name: homu build finished
+ runs-on: ubuntu-latest
+ needs:
+ - "Test"
+ - "Fmt"
+ steps:
+ - name: Mark the job as successful
+ run: exit 0
+ if: success()
+ - name: Mark the job as unsuccessful
+ run: exit 1
+ if: "!success()"
diff --git a/third_party/rust/unicode-bidi/.rustfmt.toml b/third_party/rust/unicode-bidi/.rustfmt.toml
new file mode 100644
index 0000000000..e416686ee5
--- /dev/null
+++ b/third_party/rust/unicode-bidi/.rustfmt.toml
@@ -0,0 +1 @@
+array_width = 80
diff --git a/third_party/rust/unicode-bidi/Cargo.lock b/third_party/rust/unicode-bidi/Cargo.lock
new file mode 100644
index 0000000000..63f01ebdcc
--- /dev/null
+++ b/third_party/rust/unicode-bidi/Cargo.lock
@@ -0,0 +1,175 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "flame"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde_derive 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde_json 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread-id 3.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "flamer"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
+ "syn 1.0.109 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "lazy_static"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "libc"
+version = "0.2.149"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "unicode-ident 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.1.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "ryu"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "serde"
+version = "1.0.156"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "serde_derive 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.156"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
+ "syn 1.0.109 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "itoa 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "ryu 1.0.15 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "serde_test"
+version = "1.0.175"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)",
+ "quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-ident 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "thread-id"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.149 (registry+https://github.com/rust-lang/crates.io-index)",
+ "redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.13"
+dependencies = [
+ "flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "flamer 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde_test 1.0.175 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1fc2706461e1ee94f55cab2ed2e3d34ae9536cfa830358ef80acff1a3dacab30"
+"checksum flamer 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab"
+"checksum itoa 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)" = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
+"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
+"checksum libc 0.2.149 (registry+https://github.com/rust-lang/crates.io-index)" = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
+"checksum proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "92de25114670a878b1261c79c9f8f729fb97e95bac93f6312f583c60dd6a1dfe"
+"checksum quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "5907a1b7c277254a8b15170f6e7c97cfa60ee7872a3217663bb81151e48184bb"
+"checksum redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)" = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
+"checksum ryu 1.0.15 (registry+https://github.com/rust-lang/crates.io-index)" = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
+"checksum serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)" = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4"
+"checksum serde_derive 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)" = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d"
+"checksum serde_json 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)" = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3"
+"checksum serde_test 1.0.175 (registry+https://github.com/rust-lang/crates.io-index)" = "29baf0f77ca9ad9c6ed46e1b408b5e0f30b5184bcd66884e7f6d36bd7a65a8a4"
+"checksum syn 1.0.109 (registry+https://github.com/rust-lang/crates.io-index)" = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+"checksum thread-id 3.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1"
+"checksum unicode-ident 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)" = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+"checksum winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/third_party/rust/unicode-bidi/Cargo.toml b/third_party/rust/unicode-bidi/Cargo.toml
index 29be4e5a71..584d471c8a 100644
--- a/third_party/rust/unicode-bidi/Cargo.toml
+++ b/third_party/rust/unicode-bidi/Cargo.toml
@@ -11,6 +11,7 @@
[package]
edition = "2018"
+rust-version = "1.47.0"
name = "unicode-bidi"
version = "0.3.15"
authors = ["The Servo Project Developers"]
@@ -61,8 +62,13 @@ features = ["derive"]
optional = true
default-features = false
-[dev-dependencies.serde_test]
-version = ">=0.8, <2.0"
+[dependencies.smallvec]
+version = ">=1.13"
+features = ["union"]
+optional = true
+
+[dev-dependencies]
+serde_test = ">=0.8, <2.0"
[features]
bench_it = []
diff --git a/third_party/rust/unicode-bidi/src/char_data/mod.rs b/third_party/rust/unicode-bidi/src/char_data/mod.rs
index 4edf5b8f4c..543b0ed8fd 100644
--- a/third_party/rust/unicode-bidi/src/char_data/mod.rs
+++ b/third_party/rust/unicode-bidi/src/char_data/mod.rs
@@ -59,10 +59,7 @@ pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpening
}
pub fn is_rtl(bidi_class: BidiClass) -> bool {
- match bidi_class {
- RLE | RLO | RLI => true,
- _ => false,
- }
+ matches!(bidi_class, RLE | RLO | RLI)
}
#[cfg(feature = "hardcoded-data")]
diff --git a/third_party/rust/unicode-bidi/src/char_data/tables.rs b/third_party/rust/unicode-bidi/src/char_data/tables.rs
index ecdcf496d1..f10265d214 100644
--- a/third_party/rust/unicode-bidi/src/char_data/tables.rs
+++ b/third_party/rust/unicode-bidi/src/char_data/tables.rs
@@ -45,7 +45,7 @@ pub enum BidiClass {
use self::BidiClass::*;
#[cfg(feature = "hardcoded-data")]
-pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[
+pub const bidi_class_table: &[(char, char, BidiClass)] = &[
('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S),
('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B),
('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}',
@@ -516,7 +516,7 @@ pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[
'\u{e01ef}', NSM), ('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L)
];
-pub const bidi_pairs_table: &'static [(char, char, Option<char>)] = &[
+pub const bidi_pairs_table: &[(char, char, Option<char>)] = &[
('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}',
'\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}',
'\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}',
diff --git a/third_party/rust/unicode-bidi/src/deprecated.rs b/third_party/rust/unicode-bidi/src/deprecated.rs
index 74a24f5b8b..c903663e99 100644
--- a/third_party/rust/unicode-bidi/src/deprecated.rs
+++ b/third_party/rust/unicode-bidi/src/deprecated.rs
@@ -9,8 +9,6 @@
//! This module holds deprecated assets only.
-use alloc::vec::Vec;
-
use super::*;
/// Find the level runs within a line and return them in visual order.
@@ -71,10 +69,8 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
// Found the start of a sequence. Now find the end.
let mut seq_end = seq_start + 1;
- while seq_end < run_count {
- if levels[runs[seq_end].start] < max_level {
- break;
- }
+
+ while seq_end < run_count && levels[runs[seq_end].start] >= max_level {
seq_end += 1;
}
@@ -83,6 +79,7 @@ pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
seq_start = seq_end;
}
+
max_level
.lower(1)
.expect("Lowering embedding level below zero");
diff --git a/third_party/rust/unicode-bidi/src/explicit.rs b/third_party/rust/unicode-bidi/src/explicit.rs
index d4ad897b54..5760ab8ece 100644
--- a/third_party/rust/unicode-bidi/src/explicit.rs
+++ b/third_party/rust/unicode-bidi/src/explicit.rs
@@ -11,19 +11,25 @@
//!
//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
-use alloc::vec::Vec;
+#[cfg(feature = "smallvec")]
+use smallvec::{smallvec, SmallVec};
use super::char_data::{
is_rtl,
BidiClass::{self, *},
};
use super::level::Level;
+use super::prepare::removed_by_x9;
+use super::LevelRunVec;
use super::TextSource;
-/// Compute explicit embedding levels for one paragraph of text (X1-X8).
+/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify
+/// level runs (BD7) for use when determining Isolating Run Sequences (X10).
///
/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
/// for each char in `text`.
+///
+/// `runs` returns the list of level runs (BD7) of the text.
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn compute<'a, T: TextSource<'a> + ?Sized>(
text: &'a T,
@@ -31,35 +37,44 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
original_classes: &[BidiClass],
levels: &mut [Level],
processing_classes: &mut [BidiClass],
+ runs: &mut LevelRunVec,
) {
assert_eq!(text.len(), original_classes.len());
// <http://www.unicode.org/reports/tr9/#X1>
- let mut stack = DirectionalStatusStack::new();
- stack.push(para_level, OverrideStatus::Neutral);
+ #[cfg(feature = "smallvec")]
+ let mut stack: SmallVec<[Status; 8]> = smallvec![Status {
+ level: para_level,
+ status: OverrideStatus::Neutral,
+ }];
+ #[cfg(not(feature = "smallvec"))]
+ let mut stack = vec![Status {
+ level: para_level,
+ status: OverrideStatus::Neutral,
+ }];
let mut overflow_isolate_count = 0u32;
let mut overflow_embedding_count = 0u32;
let mut valid_isolate_count = 0u32;
+ let mut current_run_level = Level::ltr();
+ let mut current_run_start = 0;
+
for (i, len) in text.indices_lengths() {
+ let last = stack.last().unwrap();
+
match original_classes[i] {
// Rules X2-X5c
RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
- let last_level = stack.last().level;
-
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
- levels[i] = last_level;
+ levels[i] = last.level;
// X5a-X5c: Isolate initiators get the level of the last entry on the stack.
- let is_isolate = match original_classes[i] {
- RLI | LRI | FSI => true,
- _ => false,
- };
+ let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
if is_isolate {
// Redundant due to "Retaining explicit formatting characters" step.
- // levels[i] = last_level;
- match stack.last().status {
+ // levels[i] = last.level;
+ match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
@@ -67,22 +82,25 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
}
let new_level = if is_rtl(original_classes[i]) {
- last_level.new_explicit_next_rtl()
+ last.level.new_explicit_next_rtl()
} else {
- last_level.new_explicit_next_ltr()
+ last.level.new_explicit_next_ltr()
};
+
if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
{
let new_level = new_level.unwrap();
- stack.push(
- new_level,
- match original_classes[i] {
+
+ stack.push(Status {
+ level: new_level,
+ status: match original_classes[i] {
RLO => OverrideStatus::RTL,
LRO => OverrideStatus::LTR,
RLI | LRI | FSI => OverrideStatus::Isolate,
_ => OverrideStatus::Neutral,
},
- );
+ });
+
if is_isolate {
valid_isolate_count += 1;
} else {
@@ -110,21 +128,21 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
overflow_isolate_count -= 1;
} else if valid_isolate_count > 0 {
overflow_embedding_count = 0;
- loop {
- // Pop everything up to and including the last Isolate status.
- match stack.vec.pop() {
- None
- | Some(Status {
- status: OverrideStatus::Isolate,
- ..
- }) => break,
- _ => continue,
- }
- }
+
+ while !matches!(
+ stack.pop(),
+ None | Some(Status {
+ status: OverrideStatus::Isolate,
+ ..
+ })
+ ) {}
+
valid_isolate_count -= 1;
}
- let last = stack.last();
+
+ let last = stack.last().unwrap();
levels[i] = last.level;
+
match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
@@ -138,11 +156,12 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
// do nothing
} else if overflow_embedding_count > 0 {
overflow_embedding_count -= 1;
- } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
- stack.vec.pop();
+ } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 {
+ stack.pop();
}
+
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
- levels[i] = stack.last().level;
+ levels[i] = stack.last().unwrap().level;
// X9 part of retaining explicit formatting characters.
processing_classes[i] = BN;
}
@@ -153,8 +172,8 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
// <http://www.unicode.org/reports/tr9/#X6>
_ => {
- let last = stack.last();
levels[i] = last.level;
+
// This condition is not in the spec, but I am pretty sure that is a spec bug.
// https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
if original_classes[i] != BN {
@@ -172,6 +191,26 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>(
levels[i + j] = levels[i];
processing_classes[i + j] = processing_classes[i];
}
+
+ // Identify level runs to be passed to prepare::isolating_run_sequences().
+ if i == 0 {
+ // Initialize for the first (or only) run.
+ current_run_level = levels[i];
+ } else {
+ // Check if we need to start a new level run.
+ // <https://www.unicode.org/reports/tr9/#BD7>
+ if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
+ // End the last run and start a new one.
+ runs.push(current_run_start..i);
+ current_run_level = levels[i];
+ current_run_start = i;
+ }
+ }
+ }
+
+ // Append the trailing level run, if non-empty.
+ if levels.len() > current_run_start {
+ runs.push(current_run_start..levels.len());
}
}
@@ -188,23 +227,3 @@ enum OverrideStatus {
LTR,
Isolate,
}
-
-struct DirectionalStatusStack {
- vec: Vec<Status>,
-}
-
-impl DirectionalStatusStack {
- fn new() -> Self {
- DirectionalStatusStack {
- vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2),
- }
- }
-
- fn push(&mut self, level: Level, status: OverrideStatus) {
- self.vec.push(Status { level, status });
- }
-
- fn last(&self) -> &Status {
- self.vec.last().unwrap()
- }
-}
diff --git a/third_party/rust/unicode-bidi/src/implicit.rs b/third_party/rust/unicode-bidi/src/implicit.rs
index 0311053c0a..334afec049 100644
--- a/third_party/rust/unicode-bidi/src/implicit.rs
+++ b/third_party/rust/unicode-bidi/src/implicit.rs
@@ -9,8 +9,11 @@
//! 3.3.4 - 3.3.6. Resolve implicit levels and types.
+#[cfg(not(feature = "smallvec"))]
use alloc::vec::Vec;
use core::cmp::max;
+#[cfg(feature = "smallvec")]
+use smallvec::SmallVec;
use super::char_data::BidiClass::{self, *};
use super::level::Level;
@@ -39,7 +42,13 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
// The previous class for the purposes of rule W1, not tracking changes from any other rules.
let mut prev_class_before_w1 = sequence.sos;
let mut last_strong_is_al = false;
+ #[cfg(feature = "smallvec")]
+ let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5
+ #[cfg(not(feature = "smallvec"))]
let mut et_run_indices = Vec::new(); // for W5
+ #[cfg(feature = "smallvec")]
+ let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
+ #[cfg(not(feature = "smallvec"))]
let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
for (run_index, level_run) in sequence.runs.iter().enumerate() {
@@ -177,7 +186,7 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
_ => {
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// If there was a BN run before this, that's now a part of this ET run.
- et_run_indices.extend(&bn_run_indices);
+ et_run_indices.extend(bn_run_indices.clone());
// In case this is followed by an EN.
et_run_indices.push(i);
@@ -224,26 +233,29 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
// W7. If the previous strong char was L, change EN to L.
let mut last_strong_is_l = sequence.sos == L;
- for run in &sequence.runs {
- for i in run.clone() {
- match processing_classes[i] {
- EN if last_strong_is_l => {
- processing_classes[i] = L;
- }
- L => {
- last_strong_is_l = true;
- }
- R | AL => {
- last_strong_is_l = false;
- }
- // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
- // Already scanning past BN here.
- _ => {}
+ for i in sequence.runs.iter().cloned().flatten() {
+ match processing_classes[i] {
+ EN if last_strong_is_l => {
+ processing_classes[i] = L;
}
+ L => {
+ last_strong_is_l = true;
+ }
+ R | AL => {
+ last_strong_is_l = false;
+ }
+ // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
+ // Already scanning past BN here.
+ _ => {}
}
}
}
+#[cfg(feature = "smallvec")]
+type BracketPairVec = SmallVec<[BracketPair; 8]>;
+#[cfg(not(feature = "smallvec"))]
+type BracketPairVec = Vec<BracketPair>;
+
/// 3.3.5 Resolving Neutral Types
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types>
@@ -267,7 +279,14 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// > Identify the bracket pairs in the current isolating run sequence according to BD16.
// We use processing_classes, not original_classes, due to BD14/BD15
- let bracket_pairs = identify_bracket_pairs(text, data_source, sequence, processing_classes);
+ let mut bracket_pairs = BracketPairVec::new();
+ identify_bracket_pairs(
+ text,
+ data_source,
+ sequence,
+ processing_classes,
+ &mut bracket_pairs,
+ );
// > For each bracket-pair element in the list of pairs of text positions
//
@@ -308,7 +327,7 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
found_e = true;
} else if class == not_e {
found_not_e = true;
- } else if class == BidiClass::EN || class == BidiClass::AN {
+ } else if matches!(class, BidiClass::EN | BidiClass::AN) {
// > Within this scope, bidirectional types EN and AN are treated as R.
if e == BidiClass::L {
found_not_e = true;
@@ -337,15 +356,15 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
.iter_backwards_from(pair.start, pair.start_run)
.map(|i| processing_classes[i])
.find(|class| {
- *class == BidiClass::L
- || *class == BidiClass::R
- || *class == BidiClass::EN
- || *class == BidiClass::AN
+ matches!(
+ class,
+ BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN
+ )
})
.unwrap_or(sequence.sos);
// > Within this scope, bidirectional types EN and AN are treated as R.
- if previous_strong == BidiClass::EN || previous_strong == BidiClass::AN {
+ if matches!(previous_strong, BidiClass::EN | BidiClass::AN) {
previous_strong = BidiClass::R;
}
@@ -413,6 +432,9 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut prev_class = sequence.sos;
while let Some(mut i) = indices.next() {
// Process sequences of NI characters.
+ #[cfg(feature = "smallvec")]
+ let mut ni_run = SmallVec::<[usize; 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
let mut ni_run = Vec::new();
// The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
if is_NI(processing_classes[i]) || processing_classes[i] == BN {
@@ -484,9 +506,12 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
data_source: &D,
run_sequence: &IsolatingRunSequence,
original_classes: &[BidiClass],
-) -> Vec<BracketPair> {
- let mut ret = vec![];
- let mut stack = vec![];
+ bracket_pairs: &mut BracketPairVec,
+) {
+ #[cfg(feature = "smallvec")]
+ let mut stack = SmallVec::<[(char, usize, usize); 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
+ let mut stack = Vec::new();
for (run_index, level_run) in run_sequence.runs.iter().enumerate() {
for (i, ch) in text.subrange(level_run.clone()).char_indices() {
@@ -532,7 +557,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
start_run: element.2,
end_run: run_index,
};
- ret.push(pair);
+ bracket_pairs.push(pair);
// > Pop the stack through the current stack element inclusively.
stack.truncate(stack_index);
@@ -545,8 +570,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
}
// > Sort the list of pairs of text positions in ascending order based on
// > the text position of the opening paired bracket.
- ret.sort_by_key(|r| r.start);
- ret
+ bracket_pairs.sort_by_key(|r| r.start);
}
/// 3.3.6 Resolving Implicit Levels
@@ -555,11 +579,11 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels>
#[cfg_attr(feature = "flame_it", flamer::flame)]
-pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level {
+pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level {
let mut max_level = Level::ltr();
- assert_eq!(original_classes.len(), levels.len());
+ assert_eq!(processing_classes.len(), levels.len());
for i in 0..levels.len() {
- match (levels[i].is_rtl(), original_classes[i]) {
+ match (levels[i].is_rtl(), processing_classes[i]) {
(false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"),
(false, R) | (true, L) | (true, EN) | (true, AN) => {
levels[i].raise(1).expect("Level number error")
@@ -578,8 +602,5 @@ pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> L
/// <http://www.unicode.org/reports/tr9/#NI>
#[allow(non_snake_case)]
fn is_NI(class: BidiClass) -> bool {
- match class {
- B | S | WS | ON | FSI | LRI | RLI | PDI => true,
- _ => false,
- }
+ matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI)
}
diff --git a/third_party/rust/unicode-bidi/src/level.rs b/third_party/rust/unicode-bidi/src/level.rs
index ef4f6d9e40..5ece0251a5 100644
--- a/third_party/rust/unicode-bidi/src/level.rs
+++ b/third_party/rust/unicode-bidi/src/level.rs
@@ -13,9 +13,10 @@
//!
//! <http://www.unicode.org/reports/tr9/#BD2>
-use alloc::string::{String, ToString};
-use alloc::vec::Vec;
-use core::convert::{From, Into};
+use alloc::{
+ string::{String, ToString},
+ vec::Vec,
+};
use core::slice;
use super::char_data::BidiClass;
@@ -219,11 +220,11 @@ pub fn has_rtl(levels: &[Level]) -> bool {
levels.iter().any(|&lvl| lvl.is_rtl())
}
-impl Into<u8> for Level {
+impl From<Level> for u8 { // `From` also provides `Into<u8> for Level` via the std blanket impl, so `.into()` callers keep working
/// Convert to the level number
#[inline]
- fn into(self) -> u8 {
- self.number()
+ fn from(val: Level) -> Self {
+ val.number()
}
}
@@ -244,7 +245,7 @@ impl<'a> PartialEq<&'a str> for Level {
}
/// Used for matching levels in conformance tests
-impl<'a> PartialEq<String> for Level {
+impl PartialEq<String> for Level {
#[inline]
fn eq(&self, s: &String) -> bool {
self == &s.as_str()
diff --git a/third_party/rust/unicode-bidi/src/lib.rs b/third_party/rust/unicode-bidi/src/lib.rs
index 1072b67fe0..489927588a 100644
--- a/third_party/rust/unicode-bidi/src/lib.rs
+++ b/third_party/rust/unicode-bidi/src/lib.rs
@@ -71,6 +71,8 @@
extern crate std;
#[macro_use]
extern crate alloc;
+#[cfg(feature = "smallvec")]
+extern crate smallvec;
pub mod data_source;
pub mod deprecated;
@@ -86,7 +88,7 @@ mod prepare;
pub use crate::char_data::{BidiClass, UNICODE_VERSION};
pub use crate::data_source::BidiDataSource;
pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL};
-pub use crate::prepare::LevelRun;
+pub use crate::prepare::{LevelRun, LevelRunVec};
#[cfg(feature = "hardcoded-data")]
pub use crate::char_data::{bidi_class, HardcodedBidiData};
@@ -99,6 +101,8 @@ use core::cmp;
use core::iter::repeat;
use core::ops::Range;
use core::str::CharIndices;
+#[cfg(feature = "smallvec")]
+use smallvec::SmallVec;
use crate::format_chars as chars;
use crate::BidiClass::*;
@@ -244,8 +248,14 @@ struct InitialInfoExt<'text> {
/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
/// requires no further bidi processing (i.e. there are no RTL characters or bidi
- /// control codes present).
- pure_ltr: Vec<bool>,
+ /// control codes present), and whether any bidi isolation controls are present.
+ flags: Vec<ParagraphInfoFlags>,
+}
+
+#[derive(PartialEq, Debug)]
+struct ParagraphInfoFlags { // per-paragraph flags, kept parallel to the list of paragraphs
+ is_pure_ltr: bool, // true while no RTL characters or bidi control characters were seen in the paragraph
+ has_isolate_controls: bool, // true once an RLI, LRI or FSI was seen in the paragraph
}
impl<'text> InitialInfoExt<'text> {
@@ -265,12 +275,12 @@ impl<'text> InitialInfoExt<'text> {
default_para_level: Option<Level>,
) -> InitialInfoExt<'a> {
let mut paragraphs = Vec::<ParagraphInfo>::new();
- let mut pure_ltr = Vec::<bool>::new();
- let (original_classes, _, _) = compute_initial_info(
+ let mut flags = Vec::<ParagraphInfoFlags>::new();
+ let (original_classes, _, _, _) = compute_initial_info(
data_source,
text,
default_para_level,
- Some((&mut paragraphs, &mut pure_ltr)),
+ Some((&mut paragraphs, &mut flags)),
);
InitialInfoExt {
@@ -279,7 +289,7 @@ impl<'text> InitialInfoExt<'text> {
original_classes,
paragraphs,
},
- pure_ltr,
+ flags,
}
}
}
@@ -295,16 +305,19 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
data_source: &D,
text: &'a T,
default_para_level: Option<Level>,
- mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<bool>)>,
-) -> (Vec<BidiClass>, Level, bool) {
+ mut split_paragraphs: Option<(&mut Vec<ParagraphInfo>, &mut Vec<ParagraphInfoFlags>)>,
+) -> (Vec<BidiClass>, Level, bool, bool) {
let mut original_classes = Vec::with_capacity(text.len());
// The stack contains the starting code unit index for each nested isolate we're inside.
+ #[cfg(feature = "smallvec")]
+ let mut isolate_stack = SmallVec::<[usize; 8]>::new();
+ #[cfg(not(feature = "smallvec"))]
let mut isolate_stack = Vec::new();
debug_assert!(
- if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs {
- paragraphs.is_empty() && pure_ltr.is_empty()
+ if let Some((ref paragraphs, ref flags)) = split_paragraphs {
+ paragraphs.is_empty() && flags.is_empty()
} else {
true
}
@@ -316,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// Per-paragraph flag: can subsequent processing be skipped? Set to false if any
// RTL characters or bidi control characters are encountered in the paragraph.
let mut is_pure_ltr = true;
+ // Set to true if any bidi isolation controls are present in the paragraph.
+ let mut has_isolate_controls = false;
#[cfg(feature = "flame_it")]
flame::start("compute_initial_info(): iter text.char_indices()");
@@ -334,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
match class {
B => {
- if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs {
+ if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs {
// P1. Split the text into separate paragraphs. The paragraph separator is kept
// with the previous paragraph.
let para_end = i + len;
@@ -343,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// P3. If no character is found in p2, set the paragraph level to zero.
level: para_level.unwrap_or(LTR_LEVEL),
});
- pure_ltr.push(is_pure_ltr);
+ flags.push(ParagraphInfoFlags {
+ is_pure_ltr,
+ has_isolate_controls,
+ });
// Reset state for the start of the next paragraph.
para_start = para_end;
// TODO: Support defaulting to direction of previous paragraph
@@ -351,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
// <http://www.unicode.org/reports/tr9/#HL1>
para_level = default_para_level;
is_pure_ltr = true;
+ has_isolate_controls = false;
isolate_stack.clear();
}
}
@@ -387,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
RLI | LRI | FSI => {
is_pure_ltr = false;
+ has_isolate_controls = true;
isolate_stack.push(i);
}
@@ -398,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
}
}
- if let Some((paragraphs, pure_ltr)) = split_paragraphs {
+ if let Some((paragraphs, flags)) = split_paragraphs {
if para_start < text.len() {
paragraphs.push(ParagraphInfo {
range: para_start..text.len(),
level: para_level.unwrap_or(LTR_LEVEL),
});
- pure_ltr.push(is_pure_ltr);
+ flags.push(ParagraphInfoFlags {
+ is_pure_ltr,
+ has_isolate_controls,
+ });
}
- debug_assert_eq!(paragraphs.len(), pure_ltr.len());
+ debug_assert_eq!(paragraphs.len(), flags.len());
}
debug_assert_eq!(original_classes.len(), text.len());
@@ -417,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
original_classes,
para_level.unwrap_or(LTR_LEVEL),
is_pure_ltr,
+ has_isolate_controls,
)
}
@@ -475,20 +499,21 @@ impl<'text> BidiInfo<'text> {
text: &'a str,
default_para_level: Option<Level>,
) -> BidiInfo<'a> {
- let InitialInfoExt { base, pure_ltr, .. } =
+ let InitialInfoExt { base, flags, .. } =
InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
let mut levels = Vec::<Level>::with_capacity(text.len());
let mut processing_classes = base.original_classes.clone();
- for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
+ for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
let text = &text[para.range.clone()];
let original_classes = &base.original_classes[para.range.clone()];
compute_bidi_info_for_para(
data_source,
para,
- *is_pure_ltr,
+ flags.is_pure_ltr,
+ flags.has_isolate_controls,
text,
original_classes,
&mut processing_classes,
@@ -713,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> {
) -> ParagraphBidiInfo<'a> {
// Here we could create a ParagraphInitialInfo struct to parallel the one
// used by BidiInfo, but there doesn't seem any compelling reason for it.
- let (original_classes, paragraph_level, is_pure_ltr) =
+ let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
compute_initial_info(data_source, text, default_para_level, None);
let mut levels = Vec::<Level>::with_capacity(text.len());
@@ -731,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> {
data_source,
&para_info,
is_pure_ltr,
+ has_isolate_controls,
text,
&original_classes,
&mut processing_classes,
@@ -855,12 +881,12 @@ impl<'text> ParagraphBidiInfo<'text> {
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
-fn reorder_line<'text>(
- text: &'text str,
+fn reorder_line(
+ text: &str,
line: Range<usize>,
levels: Vec<Level>,
runs: Vec<LevelRun>,
-) -> Cow<'text, str> {
+) -> Cow<'_, str> {
// If all isolating run sequences are LTR, no reordering is needed
if runs.iter().all(|run| levels[run.start].is_ltr()) {
return text[line].into();
@@ -1059,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
data_source: &D,
para: &ParagraphInfo,
is_pure_ltr: bool,
+ has_isolate_controls: bool,
text: &'a T,
original_classes: &[BidiClass],
processing_classes: &mut [BidiClass],
@@ -1072,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
let processing_classes = &mut processing_classes[para.range.clone()];
let levels = &mut levels[para.range.clone()];
+ let mut level_runs = LevelRunVec::new();
explicit::compute(
text,
@@ -1079,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
original_classes,
levels,
processing_classes,
+ &mut level_runs,
);
- let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels);
+ let mut sequences = prepare::IsolatingRunSequenceVec::new();
+ prepare::isolating_run_sequences(
+ para.level,
+ original_classes,
+ levels,
+ level_runs,
+ has_isolate_controls,
+ &mut sequences,
+ );
for sequence in &sequences {
implicit::resolve_weak(text, sequence, processing_classes);
implicit::resolve_neutral(
@@ -1093,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
processing_classes,
);
}
+
implicit::resolve_levels(processing_classes, levels);
assign_levels_to_removed_chars(para.level, original_classes, levels);
@@ -1122,20 +1160,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>(
B | S => {
assert_eq!(reset_to, None);
reset_to = Some(i + T::char_len(c));
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
}
// Whitespace, isolate formatting
WS | FSI | LRI | RLI | PDI => {
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
}
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// same as above + set the level
RLE | LRE | RLO | LRO | PDF | BN => {
- if reset_from == None {
+ if reset_from.is_none() {
reset_from = Some(i);
}
// also set the level to previous
@@ -1294,8 +1332,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
let mut isolate_level = 0;
for c in text.chars() {
match data_source.bidi_class(c) {
- LRI | RLI | FSI => isolate_level = isolate_level + 1,
- PDI if isolate_level > 0 => isolate_level = isolate_level - 1,
+ LRI | RLI | FSI => isolate_level += 1,
+ PDI if isolate_level > 0 => isolate_level -= 1,
L if isolate_level == 0 => return Direction::Ltr,
R | AL if isolate_level == 0 => return Direction::Rtl,
B if !use_full_text => break,
@@ -1342,7 +1380,7 @@ impl<'text> TextSource<'text> for str {
}
#[inline]
fn indices_lengths(&'text self) -> Self::IndexLenIter {
- Utf8IndexLenIter::new(&self)
+ Utf8IndexLenIter::new(self)
}
#[inline]
fn char_len(ch: char) -> usize {
@@ -1544,6 +1582,24 @@ mod tests {
let tests = vec![
(
// text
+ "",
+ // base level
+ Some(RTL_LEVEL),
+ // levels
+ Level::vec(&[]),
+ // original_classes
+ vec![],
+ // paragraphs
+ vec![],
+ // levels_u16
+ Level::vec(&[]),
+ // original_classes_u16
+ vec![],
+ // paragraphs_u16
+ vec![],
+ ),
+ (
+ // text
"abc123",
// base level
Some(LTR_LEVEL),
@@ -1703,6 +1759,19 @@ mod tests {
paragraphs: t.4.clone(),
}
);
+ // If it was empty, also test that ParagraphBidiInfo handles it safely.
+ if t.4.len() == 0 {
+ assert_eq!(
+ ParagraphBidiInfo::new(t.0, t.1),
+ ParagraphBidiInfo {
+ text: t.0,
+ original_classes: t.3.clone(),
+ levels: t.2.clone(),
+ paragraph_level: RTL_LEVEL,
+ is_pure_ltr: true,
+ }
+ )
+ }
// If it was a single paragraph, also test ParagraphBidiInfo.
if t.4.len() == 1 {
assert_eq!(
diff --git a/third_party/rust/unicode-bidi/src/prepare.rs b/third_party/rust/unicode-bidi/src/prepare.rs
index 9234e1aa61..f7b35ad689 100644
--- a/third_party/rust/unicode-bidi/src/prepare.rs
+++ b/third_party/rust/unicode-bidi/src/prepare.rs
@@ -14,6 +14,8 @@
use alloc::vec::Vec;
use core::cmp::max;
use core::ops::Range;
+#[cfg(feature = "smallvec")]
+use smallvec::{smallvec, SmallVec};
use super::level::Level;
use super::BidiClass::{self, *};
@@ -23,6 +25,11 @@ use super::BidiClass::{self, *};
/// Represented as a range of byte indices.
pub type LevelRun = Range<usize>;
+#[cfg(feature = "smallvec")]
+pub type LevelRunVec = SmallVec<[LevelRun; 8]>; // list of level runs; the first 8 are stored inline
+#[cfg(not(feature = "smallvec"))]
+pub type LevelRunVec = Vec<LevelRun>; // same list as a plain `Vec` when the `smallvec` feature is off
+
/// Output of `isolating_run_sequences` (steps X9-X10)
#[derive(Debug, PartialEq)]
pub struct IsolatingRunSequence {
@@ -31,6 +38,11 @@ pub struct IsolatingRunSequence {
pub eos: BidiClass, // End-of-sequence type.
}
+#[cfg(feature = "smallvec")]
+pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>; // output list filled by `isolating_run_sequences`; the first 8 are stored inline
+#[cfg(not(feature = "smallvec"))]
+pub type IsolatingRunSequenceVec = Vec<IsolatingRunSequence>; // same list as a plain `Vec` when the `smallvec` feature is off
+
/// Compute the set of isolating run sequences.
///
/// An isolating run sequence is a maximal sequence of level runs such that for all level runs
@@ -43,8 +55,59 @@ pub fn isolating_run_sequences(
para_level: Level,
original_classes: &[BidiClass],
levels: &[Level],
-) -> Vec<IsolatingRunSequence> {
- let runs = level_runs(levels, original_classes);
+ runs: LevelRunVec,
+ has_isolate_controls: bool,
+ isolating_run_sequences: &mut IsolatingRunSequenceVec,
+) {
+ // Per http://www.unicode.org/reports/tr9/#BD13:
+ // "In the absence of isolate initiators, each isolating run sequence in a paragraph
+ // consists of exactly one level run, and each level run constitutes a separate
+ // isolating run sequence."
+ // We can take a simplified path to handle this case.
+ if !has_isolate_controls {
+ isolating_run_sequences.reserve_exact(runs.len());
+ for run in runs {
+ // Determine the `sos` and `eos` class for the sequence.
+ // <http://www.unicode.org/reports/tr9/#X10>
+
+ let run_levels = &levels[run.clone()];
+ let run_classes = &original_classes[run.clone()];
+ let seq_level = run_levels[run_classes
+ .iter()
+ .position(|c| not_removed_by_x9(c))
+ .unwrap_or(0)];
+
+ let end_level = run_levels[run_classes
+ .iter()
+ .rposition(|c| not_removed_by_x9(c))
+ .unwrap_or(run.end - run.start - 1)];
+
+ // Get the level of the last non-removed char before the run.
+ let pred_level = match original_classes[..run.start]
+ .iter()
+ .rposition(not_removed_by_x9)
+ {
+ Some(idx) => levels[idx],
+ None => para_level,
+ };
+
+ // Get the level of the next non-removed char after the run.
+ let succ_level = match original_classes[run.end..]
+ .iter()
+ .position(not_removed_by_x9)
+ {
+ Some(idx) => levels[run.end + idx],
+ None => para_level,
+ };
+
+ isolating_run_sequences.push(IsolatingRunSequence {
+ runs: vec![run],
+ sos: max(seq_level, pred_level).bidi_class(),
+ eos: max(end_level, succ_level).bidi_class(),
+ });
+ }
+ return;
+ }
// Compute the set of isolating run sequences.
// <http://www.unicode.org/reports/tr9/#BD13>
@@ -52,10 +115,13 @@ pub fn isolating_run_sequences(
// When we encounter an isolate initiator, we push the current sequence onto the
// stack so we can resume it after the matching PDI.
- let mut stack = vec![Vec::new()];
+ #[cfg(feature = "smallvec")]
+ let mut stack: SmallVec<[Vec<Range<usize>>; 8]> = smallvec![vec![]];
+ #[cfg(not(feature = "smallvec"))]
+ let mut stack = vec![vec![]];
for run in runs {
- assert!(run.len() > 0);
+ assert!(!run.is_empty());
assert!(!stack.is_empty());
let start_class = original_classes[run.start];
@@ -67,8 +133,7 @@ pub fn isolating_run_sequences(
.iter()
.copied()
.rev()
- .filter(not_removed_by_x9)
- .next()
+ .find(not_removed_by_x9)
.unwrap_or(start_class);
let mut sequence = if start_class == PDI && stack.len() > 1 {
@@ -81,7 +146,7 @@ pub fn isolating_run_sequences(
sequence.push(run);
- if let RLI | LRI | FSI = end_class {
+ if matches!(end_class, RLI | LRI | FSI) {
// Resume this sequence after the isolate.
stack.push(sequence);
} else {
@@ -89,90 +154,82 @@ pub fn isolating_run_sequences(
sequences.push(sequence);
}
}
- // Pop any remaning sequences off the stack.
+ // Pop any remaining sequences off the stack.
sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
// Determine the `sos` and `eos` class for each sequence.
// <http://www.unicode.org/reports/tr9/#X10>
- sequences
- .into_iter()
- .map(|sequence: Vec<LevelRun>| {
- assert!(!sequence.is_empty());
+ for sequence in sequences {
+ assert!(!sequence.is_empty());
- let mut result = IsolatingRunSequence {
- runs: sequence,
- sos: L,
- eos: L,
- };
+ let start_of_seq = sequence[0].start;
+ let runs_len = sequence.len();
+ let end_of_seq = sequence[runs_len - 1].end;
- let start_of_seq = result.runs[0].start;
- let runs_len = result.runs.len();
- let end_of_seq = result.runs[runs_len - 1].end;
-
- // > (not counting characters removed by X9)
- let seq_level = result
- .iter_forwards_from(start_of_seq, 0)
- .filter(|i| not_removed_by_x9(&original_classes[*i]))
- .map(|i| levels[i])
- .next()
- .unwrap_or(levels[start_of_seq]);
-
- // XXXManishearth the spec talks of a start and end level,
- // but for a given IRS the two should be equivalent, yes?
- let end_level = result
- .iter_backwards_from(end_of_seq, runs_len - 1)
- .filter(|i| not_removed_by_x9(&original_classes[*i]))
- .map(|i| levels[i])
- .next()
- .unwrap_or(levels[end_of_seq - 1]);
-
- #[cfg(test)]
- for run in result.runs.clone() {
- for idx in run {
- if not_removed_by_x9(&original_classes[idx]) {
- assert_eq!(seq_level, levels[idx]);
- }
- }
+ let mut result = IsolatingRunSequence {
+ runs: sequence,
+ sos: L,
+ eos: L,
+ };
+
+ // > (not counting characters removed by X9)
+ let seq_level = levels[result
+ .iter_forwards_from(start_of_seq, 0)
+ .find(|i| not_removed_by_x9(&original_classes[*i]))
+ .unwrap_or(start_of_seq)];
+
+ // XXXManishearth the spec talks of a start and end level,
+ // but for a given IRS the two should be equivalent, yes?
+ let end_level = levels[result
+ .iter_backwards_from(end_of_seq, runs_len - 1)
+ .find(|i| not_removed_by_x9(&original_classes[*i]))
+ .unwrap_or(end_of_seq - 1)];
+
+ #[cfg(test)]
+ for idx in result.runs.clone().into_iter().flatten() {
+ if not_removed_by_x9(&original_classes[idx]) {
+ assert_eq!(seq_level, levels[idx]);
}
+ }
+
+ // Get the level of the last non-removed char before the runs.
+ let pred_level = match original_classes[..start_of_seq]
+ .iter()
+ .rposition(not_removed_by_x9)
+ {
+ Some(idx) => levels[idx],
+ None => para_level,
+ };
- // Get the level of the last non-removed char before the runs.
- let pred_level = match original_classes[..start_of_seq]
+ // Get the last non-removed character to check if it is an isolate initiator.
+ // The spec calls for an unmatched one, but matched isolate initiators
+ // will never be at the end of a level run (otherwise there would be more to the run).
+ // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check.
+ let last_non_removed = original_classes[..end_of_seq]
+ .iter()
+ .copied()
+ .rev()
+ .find(not_removed_by_x9)
+ .unwrap_or(BN);
+
+ // Get the level of the next non-removed char after the runs.
+ let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) {
+ para_level
+ } else {
+ match original_classes[end_of_seq..]
.iter()
- .rposition(not_removed_by_x9)
+ .position(not_removed_by_x9)
{
- Some(idx) => levels[idx],
+ Some(idx) => levels[end_of_seq + idx],
None => para_level,
- };
+ }
+ };
- // Get the last non-removed character to check if it is an isolate initiator.
- // The spec calls for an unmatched one, but matched isolate initiators
- // will never be at the end of a level run (otherwise there would be more to the run).
- // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check.
- let last_non_removed = original_classes[..end_of_seq]
- .iter()
- .copied()
- .rev()
- .find(not_removed_by_x9)
- .unwrap_or(BN);
-
- // Get the level of the next non-removed char after the runs.
- let succ_level = if let RLI | LRI | FSI = last_non_removed {
- para_level
- } else {
- match original_classes[end_of_seq..]
- .iter()
- .position(not_removed_by_x9)
- {
- Some(idx) => levels[end_of_seq + idx],
- None => para_level,
- }
- };
+ result.sos = max(seq_level, pred_level).bidi_class();
+ result.eos = max(end_level, succ_level).bidi_class();
- result.sos = max(seq_level, pred_level).bidi_class();
- result.eos = max(end_level, succ_level).bidi_class();
- result
- })
- .collect()
+ isolating_run_sequences.push(result);
+ }
}
impl IsolatingRunSequence {
@@ -219,6 +276,9 @@ impl IsolatingRunSequence {
/// Finds the level runs in a paragraph.
///
/// <http://www.unicode.org/reports/tr9/#BD7>
+///
+/// This is only used by tests; normally level runs are identified during explicit::compute.
+#[cfg(test)]
fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
assert_eq!(levels.len(), original_classes.len());
@@ -246,10 +306,7 @@ fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun>
///
/// <http://www.unicode.org/reports/tr9/#X9>
pub fn removed_by_x9(class: BidiClass) -> bool {
- match class {
- RLE | LRE | RLO | LRO | PDF | BN => true,
- _ => false,
- }
+ matches!(class, RLE | LRE | RLO | LRO | PDF | BN)
}
// For use as a predicate for `position` / `rposition`
@@ -281,7 +338,14 @@ mod tests {
let classes = &[L, RLE, L, PDF, RLE, L, PDF, L];
let levels = &[0, 1, 1, 1, 1, 1, 1, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ false,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
@@ -294,7 +358,14 @@ mod tests {
let classes = &[L, RLI, L, PDI, RLI, L, PDI, L];
let levels = &[0, 0, 1, 0, 0, 1, 0, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ true,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
@@ -307,7 +378,14 @@ mod tests {
let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L];
let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ true,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
assert_eq!(
sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>(),
@@ -326,7 +404,14 @@ mod tests {
let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L];
let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ false,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
// text1
@@ -385,7 +470,14 @@ mod tests {
let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L];
let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0];
let para_level = Level::ltr();
- let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels));
+ let mut sequences = IsolatingRunSequenceVec::new();
+ isolating_run_sequences(
+ para_level,
+ classes,
+ &Level::vec(levels),
+ level_runs(&Level::vec(levels), classes).into(),
+ true,
+ &mut sequences);
sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
// text1·RLI·PDI·RLI·PDI·text6
diff --git a/third_party/rust/unicode-bidi/src/utf16.rs b/third_party/rust/unicode-bidi/src/utf16.rs
index dcd9baf2be..11b386f91e 100644
--- a/third_party/rust/unicode-bidi/src/utf16.rs
+++ b/third_party/rust/unicode-bidi/src/utf16.rs
@@ -18,7 +18,9 @@ use crate::{
compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
reorder_visual, visual_runs_for_line,
};
-use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo};
+use crate::{
+ BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags,
+};
#[cfg(feature = "hardcoded-data")]
use crate::HardcodedBidiData;
@@ -83,7 +85,7 @@ struct InitialInfoExt<'text> {
/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
/// requires no further bidi processing (i.e. there are no RTL characters or bidi
- /// control codes present).
+ /// control codes present), and whether any bidi isolation controls are present.
- pure_ltr: Vec<bool>,
+ flags: Vec<ParagraphInfoFlags>,
}
impl<'text> InitialInfoExt<'text> {
@@ -103,12 +105,12 @@ impl<'text> InitialInfoExt<'text> {
default_para_level: Option<Level>,
) -> InitialInfoExt<'a> {
let mut paragraphs = Vec::<ParagraphInfo>::new();
- let mut pure_ltr = Vec::<bool>::new();
- let (original_classes, _, _) = compute_initial_info(
+ let mut flags = Vec::<ParagraphInfoFlags>::new();
+ let (original_classes, _, _, _) = compute_initial_info(
data_source,
text,
default_para_level,
- Some((&mut paragraphs, &mut pure_ltr)),
+ Some((&mut paragraphs, &mut flags)),
);
InitialInfoExt {
@@ -117,7 +119,7 @@ impl<'text> InitialInfoExt<'text> {
original_classes,
paragraphs,
},
- pure_ltr,
+ flags,
}
}
}
@@ -177,20 +179,21 @@ impl<'text> BidiInfo<'text> {
text: &'a [u16],
default_para_level: Option<Level>,
) -> BidiInfo<'a> {
- let InitialInfoExt { base, pure_ltr, .. } =
+ let InitialInfoExt { base, flags, .. } =
InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
let mut levels = Vec::<Level>::with_capacity(text.len());
let mut processing_classes = base.original_classes.clone();
- for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
+ for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
let text = &text[para.range.clone()];
let original_classes = &base.original_classes[para.range.clone()];
compute_bidi_info_for_para(
data_source,
para,
- *is_pure_ltr,
+ flags.is_pure_ltr,
+ flags.has_isolate_controls,
text,
original_classes,
&mut processing_classes,
@@ -411,7 +414,7 @@ impl<'text> ParagraphBidiInfo<'text> {
) -> ParagraphBidiInfo<'a> {
// Here we could create a ParagraphInitialInfo struct to parallel the one
// used by BidiInfo, but there doesn't seem any compelling reason for it.
- let (original_classes, paragraph_level, is_pure_ltr) =
+ let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
compute_initial_info(data_source, text, default_para_level, None);
let mut levels = Vec::<Level>::with_capacity(text.len());
@@ -429,6 +432,7 @@ impl<'text> ParagraphBidiInfo<'text> {
data_source,
&para_info,
is_pure_ltr,
+ has_isolate_controls,
text,
&original_classes,
&mut processing_classes,
@@ -551,12 +555,12 @@ impl<'text> ParagraphBidiInfo<'text> {
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
-fn reorder_line<'text>(
- text: &'text [u16],
+fn reorder_line(
+ text: &[u16],
line: Range<usize>,
levels: Vec<Level>,
runs: Vec<LevelRun>,
-) -> Cow<'text, [u16]> {
+) -> Cow<'_, [u16]> {
// If all isolating run sequences are LTR, no reordering is needed
if runs.iter().all(|run| levels[run.start].is_ltr()) {
return text[line].into();
@@ -668,15 +672,15 @@ impl<'text> TextSource<'text> for [u16] {
}
#[inline]
fn chars(&'text self) -> Self::CharIter {
- Utf16CharIter::new(&self)
+ Utf16CharIter::new(self)
}
#[inline]
fn char_indices(&'text self) -> Self::CharIndexIter {
- Utf16CharIndexIter::new(&self)
+ Utf16CharIndexIter::new(self)
}
#[inline]
fn indices_lengths(&'text self) -> Self::IndexLenIter {
- Utf16IndexLenIter::new(&self)
+ Utf16IndexLenIter::new(self)
}
#[inline]
fn char_len(ch: char) -> usize {