From fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:14:29 +0200 Subject: Merging upstream version 125.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/unicode-bidi/.appveyor.yml | 19 ++ third_party/rust/unicode-bidi/.cargo-checksum.json | 2 +- .../rust/unicode-bidi/.github/workflows/main.yml | 49 ++++ third_party/rust/unicode-bidi/.rustfmt.toml | 1 + third_party/rust/unicode-bidi/Cargo.lock | 175 ++++++++++++++ third_party/rust/unicode-bidi/Cargo.toml | 10 +- third_party/rust/unicode-bidi/src/char_data/mod.rs | 5 +- .../rust/unicode-bidi/src/char_data/tables.rs | 4 +- third_party/rust/unicode-bidi/src/deprecated.rs | 9 +- third_party/rust/unicode-bidi/src/explicit.rs | 129 +++++----- third_party/rust/unicode-bidi/src/implicit.rs | 93 ++++--- third_party/rust/unicode-bidi/src/level.rs | 15 +- third_party/rust/unicode-bidi/src/lib.rs | 129 +++++++--- third_party/rust/unicode-bidi/src/prepare.rs | 266 ++++++++++++++------- third_party/rust/unicode-bidi/src/utf16.rs | 36 +-- 15 files changed, 696 insertions(+), 246 deletions(-) create mode 100644 third_party/rust/unicode-bidi/.appveyor.yml create mode 100644 third_party/rust/unicode-bidi/.github/workflows/main.yml create mode 100644 third_party/rust/unicode-bidi/.rustfmt.toml create mode 100644 third_party/rust/unicode-bidi/Cargo.lock (limited to 'third_party/rust/unicode-bidi') diff --git a/third_party/rust/unicode-bidi/.appveyor.yml b/third_party/rust/unicode-bidi/.appveyor.yml new file mode 100644 index 0000000000..1bd43a1145 --- /dev/null +++ b/third_party/rust/unicode-bidi/.appveyor.yml @@ -0,0 +1,19 @@ +install: + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init -yv --default-toolchain nightly + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - rustc -V + - cargo -V + - git submodule update --init --recursive + +build: false + +environment: + RUST_BACKTRACE: full + +test_script: + - cargo build --verbose --all + - cargo doc --verbose --all --no-deps + + - cargo test --verbose --all + - cargo test --verbose --all --features serde diff --git a/third_party/rust/unicode-bidi/.cargo-checksum.json b/third_party/rust/unicode-bidi/.cargo-checksum.json index 3628dc8f6f..fa0b5995c0 100644 --- a/third_party/rust/unicode-bidi/.cargo-checksum.json +++ b/third_party/rust/unicode-bidi/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"AUTHORS":"1ff3a7c8519b29544bb28ba9b1e7502df0cb764051fb9a1172e60006aa2b8dcc","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.toml":"9cd0be282dbaeacf5d1fdf07096114c7b3f16f275755f30a0d2e873ab1fbc150","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"81d3dc6d894a68894d982760b0a907f9dcbb0da179a8063ed9de1d2257518957","src/char_data/mod.rs":"0622df8ce4b4de60aea7e4787635d3187f79f7a3f9001e3d209f58fd07d03887","src/char_data/tables.rs":"50faf4eef73c831a38b735309ff3415e9f65992a0474ff5c055138f91c91ee16","src/data_source.rs":"36fa0785e51c549c1f72f09040cfe515b848d1b23fb30d469770a6b4b17b49df","src/deprecated.rs":"46c5a8bb1e6aa6193eec8269891d4cbbb5fd92214eb55eac7ea5e7ca193386aa","src/explicit.rs":"afa7599674fc8daad2939e5987ec5d937ed9fdbb78b8492b1e137db88d0a3af7","src/format_chars.rs":"678399fec3f4bfaf4093f38cfdb8956288313386dc3511dab9fb58164e8dc01b","src/implicit.rs":"e96484178d1bab97439b2c0cf4b3a8d6ee18459b9de64499aa07607aa304be0c","src/level.rs":"921fb7b8960f6cc703a51936075302db513927b8c5d5d760536b6ff70ddab0dd","src/lib.rs":"ca09c7dedc56ec139fa92fec26c3983346a3b6456992acdfbfe18b324174e0d8","src/prepare.rs":"c4aaad603f5c1be5c4364265aac7590335dc234288a4151e0f30bcefe532e14d","src/utf16.rs":"30d31c4d8c814315b48b56a2dfb755b8b442dde23f76429c6df1af754999fe3b"},"package":"08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"} \ No newline at end of file +{"files":{".appveyor.yml":"15bdeea0e836ac2ccbb259cde1509a0673a73300e90e970f3e533b189234b6fd",".github/workflows/main.yml":"e0bee93284a8b39c9d419038bfa72a6389ebdae39ce55c40624e764ac1c98a9e",".rustfmt.toml":"168c973274f3f5946e90cac6ae0f017d0832a5c830872d9d3b9b387ad6c1a81e","AUTHORS":"1ff3a7c8519b29544bb28ba9b1e7502df0cb764051fb9a1172e60006aa2b8dcc","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.lock":"8842f03d0fcea88aa1546244d0455834732603175b293218f8e9a9f44c297b7c","Cargo.toml":"099454ebee9b081080e1521eccbe447db30b17ac36e9e655ed1d0d1e20e657fb","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"81d3dc6d894a68894d982760b0a907f9dcbb0da179a8063ed9de1d2257518957","src/char_data/mod.rs":"8cbdcaacddb3dd9b70d615693fa73d0e7dca6332102a95f0d3ce447df7645284","src/char_data/tables.rs":"8adf126131f573a3b6d2c35849c1cc13c831c9b55c4d3fcb5a3961d8ed7a0d44","src/data_source.rs":"36fa0785e51c549c1f72f09040cfe515b848d1b23fb30d469770a6b4b17b49df","src/deprecated.rs":"f94c0e75dec7e70cb9802e26b7f82fe618dcdd50e9973927bacd4eccc6899c62","src/explicit.rs":"86c3c55bf2cc90aab1411aac6cf05de505ca74e44a76fe829572dd7dc4dd2aa3","src/format_chars.rs":"678399fec3f4bfaf4093f38cfdb8956288313386dc3511dab9fb58164e8dc01b","src/implicit.rs":"8d5b003464aee3f333785c6170a884945251f39601e4ea658e669a2ad575d588","src/level.rs":"ce1eaa9940f1b90bc59aba296488b8cd128aefeb4b6b2e3ecc34da26c569150b","src/lib.rs":"9dff9c105f481a03823de6ad9a0a11733af019649ae211644061d5a525670244","src/prepare.rs":"aeb8b88cfb2d2e6b74473f5903205dd3683d57abcc8801de7b9fdea6a432a0fe","src/utf16.rs":"12ee177127a0b5b0350a1fcc1edf7387c26b51ec5654f724629aab723881c313"},"package":null} \ No newline at end of file diff --git a/third_party/rust/unicode-bidi/.github/workflows/main.yml b/third_party/rust/unicode-bidi/.github/workflows/main.yml new file mode 100644 index 0000000000..303bac8b97 --- /dev/null +++ b/third_party/rust/unicode-bidi/.github/workflows/main.yml @@ -0,0 +1,49 @@ +name: CI + +on: + push: + branches: ['master', 'auto'] + pull_request: + +jobs: + Test: + strategy: + matrix: + os: [ubuntu-latest] + rust: [1.47.0, stable, beta, nightly] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust }} + - name: Unpin dependencies except on MSRV + if: matrix.rust != '1.47.0' + run: cargo update + - run: cargo build --all-targets + - run: cargo test + - run: cargo test --features "serde" + - run: cargo test --no-default-features + - run: cargo test --no-default-features --features=hardcoded-data + Fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - run: cargo fmt --check + + build_result: + name: homu build finished + runs-on: ubuntu-latest + needs: + - "Test" + - "Fmt" + steps: + - name: Mark the job as successful + run: exit 0 + if: success() + - name: Mark the job as unsuccessful + run: exit 1 + if: "!success()" diff --git a/third_party/rust/unicode-bidi/.rustfmt.toml b/third_party/rust/unicode-bidi/.rustfmt.toml new file mode 100644 index 0000000000..e416686ee5 --- /dev/null +++ b/third_party/rust/unicode-bidi/.rustfmt.toml @@ -0,0 +1 @@ +array_width = 80 diff --git a/third_party/rust/unicode-bidi/Cargo.lock b/third_party/rust/unicode-bidi/Cargo.lock new file mode 100644 index 0000000000..63f01ebdcc --- /dev/null +++ b/third_party/rust/unicode-bidi/Cargo.lock @@ -0,0 +1,175 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "flame" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_derive 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)", + "thread-id 3.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "flamer" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.109 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "proc-macro2" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-ident 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quote" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde" +version = "1.0.156" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_derive" +version = "1.0.156" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.109 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.15 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_test" +version = "1.0.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-ident 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread-id" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.149 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.13" +dependencies = [ + "flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "flamer 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_test 1.0.175 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum flame 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1fc2706461e1ee94f55cab2ed2e3d34ae9536cfa830358ef80acff1a3dacab30" +"checksum flamer 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab" +"checksum itoa 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)" = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" +"checksum libc 0.2.149 (registry+https://github.com/rust-lang/crates.io-index)" = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +"checksum proc-macro2 1.0.65 (registry+https://github.com/rust-lang/crates.io-index)" = "92de25114670a878b1261c79c9f8f729fb97e95bac93f6312f583c60dd6a1dfe" +"checksum quote 1.0.30 (registry+https://github.com/rust-lang/crates.io-index)" = "5907a1b7c277254a8b15170f6e7c97cfa60ee7872a3217663bb81151e48184bb" +"checksum redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)" = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" +"checksum ryu 1.0.15 (registry+https://github.com/rust-lang/crates.io-index)" = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +"checksum serde 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)" = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" +"checksum serde_derive 1.0.156 (registry+https://github.com/rust-lang/crates.io-index)" = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" +"checksum serde_json 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)" = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +"checksum serde_test 1.0.175 (registry+https://github.com/rust-lang/crates.io-index)" = "29baf0f77ca9ad9c6ed46e1b408b5e0f30b5184bcd66884e7f6d36bd7a65a8a4" +"checksum syn 1.0.109 (registry+https://github.com/rust-lang/crates.io-index)" = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +"checksum thread-id 3.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1" +"checksum unicode-ident 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)" = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +"checksum winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/unicode-bidi/Cargo.toml b/third_party/rust/unicode-bidi/Cargo.toml index 29be4e5a71..584d471c8a 100644 --- a/third_party/rust/unicode-bidi/Cargo.toml +++ b/third_party/rust/unicode-bidi/Cargo.toml @@ -11,6 +11,7 @@ [package] edition = "2018" +rust-version = "1.47.0" name = "unicode-bidi" version = "0.3.15" authors = ["The Servo Project Developers"] @@ -61,8 +62,13 @@ features = ["derive"] optional = true default-features = false -[dev-dependencies.serde_test] -version = ">=0.8, <2.0" +[dependencies.smallvec] +version = ">=1.13" +features = ["union"] +optional = true + +[dev-dependencies] +serde_test = ">=0.8, <2.0" [features] bench_it = [] diff --git a/third_party/rust/unicode-bidi/src/char_data/mod.rs b/third_party/rust/unicode-bidi/src/char_data/mod.rs index 4edf5b8f4c..543b0ed8fd 100644 --- a/third_party/rust/unicode-bidi/src/char_data/mod.rs +++ b/third_party/rust/unicode-bidi/src/char_data/mod.rs @@ -59,10 +59,7 @@ pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option bool { - match bidi_class { - RLE | RLO | RLI => true, - _ => false, - } + matches!(bidi_class, RLE | RLO | RLI) } #[cfg(feature = "hardcoded-data")] diff --git a/third_party/rust/unicode-bidi/src/char_data/tables.rs b/third_party/rust/unicode-bidi/src/char_data/tables.rs index ecdcf496d1..f10265d214 100644 --- a/third_party/rust/unicode-bidi/src/char_data/tables.rs +++ b/third_party/rust/unicode-bidi/src/char_data/tables.rs @@ -45,7 +45,7 @@ pub enum BidiClass { use self::BidiClass::*; #[cfg(feature = "hardcoded-data")] -pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ +pub const bidi_class_table: &[(char, char, BidiClass)] = &[ ('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S), ('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B), ('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}', @@ -516,7 +516,7 @@ pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ '\u{e01ef}', NSM), ('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L) ]; -pub const bidi_pairs_table: &'static [(char, char, Option)] = &[ +pub const bidi_pairs_table: &[(char, char, Option)] = &[ ('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}', '\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}', '\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}', diff --git a/third_party/rust/unicode-bidi/src/deprecated.rs b/third_party/rust/unicode-bidi/src/deprecated.rs index 74a24f5b8b..c903663e99 100644 --- a/third_party/rust/unicode-bidi/src/deprecated.rs +++ b/third_party/rust/unicode-bidi/src/deprecated.rs @@ -9,8 +9,6 @@ //! This module holds deprecated assets only. -use alloc::vec::Vec; - use super::*; /// Find the level runs within a line and return them in visual order. @@ -71,10 +69,8 @@ pub fn visual_runs(line: Range, levels: &[Level]) -> Vec { // Found the start of a sequence. Now find the end. let mut seq_end = seq_start + 1; - while seq_end < run_count { - if levels[runs[seq_end].start] < max_level { - break; - } + + while seq_end < run_count && levels[runs[seq_end].start] >= max_level { seq_end += 1; } @@ -83,6 +79,7 @@ pub fn visual_runs(line: Range, levels: &[Level]) -> Vec { seq_start = seq_end; } + max_level .lower(1) .expect("Lowering embedding level below zero"); diff --git a/third_party/rust/unicode-bidi/src/explicit.rs b/third_party/rust/unicode-bidi/src/explicit.rs index d4ad897b54..5760ab8ece 100644 --- a/third_party/rust/unicode-bidi/src/explicit.rs +++ b/third_party/rust/unicode-bidi/src/explicit.rs @@ -11,19 +11,25 @@ //! //! -use alloc::vec::Vec; +#[cfg(feature = "smallvec")] +use smallvec::{smallvec, SmallVec}; use super::char_data::{ is_rtl, BidiClass::{self, *}, }; use super::level::Level; +use super::prepare::removed_by_x9; +use super::LevelRunVec; use super::TextSource; -/// Compute explicit embedding levels for one paragraph of text (X1-X8). +/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify +/// level runs (BD7) for use when determining Isolating Run Sequences (X10). /// /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`, /// for each char in `text`. +/// +/// `runs` returns the list of level runs (BD7) of the text. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn compute<'a, T: TextSource<'a> + ?Sized>( text: &'a T, @@ -31,35 +37,44 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], + runs: &mut LevelRunVec, ) { assert_eq!(text.len(), original_classes.len()); // - let mut stack = DirectionalStatusStack::new(); - stack.push(para_level, OverrideStatus::Neutral); + #[cfg(feature = "smallvec")] + let mut stack: SmallVec<[Status; 8]> = smallvec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; + #[cfg(not(feature = "smallvec"))] + let mut stack = vec![Status { + level: para_level, + status: OverrideStatus::Neutral, + }]; let mut overflow_isolate_count = 0u32; let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; + let mut current_run_level = Level::ltr(); + let mut current_run_start = 0; + for (i, len) in text.indices_lengths() { + let last = stack.last().unwrap(); + match original_classes[i] { // Rules X2-X5c RLE | LRE | RLO | LRO | RLI | LRI | FSI => { - let last_level = stack.last().level; - // - levels[i] = last_level; + levels[i] = last.level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. - let is_isolate = match original_classes[i] { - RLI | LRI | FSI => true, - _ => false, - }; + let is_isolate = matches!(original_classes[i], RLI | LRI | FSI); if is_isolate { // Redundant due to "Retaining explicit formatting characters" step. - // levels[i] = last_level; - match stack.last().status { + // levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} @@ -67,22 +82,25 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( } let new_level = if is_rtl(original_classes[i]) { - last_level.new_explicit_next_rtl() + last.level.new_explicit_next_rtl() } else { - last_level.new_explicit_next_ltr() + last.level.new_explicit_next_ltr() }; + if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { let new_level = new_level.unwrap(); - stack.push( - new_level, - match original_classes[i] { + + stack.push(Status { + level: new_level, + status: match original_classes[i] { RLO => OverrideStatus::RTL, LRO => OverrideStatus::LTR, RLI | LRI | FSI => OverrideStatus::Isolate, _ => OverrideStatus::Neutral, }, - ); + }); + if is_isolate { valid_isolate_count += 1; } else { @@ -110,21 +128,21 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( overflow_isolate_count -= 1; } else if valid_isolate_count > 0 { overflow_embedding_count = 0; - loop { - // Pop everything up to and including the last Isolate status. - match stack.vec.pop() { - None - | Some(Status { - status: OverrideStatus::Isolate, - .. - }) => break, - _ => continue, - } - } + + while !matches!( + stack.pop(), + None | Some(Status { + status: OverrideStatus::Isolate, + .. + }) + ) {} + valid_isolate_count -= 1; } - let last = stack.last(); + + let last = stack.last().unwrap(); levels[i] = last.level; + match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, @@ -138,11 +156,12 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // do nothing } else if overflow_embedding_count > 0 { overflow_embedding_count -= 1; - } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 { - stack.vec.pop(); + } else if last.status != OverrideStatus::Isolate && stack.len() >= 2 { + stack.pop(); } + // - levels[i] = stack.last().level; + levels[i] = stack.last().unwrap().level; // X9 part of retaining explicit formatting characters. processing_classes[i] = BN; } @@ -153,8 +172,8 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( // _ => { - let last = stack.last(); levels[i] = last.level; + // This condition is not in the spec, but I am pretty sure that is a spec bug. // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf if original_classes[i] != BN { @@ -172,6 +191,26 @@ pub fn compute<'a, T: TextSource<'a> + ?Sized>( levels[i + j] = levels[i]; processing_classes[i + j] = processing_classes[i]; } + + // Identify level runs to be passed to prepare::isolating_run_sequences(). + if i == 0 { + // Initialize for the first (or only) run. + current_run_level = levels[i]; + } else { + // Check if we need to start a new level run. + // + if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { + // End the last run and start a new one. + runs.push(current_run_start..i); + current_run_level = levels[i]; + current_run_start = i; + } + } + } + + // Append the trailing level run, if non-empty. + if levels.len() > current_run_start { + runs.push(current_run_start..levels.len()); } } @@ -188,23 +227,3 @@ enum OverrideStatus { LTR, Isolate, } - -struct DirectionalStatusStack { - vec: Vec, -} - -impl DirectionalStatusStack { - fn new() -> Self { - DirectionalStatusStack { - vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2), - } - } - - fn push(&mut self, level: Level, status: OverrideStatus) { - self.vec.push(Status { level, status }); - } - - fn last(&self) -> &Status { - self.vec.last().unwrap() - } -} diff --git a/third_party/rust/unicode-bidi/src/implicit.rs b/third_party/rust/unicode-bidi/src/implicit.rs index 0311053c0a..334afec049 100644 --- a/third_party/rust/unicode-bidi/src/implicit.rs +++ b/third_party/rust/unicode-bidi/src/implicit.rs @@ -9,8 +9,11 @@ //! 3.3.4 - 3.3.6. Resolve implicit levels and types. +#[cfg(not(feature = "smallvec"))] use alloc::vec::Vec; use core::cmp::max; +#[cfg(feature = "smallvec")] +use smallvec::SmallVec; use super::char_data::BidiClass::{self, *}; use super::level::Level; @@ -39,7 +42,13 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( // The previous class for the purposes of rule W1, not tracking changes from any other rules. let mut prev_class_before_w1 = sequence.sos; let mut last_strong_is_al = false; + #[cfg(feature = "smallvec")] + let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + #[cfg(not(feature = "smallvec"))] let mut et_run_indices = Vec::new(); // for W5 + #[cfg(feature = "smallvec")] + let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + + #[cfg(not(feature = "smallvec"))] let mut bn_run_indices = Vec::new(); // for W5 + for (run_index, level_run) in sequence.runs.iter().enumerate() { @@ -177,7 +186,7 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( _ => { // // If there was a BN run before this, that's now a part of this ET run. - et_run_indices.extend(&bn_run_indices); + et_run_indices.extend(bn_run_indices.clone()); // In case this is followed by an EN. et_run_indices.push(i); @@ -224,26 +233,29 @@ pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>( // W7. If the previous strong char was L, change EN to L. let mut last_strong_is_l = sequence.sos == L; - for run in &sequence.runs { - for i in run.clone() { - match processing_classes[i] { - EN if last_strong_is_l => { - processing_classes[i] = L; - } - L => { - last_strong_is_l = true; - } - R | AL => { - last_strong_is_l = false; - } - // - // Already scanning past BN here. - _ => {} + for i in sequence.runs.iter().cloned().flatten() { + match processing_classes[i] { + EN if last_strong_is_l => { + processing_classes[i] = L; } + L => { + last_strong_is_l = true; + } + R | AL => { + last_strong_is_l = false; + } + // + // Already scanning past BN here. + _ => {} } } } +#[cfg(feature = "smallvec")] +type BracketPairVec = SmallVec<[BracketPair; 8]>; +#[cfg(not(feature = "smallvec"))] +type BracketPairVec = Vec; + /// 3.3.5 Resolving Neutral Types /// /// @@ -267,7 +279,14 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // > Identify the bracket pairs in the current isolating run sequence according to BD16. // We use processing_classes, not original_classes, due to BD14/BD15 - let bracket_pairs = identify_bracket_pairs(text, data_source, sequence, processing_classes); + let mut bracket_pairs = BracketPairVec::new(); + identify_bracket_pairs( + text, + data_source, + sequence, + processing_classes, + &mut bracket_pairs, + ); // > For each bracket-pair element in the list of pairs of text positions // @@ -308,7 +327,7 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( found_e = true; } else if class == not_e { found_not_e = true; - } else if class == BidiClass::EN || class == BidiClass::AN { + } else if matches!(class, BidiClass::EN | BidiClass::AN) { // > Within this scope, bidirectional types EN and AN are treated as R. if e == BidiClass::L { found_not_e = true; @@ -337,15 +356,15 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( .iter_backwards_from(pair.start, pair.start_run) .map(|i| processing_classes[i]) .find(|class| { - *class == BidiClass::L - || *class == BidiClass::R - || *class == BidiClass::EN - || *class == BidiClass::AN + matches!( + class, + BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN + ) }) .unwrap_or(sequence.sos); // > Within this scope, bidirectional types EN and AN are treated as R. - if previous_strong == BidiClass::EN || previous_strong == BidiClass::AN { + if matches!(previous_strong, BidiClass::EN | BidiClass::AN) { previous_strong = BidiClass::R; } @@ -413,6 +432,9 @@ pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut prev_class = sequence.sos; while let Some(mut i) = indices.next() { // Process sequences of NI characters. + #[cfg(feature = "smallvec")] + let mut ni_run = SmallVec::<[usize; 8]>::new(); + #[cfg(not(feature = "smallvec"))] let mut ni_run = Vec::new(); // The BN is for if is_NI(processing_classes[i]) || processing_classes[i] == BN { @@ -484,9 +506,12 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( data_source: &D, run_sequence: &IsolatingRunSequence, original_classes: &[BidiClass], -) -> Vec { - let mut ret = vec![]; - let mut stack = vec![]; + bracket_pairs: &mut BracketPairVec, +) { + #[cfg(feature = "smallvec")] + let mut stack = SmallVec::<[(char, usize, usize); 8]>::new(); + #[cfg(not(feature = "smallvec"))] + let mut stack = Vec::new(); for (run_index, level_run) in run_sequence.runs.iter().enumerate() { for (i, ch) in text.subrange(level_run.clone()).char_indices() { @@ -532,7 +557,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( start_run: element.2, end_run: run_index, }; - ret.push(pair); + bracket_pairs.push(pair); // > Pop the stack through the current stack element inclusively. stack.truncate(stack_index); @@ -545,8 +570,7 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( } // > Sort the list of pairs of text positions in ascending order based on // > the text position of the opening paired bracket. - ret.sort_by_key(|r| r.start); - ret + bracket_pairs.sort_by_key(|r| r.start); } /// 3.3.6 Resolving Implicit Levels @@ -555,11 +579,11 @@ fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>( /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] -pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level { +pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level { let mut max_level = Level::ltr(); - assert_eq!(original_classes.len(), levels.len()); + assert_eq!(processing_classes.len(), levels.len()); for i in 0..levels.len() { - match (levels[i].is_rtl(), original_classes[i]) { + match (levels[i].is_rtl(), processing_classes[i]) { (false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"), (false, R) | (true, L) | (true, EN) | (true, AN) => { levels[i].raise(1).expect("Level number error") @@ -578,8 +602,5 @@ pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> L /// #[allow(non_snake_case)] fn is_NI(class: BidiClass) -> bool { - match class { - B | S | WS | ON | FSI | LRI | RLI | PDI => true, - _ => false, - } + matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI) } diff --git a/third_party/rust/unicode-bidi/src/level.rs b/third_party/rust/unicode-bidi/src/level.rs index ef4f6d9e40..5ece0251a5 100644 --- a/third_party/rust/unicode-bidi/src/level.rs +++ b/third_party/rust/unicode-bidi/src/level.rs @@ -13,9 +13,10 @@ //! //! -use alloc::string::{String, ToString}; -use alloc::vec::Vec; -use core::convert::{From, Into}; +use alloc::{ + string::{String, ToString}, + vec::Vec, +}; use core::slice; use super::char_data::BidiClass; @@ -219,11 +220,11 @@ pub fn has_rtl(levels: &[Level]) -> bool { levels.iter().any(|&lvl| lvl.is_rtl()) } -impl Into for Level { +impl From for u8 { /// Convert to the level number #[inline] - fn into(self) -> u8 { - self.number() + fn from(val: Level) -> Self { + val.number() } } @@ -244,7 +245,7 @@ impl<'a> PartialEq<&'a str> for Level { } /// Used for matching levels in conformance tests -impl<'a> PartialEq for Level { +impl PartialEq for Level { #[inline] fn eq(&self, s: &String) -> bool { self == &s.as_str() diff --git a/third_party/rust/unicode-bidi/src/lib.rs b/third_party/rust/unicode-bidi/src/lib.rs index 1072b67fe0..489927588a 100644 --- a/third_party/rust/unicode-bidi/src/lib.rs +++ b/third_party/rust/unicode-bidi/src/lib.rs @@ -71,6 +71,8 @@ extern crate std; #[macro_use] extern crate alloc; +#[cfg(feature = "smallvec")] +extern crate smallvec; pub mod data_source; pub mod deprecated; @@ -86,7 +88,7 @@ mod prepare; pub use crate::char_data::{BidiClass, UNICODE_VERSION}; pub use crate::data_source::BidiDataSource; pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; -pub use crate::prepare::LevelRun; +pub use crate::prepare::{LevelRun, LevelRunVec}; #[cfg(feature = "hardcoded-data")] pub use crate::char_data::{bidi_class, HardcodedBidiData}; @@ -99,6 +101,8 @@ use core::cmp; use core::iter::repeat; use core::ops::Range; use core::str::CharIndices; +#[cfg(feature = "smallvec")] +use smallvec::SmallVec; use crate::format_chars as chars; use crate::BidiClass::*; @@ -244,8 +248,14 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. there are no RTL characters or bidi - /// control codes present). - pure_ltr: Vec, + /// control codes present), and whether any bidi isolation controls are present. + flags: Vec, +} + +#[derive(PartialEq, Debug)] +struct ParagraphInfoFlags { + is_pure_ltr: bool, + has_isolate_controls: bool, } impl<'text> InitialInfoExt<'text> { @@ -265,12 +275,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::::new(); - let mut pure_ltr = Vec::::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -279,7 +289,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -295,16 +305,19 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( data_source: &D, text: &'a T, default_para_level: Option, - mut split_paragraphs: Option<(&mut Vec, &mut Vec)>, -) -> (Vec, Level, bool) { + mut split_paragraphs: Option<(&mut Vec, &mut Vec)>, +) -> (Vec, Level, bool, bool) { let mut original_classes = Vec::with_capacity(text.len()); // The stack contains the starting code unit index for each nested isolate we're inside. + #[cfg(feature = "smallvec")] + let mut isolate_stack = SmallVec::<[usize; 8]>::new(); + #[cfg(not(feature = "smallvec"))] let mut isolate_stack = Vec::new(); debug_assert!( - if let Some((ref paragraphs, ref pure_ltr)) = split_paragraphs { - paragraphs.is_empty() && pure_ltr.is_empty() + if let Some((ref paragraphs, ref flags)) = split_paragraphs { + paragraphs.is_empty() && flags.is_empty() } else { true } @@ -316,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // Per-paragraph flag: can subsequent processing be skipped? Set to false if any // RTL characters or bidi control characters are encountered in the paragraph. let mut is_pure_ltr = true; + // Set to true if any bidi isolation controls are present in the paragraph. + let mut has_isolate_controls = false; #[cfg(feature = "flame_it")] flame::start("compute_initial_info(): iter text.char_indices()"); @@ -334,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( match class { B => { - if let Some((ref mut paragraphs, ref mut pure_ltr)) = split_paragraphs { + if let Some((ref mut paragraphs, ref mut flags)) = split_paragraphs { // P1. Split the text into separate paragraphs. The paragraph separator is kept // with the previous paragraph. let para_end = i + len; @@ -343,7 +358,10 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // P3. If no character is found in p2, set the paragraph level to zero. level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); // Reset state for the start of the next paragraph. para_start = para_end; // TODO: Support defaulting to direction of previous paragraph @@ -351,6 +369,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( // para_level = default_para_level; is_pure_ltr = true; + has_isolate_controls = false; isolate_stack.clear(); } } @@ -387,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( RLI | LRI | FSI => { is_pure_ltr = false; + has_isolate_controls = true; isolate_stack.push(i); } @@ -398,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( } } - if let Some((paragraphs, pure_ltr)) = split_paragraphs { + if let Some((paragraphs, flags)) = split_paragraphs { if para_start < text.len() { paragraphs.push(ParagraphInfo { range: para_start..text.len(), level: para_level.unwrap_or(LTR_LEVEL), }); - pure_ltr.push(is_pure_ltr); + flags.push(ParagraphInfoFlags { + is_pure_ltr, + has_isolate_controls, + }); } - debug_assert_eq!(paragraphs.len(), pure_ltr.len()); + debug_assert_eq!(paragraphs.len(), flags.len()); } debug_assert_eq!(original_classes.len(), text.len()); @@ -417,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( original_classes, para_level.unwrap_or(LTR_LEVEL), is_pure_ltr, + has_isolate_controls, ) } @@ -475,20 +499,21 @@ impl<'text> BidiInfo<'text> { text: &'a str, default_para_level: Option, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. } = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -713,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::::with_capacity(text.len()); @@ -731,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, ¶_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes, @@ -855,12 +881,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text str, +fn reorder_line( + text: &str, line: Range, levels: Vec, runs: Vec, -) -> Cow<'text, str> { +) -> Cow<'_, str> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -1059,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> data_source: &D, para: &ParagraphInfo, is_pure_ltr: bool, + has_isolate_controls: bool, text: &'a T, original_classes: &[BidiClass], processing_classes: &mut [BidiClass], @@ -1072,6 +1099,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> let processing_classes = &mut processing_classes[para.range.clone()]; let levels = &mut levels[para.range.clone()]; + let mut level_runs = LevelRunVec::new(); explicit::compute( text, @@ -1079,9 +1107,18 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> original_classes, levels, processing_classes, + &mut level_runs, ); - let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); + let mut sequences = prepare::IsolatingRunSequenceVec::new(); + prepare::isolating_run_sequences( + para.level, + original_classes, + levels, + level_runs, + has_isolate_controls, + &mut sequences, + ); for sequence in &sequences { implicit::resolve_weak(text, sequence, processing_classes); implicit::resolve_neutral( @@ -1093,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized> processing_classes, ); } + implicit::resolve_levels(processing_classes, levels); assign_levels_to_removed_chars(para.level, original_classes, levels); @@ -1122,20 +1160,20 @@ fn reorder_levels<'a, T: TextSource<'a> + ?Sized>( B | S => { assert_eq!(reset_to, None); reset_to = Some(i + T::char_len(c)); - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // Whitespace, isolate formatting WS | FSI | LRI | RLI | PDI => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } } // // same as above + set the level RLE | LRE | RLO | LRO | PDF | BN => { - if reset_from == None { + if reset_from.is_none() { reset_from = Some(i); } // also set the level to previous @@ -1294,8 +1332,8 @@ fn get_base_direction_impl<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>( let mut isolate_level = 0; for c in text.chars() { match data_source.bidi_class(c) { - LRI | RLI | FSI => isolate_level = isolate_level + 1, - PDI if isolate_level > 0 => isolate_level = isolate_level - 1, + LRI | RLI | FSI => isolate_level += 1, + PDI if isolate_level > 0 => isolate_level -= 1, L if isolate_level == 0 => return Direction::Ltr, R | AL if isolate_level == 0 => return Direction::Rtl, B if !use_full_text => break, @@ -1342,7 +1380,7 @@ impl<'text> TextSource<'text> for str { } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf8IndexLenIter::new(&self) + Utf8IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { @@ -1542,6 +1580,24 @@ mod tests { #[cfg(feature = "hardcoded-data")] fn test_process_text() { let tests = vec![ + ( + // text + "", + // base level + Some(RTL_LEVEL), + // levels + Level::vec(&[]), + // original_classes + vec![], + // paragraphs + vec![], + // levels_u16 + Level::vec(&[]), + // original_classes_u16 + vec![], + // paragraphs_u16 + vec![], + ), ( // text "abc123", @@ -1703,6 +1759,19 @@ mod tests { paragraphs: t.4.clone(), } ); + // If it was empty, also test that ParagraphBidiInfo handles it safely. + if t.4.len() == 0 { + assert_eq!( + ParagraphBidiInfo::new(t.0, t.1), + ParagraphBidiInfo { + text: t.0, + original_classes: t.3.clone(), + levels: t.2.clone(), + paragraph_level: RTL_LEVEL, + is_pure_ltr: true, + } + ) + } // If it was a single paragraph, also test ParagraphBidiInfo. if t.4.len() == 1 { assert_eq!( diff --git a/third_party/rust/unicode-bidi/src/prepare.rs b/third_party/rust/unicode-bidi/src/prepare.rs index 9234e1aa61..f7b35ad689 100644 --- a/third_party/rust/unicode-bidi/src/prepare.rs +++ b/third_party/rust/unicode-bidi/src/prepare.rs @@ -14,6 +14,8 @@ use alloc::vec::Vec; use core::cmp::max; use core::ops::Range; +#[cfg(feature = "smallvec")] +use smallvec::{smallvec, SmallVec}; use super::level::Level; use super::BidiClass::{self, *}; @@ -23,6 +25,11 @@ use super::BidiClass::{self, *}; /// Represented as a range of byte indices. pub type LevelRun = Range; +#[cfg(feature = "smallvec")] +pub type LevelRunVec = SmallVec<[LevelRun; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type LevelRunVec = Vec; + /// Output of `isolating_run_sequences` (steps X9-X10) #[derive(Debug, PartialEq)] pub struct IsolatingRunSequence { @@ -31,6 +38,11 @@ pub struct IsolatingRunSequence { pub eos: BidiClass, // End-of-sequence type. } +#[cfg(feature = "smallvec")] +pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>; +#[cfg(not(feature = "smallvec"))] +pub type IsolatingRunSequenceVec = Vec; + /// Compute the set of isolating run sequences. /// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs @@ -43,8 +55,59 @@ pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], -) -> Vec { - let runs = level_runs(levels, original_classes); + runs: LevelRunVec, + has_isolate_controls: bool, + isolating_run_sequences: &mut IsolatingRunSequenceVec, +) { + // Per http://www.unicode.org/reports/tr9/#BD13: + // "In the absence of isolate initiators, each isolating run sequence in a paragraph + // consists of exactly one level run, and each level run constitutes a separate + // isolating run sequence." + // We can take a simplified path to handle this case. + if !has_isolate_controls { + isolating_run_sequences.reserve_exact(runs.len()); + for run in runs { + // Determine the `sos` and `eos` class for the sequence. + // + + let run_levels = &levels[run.clone()]; + let run_classes = &original_classes[run.clone()]; + let seq_level = run_levels[run_classes + .iter() + .position(|c| not_removed_by_x9(c)) + .unwrap_or(0)]; + + let end_level = run_levels[run_classes + .iter() + .rposition(|c| not_removed_by_x9(c)) + .unwrap_or(run.end - run.start - 1)]; + + // Get the level of the last non-removed char before the run. + let pred_level = match original_classes[..run.start] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; + + // Get the level of the next non-removed char after the run. + let succ_level = match original_classes[run.end..] + .iter() + .position(not_removed_by_x9) + { + Some(idx) => levels[run.end + idx], + None => para_level, + }; + + isolating_run_sequences.push(IsolatingRunSequence { + runs: vec![run], + sos: max(seq_level, pred_level).bidi_class(), + eos: max(end_level, succ_level).bidi_class(), + }); + } + return; + } // Compute the set of isolating run sequences. // @@ -52,10 +115,13 @@ pub fn isolating_run_sequences( // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. - let mut stack = vec![Vec::new()]; + #[cfg(feature = "smallvec")] + let mut stack: SmallVec<[Vec>; 8]> = smallvec![vec![]]; + #[cfg(not(feature = "smallvec"))] + let mut stack = vec![vec![]]; for run in runs { - assert!(run.len() > 0); + assert!(!run.is_empty()); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; @@ -67,8 +133,7 @@ pub fn isolating_run_sequences( .iter() .copied() .rev() - .filter(not_removed_by_x9) - .next() + .find(not_removed_by_x9) .unwrap_or(start_class); let mut sequence = if start_class == PDI && stack.len() > 1 { @@ -81,7 +146,7 @@ pub fn isolating_run_sequences( sequence.push(run); - if let RLI | LRI | FSI = end_class { + if matches!(end_class, RLI | LRI | FSI) { // Resume this sequence after the isolate. stack.push(sequence); } else { @@ -89,90 +154,82 @@ pub fn isolating_run_sequences( sequences.push(sequence); } } - // Pop any remaning sequences off the stack. + // Pop any remaining sequences off the stack. sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); // Determine the `sos` and `eos` class for each sequence. // - sequences - .into_iter() - .map(|sequence: Vec| { - assert!(!sequence.is_empty()); + for sequence in sequences { + assert!(!sequence.is_empty()); - let mut result = IsolatingRunSequence { - runs: sequence, - sos: L, - eos: L, - }; + let start_of_seq = sequence[0].start; + let runs_len = sequence.len(); + let end_of_seq = sequence[runs_len - 1].end; - let start_of_seq = result.runs[0].start; - let runs_len = result.runs.len(); - let end_of_seq = result.runs[runs_len - 1].end; - - // > (not counting characters removed by X9) - let seq_level = result - .iter_forwards_from(start_of_seq, 0) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[start_of_seq]); - - // XXXManishearth the spec talks of a start and end level, - // but for a given IRS the two should be equivalent, yes? - let end_level = result - .iter_backwards_from(end_of_seq, runs_len - 1) - .filter(|i| not_removed_by_x9(&original_classes[*i])) - .map(|i| levels[i]) - .next() - .unwrap_or(levels[end_of_seq - 1]); - - #[cfg(test)] - for run in result.runs.clone() { - for idx in run { - if not_removed_by_x9(&original_classes[idx]) { - assert_eq!(seq_level, levels[idx]); - } - } + let mut result = IsolatingRunSequence { + runs: sequence, + sos: L, + eos: L, + }; + + // > (not counting characters removed by X9) + let seq_level = levels[result + .iter_forwards_from(start_of_seq, 0) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(start_of_seq)]; + + // XXXManishearth the spec talks of a start and end level, + // but for a given IRS the two should be equivalent, yes? + let end_level = levels[result + .iter_backwards_from(end_of_seq, runs_len - 1) + .find(|i| not_removed_by_x9(&original_classes[*i])) + .unwrap_or(end_of_seq - 1)]; + + #[cfg(test)] + for idx in result.runs.clone().into_iter().flatten() { + if not_removed_by_x9(&original_classes[idx]) { + assert_eq!(seq_level, levels[idx]); } + } + + // Get the level of the last non-removed char before the runs. + let pred_level = match original_classes[..start_of_seq] + .iter() + .rposition(not_removed_by_x9) + { + Some(idx) => levels[idx], + None => para_level, + }; - // Get the level of the last non-removed char before the runs. - let pred_level = match original_classes[..start_of_seq] + // Get the last non-removed character to check if it is an isolate initiator. + // The spec calls for an unmatched one, but matched isolate initiators + // will never be at the end of a level run (otherwise there would be more to the run). + // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. + let last_non_removed = original_classes[..end_of_seq] + .iter() + .copied() + .rev() + .find(not_removed_by_x9) + .unwrap_or(BN); + + // Get the level of the next non-removed char after the runs. + let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) { + para_level + } else { + match original_classes[end_of_seq..] .iter() - .rposition(not_removed_by_x9) + .position(not_removed_by_x9) { - Some(idx) => levels[idx], + Some(idx) => levels[end_of_seq + idx], None => para_level, - }; + } + }; - // Get the last non-removed character to check if it is an isolate initiator. - // The spec calls for an unmatched one, but matched isolate initiators - // will never be at the end of a level run (otherwise there would be more to the run). - // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. - let last_non_removed = original_classes[..end_of_seq] - .iter() - .copied() - .rev() - .find(not_removed_by_x9) - .unwrap_or(BN); - - // Get the level of the next non-removed char after the runs. - let succ_level = if let RLI | LRI | FSI = last_non_removed { - para_level - } else { - match original_classes[end_of_seq..] - .iter() - .position(not_removed_by_x9) - { - Some(idx) => levels[end_of_seq + idx], - None => para_level, - } - }; + result.sos = max(seq_level, pred_level).bidi_class(); + result.eos = max(end_level, succ_level).bidi_class(); - result.sos = max(seq_level, pred_level).bidi_class(); - result.eos = max(end_level, succ_level).bidi_class(); - result - }) - .collect() + isolating_run_sequences.push(result); + } } impl IsolatingRunSequence { @@ -219,6 +276,9 @@ impl IsolatingRunSequence { /// Finds the level runs in a paragraph. /// /// +/// +/// This is only used by tests; normally level runs are identified during explicit::compute. +#[cfg(test)] fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec { assert_eq!(levels.len(), original_classes.len()); @@ -246,10 +306,7 @@ fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec /// /// pub fn removed_by_x9(class: BidiClass) -> bool { - match class { - RLE | LRE | RLO | LRO | PDF | BN => true, - _ => false, - } + matches!(class, RLE | LRE | RLO | LRO | PDF | BN) } // For use as a predicate for `position` / `rposition` @@ -281,7 +338,14 @@ mod tests { let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), @@ -294,7 +358,14 @@ mod tests { let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), @@ -307,7 +378,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), @@ -326,7 +404,14 @@ mod tests { let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + false, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 @@ -385,7 +470,14 @@ mod tests { let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); - let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); + let mut sequences = IsolatingRunSequenceVec::new(); + isolating_run_sequences( + para_level, + classes, + &Level::vec(levels), + level_runs(&Level::vec(levels), classes).into(), + true, + &mut sequences); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 diff --git a/third_party/rust/unicode-bidi/src/utf16.rs b/third_party/rust/unicode-bidi/src/utf16.rs index dcd9baf2be..11b386f91e 100644 --- a/third_party/rust/unicode-bidi/src/utf16.rs +++ b/third_party/rust/unicode-bidi/src/utf16.rs @@ -18,7 +18,9 @@ use crate::{ compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels, reorder_visual, visual_runs_for_line, }; -use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo}; +use crate::{ + BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags, +}; #[cfg(feature = "hardcoded-data")] use crate::HardcodedBidiData; @@ -83,7 +85,7 @@ struct InitialInfoExt<'text> { /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that /// requires no further bidi processing (i.e. there are no RTL characters or bidi /// control codes present). - pure_ltr: Vec, + flags: Vec, } impl<'text> InitialInfoExt<'text> { @@ -103,12 +105,12 @@ impl<'text> InitialInfoExt<'text> { default_para_level: Option, ) -> InitialInfoExt<'a> { let mut paragraphs = Vec::::new(); - let mut pure_ltr = Vec::::new(); - let (original_classes, _, _) = compute_initial_info( + let mut flags = Vec::::new(); + let (original_classes, _, _, _) = compute_initial_info( data_source, text, default_para_level, - Some((&mut paragraphs, &mut pure_ltr)), + Some((&mut paragraphs, &mut flags)), ); InitialInfoExt { @@ -117,7 +119,7 @@ impl<'text> InitialInfoExt<'text> { original_classes, paragraphs, }, - pure_ltr, + flags, } } } @@ -177,20 +179,21 @@ impl<'text> BidiInfo<'text> { text: &'a [u16], default_para_level: Option, ) -> BidiInfo<'a> { - let InitialInfoExt { base, pure_ltr, .. } = + let InitialInfoExt { base, flags, .. } = InitialInfoExt::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::::with_capacity(text.len()); let mut processing_classes = base.original_classes.clone(); - for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) { + for (para, flags) in base.paragraphs.iter().zip(flags.iter()) { let text = &text[para.range.clone()]; let original_classes = &base.original_classes[para.range.clone()]; compute_bidi_info_for_para( data_source, para, - *is_pure_ltr, + flags.is_pure_ltr, + flags.has_isolate_controls, text, original_classes, &mut processing_classes, @@ -411,7 +414,7 @@ impl<'text> ParagraphBidiInfo<'text> { ) -> ParagraphBidiInfo<'a> { // Here we could create a ParagraphInitialInfo struct to parallel the one // used by BidiInfo, but there doesn't seem any compelling reason for it. - let (original_classes, paragraph_level, is_pure_ltr) = + let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) = compute_initial_info(data_source, text, default_para_level, None); let mut levels = Vec::::with_capacity(text.len()); @@ -429,6 +432,7 @@ impl<'text> ParagraphBidiInfo<'text> { data_source, ¶_info, is_pure_ltr, + has_isolate_controls, text, &original_classes, &mut processing_classes, @@ -551,12 +555,12 @@ impl<'text> ParagraphBidiInfo<'text> { /// /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4 -fn reorder_line<'text>( - text: &'text [u16], +fn reorder_line( + text: &[u16], line: Range, levels: Vec, runs: Vec, -) -> Cow<'text, [u16]> { +) -> Cow<'_, [u16]> { // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return text[line].into(); @@ -668,15 +672,15 @@ impl<'text> TextSource<'text> for [u16] { } #[inline] fn chars(&'text self) -> Self::CharIter { - Utf16CharIter::new(&self) + Utf16CharIter::new(self) } #[inline] fn char_indices(&'text self) -> Self::CharIndexIter { - Utf16CharIndexIter::new(&self) + Utf16CharIndexIter::new(self) } #[inline] fn indices_lengths(&'text self) -> Self::IndexLenIter { - Utf16IndexLenIter::new(&self) + Utf16IndexLenIter::new(self) } #[inline] fn char_len(ch: char) -> usize { -- cgit v1.2.3