Adding upstream version 124.0.1.upstream/124.0.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/unicode-bidi/src/utf16.rs
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 791 insertions, 0 deletions
diff --git a/third_party/rust/unicode-bidi/src/utf16.rs b/third_party/rust/unicode-bidi/src/utf16.rs
new file mode 100644
index 0000000000..dcd9baf2be
--- /dev/null
+++ b/third_party/rust/unicode-bidi/src/utf16.rs
@@ -0,0 +1,791 @@
+// Copyright 2023 The Mozilla Foundation. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use super::TextSource;
+
+use alloc::borrow::Cow;
+use alloc::vec::Vec;
+use core::char;
+use core::ops::Range;
+
+use crate::{
+    compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
+    reorder_visual, visual_runs_for_line,
+};
+use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo};
+
+#[cfg(feature = "hardcoded-data")]
+use crate::HardcodedBidiData;
+
+/// Initial bidi information of the text (UTF-16 version).
+///
+/// Contains the text paragraphs and `BidiClass` of its characters.
+#[derive(PartialEq, Debug)]
+pub struct InitialInfo<'text> {
+    /// The text, as a slice of UTF-16 code units.
+    pub text: &'text [u16],
+
+    /// The BidiClass of the character at each code unit in the text.
+    /// If a character is multiple code units, its class will appear multiple times in the vector.
+    pub original_classes: Vec<BidiClass>,
+
+    /// The boundaries and level of each paragraph within the text.
+    pub paragraphs: Vec<ParagraphInfo>,
+}
+
+impl<'text> InitialInfo<'text> {
+    /// Find the paragraphs and BidiClasses in UTF-16 text, using the hardcoded Bidi data.
+    ///
+    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
+    ///
+    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
+    /// character is found before the matching PDI.  If no strong character is found, the class will
+    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
+    ///
+    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[cfg(feature = "hardcoded-data")]
+    pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
+        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
+    }
+
+    /// Find the paragraphs and BidiClasses in UTF-16 text, with a custom [`BidiDataSource`]
+    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
+    /// instead (enabled with the default `hardcoded-data` Cargo feature).
+    ///
+    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
+    ///
+    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
+    /// character is found before the matching PDI.  If no strong character is found, the class will
+    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn new_with_data_source<'a, D: BidiDataSource>(
+        data_source: &D,
+        text: &'a [u16],
+        default_para_level: Option<Level>,
+    ) -> InitialInfo<'a> {
+        InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
+    }
+}
+
+/// Extended version of [`InitialInfo`] that also carries per-paragraph flags (not public API).
+#[derive(PartialEq, Debug)]
+struct InitialInfoExt<'text> {
+    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
+    base: InitialInfo<'text>,
+
+    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
+    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
+    /// control codes present).
+    pure_ltr: Vec<bool>,
+}
+
+impl<'text> InitialInfoExt<'text> {
+    /// Find the paragraphs and BidiClasses in UTF-16 text, with a custom [`BidiDataSource`]
+    /// for Bidi data, additionally recording a `pure_ltr` flag for each paragraph. This is the
+    /// internal worker that [`InitialInfo::new_with_data_source()`] wraps.
+    ///
+    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
+    ///
+    /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
+    /// character is found before the matching PDI.  If no strong character is found, the class will
+    /// remain FSI, and it's up to later stages to treat these as LRI when needed.
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn new_with_data_source<'a, D: BidiDataSource>(
+        data_source: &D,
+        text: &'a [u16],
+        default_para_level: Option<Level>,
+    ) -> InitialInfoExt<'a> {
+        let mut paragraphs = Vec::<ParagraphInfo>::new();
+        let mut pure_ltr = Vec::<bool>::new();
+        let (original_classes, _, _) = compute_initial_info(
+            data_source,
+            text,
+            default_para_level,
+            Some((&mut paragraphs, &mut pure_ltr)), // out-params collecting the per-paragraph results
+        );
+
+        InitialInfoExt {
+            base: InitialInfo {
+                text,
+                original_classes,
+                paragraphs,
+            },
+            pure_ltr,
+        }
+    }
+}
+
+/// Bidi information of the text (UTF-16 version).
+///
+/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
+/// character is multiple code units wide, then its class and level will appear multiple times in these
+/// vectors.
+// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
+#[derive(Debug, PartialEq)]
+pub struct BidiInfo<'text> {
+    /// The text, as a slice of UTF-16 code units.
+    pub text: &'text [u16],
+
+    /// The BidiClass of the character at each code unit in the text.
+    pub original_classes: Vec<BidiClass>,
+
+    /// The directional embedding level of each code unit in the text.
+    pub levels: Vec<Level>,
+
+    /// The boundaries and paragraph embedding level of each paragraph within the text.
+    ///
+    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
+    /// Or just don't include the first paragraph, which always starts at 0?
+    pub paragraphs: Vec<ParagraphInfo>,
+}
+
+impl<'text> BidiInfo<'text> {
+    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
+    ///
+    /// Delegates to [`Self::new_with_data_source()`] with the built-in `HardcodedBidiData`.
+    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
+    ///
+    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
+    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
+    ///
+    /// TODO: Support auto-RTL base direction
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[cfg(feature = "hardcoded-data")]
+    #[inline]
+    pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
+        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
+    }
+
+    /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph,
+    /// with a custom [`BidiDataSource`] for Bidi data. If you just wish to use the hardcoded Bidi
+    /// data, please use [`BidiInfo::new()`] instead (enabled with the default `hardcoded-data` Cargo feature).
+    ///
+    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
+    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
+    ///
+    /// TODO: Support auto-RTL base direction
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn new_with_data_source<'a, D: BidiDataSource>(
+        data_source: &D,
+        text: &'a [u16],
+        default_para_level: Option<Level>,
+    ) -> BidiInfo<'a> {
+        let InitialInfoExt { base, pure_ltr, .. } =
+            InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
+
+        let mut levels = Vec::<Level>::with_capacity(text.len());
+        let mut processing_classes = base.original_classes.clone(); // mutable working copy
+
+        for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
+            let text = &text[para.range.clone()];
+            let original_classes = &base.original_classes[para.range.clone()];
+
+            compute_bidi_info_for_para(
+                data_source,
+                para,
+                *is_pure_ltr,
+                text,
+                original_classes,
+                &mut processing_classes,
+                &mut levels,
+            );
+        }
+
+        BidiInfo {
+            text,
+            original_classes: base.original_classes,
+            paragraphs: base.paragraphs,
+            levels,
+        }
+    }
+
+    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
+    /// in the paragraph. The returned vector includes code units that are not included
+    /// in the `line`, but will not adjust them.
+    ///
+    /// This runs [Rule L1], you can run
+    /// [Rule L2] by calling [`Self::reorder_visual()`].
+    /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
+    /// to get per-character rather than per-code-unit indices.
+    ///
+    /// For an all-in-one reordering solution, consider using [`Self::reorder_line()`].
+    ///
+    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
+    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
+        assert!(line.start <= self.levels.len()); // panics if `line` starts past the end
+        assert!(line.end <= self.levels.len()); // panics if `line` ends past the end
+
+        let mut levels = self.levels.clone();
+        let line_classes = &self.original_classes[line.clone()];
+        let line_levels = &mut levels[line.clone()];
+        let line_str: &[u16] = &self.text[line.clone()];
+
+        reorder_levels(line_classes, line_levels, line_str, para.level);
+
+        levels
+    }
+
+    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
+    /// in the paragraph. The returned vector includes characters that are not included
+    /// in the `line`, but will not adjust them.
+    ///
+    /// This runs [Rule L1], you can run
+    /// [Rule L2] by calling [`Self::reorder_visual()`].
+    /// Since the levels returned here are per character, the index map produced that way
+    /// refers to characters rather than code units.
+    ///
+    /// For an all-in-one reordering solution, consider using [`Self::reorder_line()`].
+    ///
+    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
+    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn reordered_levels_per_char(
+        &self,
+        para: &ParagraphInfo,
+        line: Range<usize>,
+    ) -> Vec<Level> {
+        let levels = self.reordered_levels(para, line);
+        self.text.char_indices().map(|(i, _)| levels[i]).collect() // level at each char's start index
+    }
+
+    /// Re-order a line by resolved levels into display order (unchanged if it has no RTL levels).
+    ///
+    /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
+    ///
+    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
+    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
+        if !level::has_rtl(&self.levels[line.clone()]) {
+            return self.text[line].into(); // fast path: hand back the original slice
+        }
+        let (levels, runs) = self.visual_runs(para, line.clone());
+        reorder_line(self.text, line, levels, runs)
+    }
+
+    /// Reorders pre-calculated levels of a sequence of characters.
+    ///
+    /// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
+    /// intended to be used when an application has determined the levels of the objects (character sequences)
+    /// and just needs to have them reordered.
+    ///
+    /// The index map will result in `indexMap[visualIndex]==logicalIndex`.
+    ///
+    /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
+    /// information about the actual text.
+    ///
+    /// Furthermore, if `levels` is an array that is aligned with code units, code units within a codepoint may be
+    /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
+    /// is for a single code point.
+    ///
+    ///
+    /// # Example
+    /// ```
+    /// use unicode_bidi::BidiInfo;
+    /// use unicode_bidi::Level;
+    ///
+    /// let l0 = Level::from(0);
+    /// let l1 = Level::from(1);
+    /// let l2 = Level::from(2);
+    ///
+    /// let levels = vec![l0, l0, l0, l0];
+    /// let index_map = BidiInfo::reorder_visual(&levels);
+    /// assert_eq!(levels.len(), index_map.len());
+    /// assert_eq!(index_map, [0, 1, 2, 3]);
+    ///
+    /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
+    /// let index_map = BidiInfo::reorder_visual(&levels);
+    /// assert_eq!(levels.len(), index_map.len());
+    /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
+    /// ```
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[inline]
+    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
+        reorder_visual(levels)
+    }
+
+    /// Find the level runs within a line and return them in visual order.
+    ///
+    /// `line` is a range of code-unit indices within `levels`.
+    ///
+    /// The first return value is a vector of levels used by the reordering algorithm,
+    /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
+    /// the result of [Rule L2], showing the visual order that each level run (a run of text with the
+    /// same level) should be displayed. Within each run, the display order can be checked
+    /// against the Level vector.
+    ///
+    /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
+    /// as that should be handled by the engine using this API.
+    ///
+    /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
+    /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
+    /// of producing a level map, since one may wish to deal with the fact that this is operating on
+    /// code unit rather than character indices.
+    ///
+    /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
+    ///
+    /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
+    /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
+    /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
+    /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[inline]
+    pub fn visual_runs(
+        &self,
+        para: &ParagraphInfo,
+        line: Range<usize>,
+    ) -> (Vec<Level>, Vec<LevelRun>) {
+        let levels = self.reordered_levels(para, line.clone());
+        visual_runs_for_line(levels, &line)
+    }
+
+    /// Returns whether the processed text has any computed RTL levels.
+    ///
+    /// This information is usually used to skip re-ordering of text when no RTL level is present.
+    #[inline]
+    pub fn has_rtl(&self) -> bool {
+        level::has_rtl(&self.levels)
+    }
+}
+
+/// Bidi information of text treated as a single paragraph (UTF-16 version).
+///
+/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text.  If a
+/// character is multiple code units wide, then its class and level will appear multiple times in these
+/// vectors.
+#[derive(Debug, PartialEq)]
+pub struct ParagraphBidiInfo<'text> {
+    /// The text, as a slice of UTF-16 code units.
+    pub text: &'text [u16],
+
+    /// The BidiClass of the character at each code unit in the text.
+    pub original_classes: Vec<BidiClass>,
+
+    /// The directional embedding level of each code unit in the text.
+    pub levels: Vec<Level>,
+
+    /// The paragraph embedding level.
+    pub paragraph_level: Level,
+
+    /// Whether the paragraph is purely LTR.
+    pub is_pure_ltr: bool,
+}
+
+impl<'text> ParagraphBidiInfo<'text> {
+    /// Determine the bidi embedding level.
+    ///
+    /// Delegates to [`Self::new_with_data_source()`] with the built-in `HardcodedBidiData`.
+    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
+    ///
+    /// TODO: In early steps, check for special cases that allow later steps to be skipped, like
+    /// text that is entirely LTR.  See the `nsBidi` class from Gecko for comparison.
+    ///
+    /// TODO: Support auto-RTL base direction
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[cfg(feature = "hardcoded-data")]
+    #[inline]
+    pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
+        Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
+    }
+
+    /// Determine the bidi embedding level, with a custom [`BidiDataSource`]
+    /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use
+    /// [`ParagraphBidiInfo::new()`] instead (enabled with the default `hardcoded-data` Cargo feature).
+    ///
+    /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
+    /// and should be kept in sync with it.)
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn new_with_data_source<'a, D: BidiDataSource>(
+        data_source: &D,
+        text: &'a [u16],
+        default_para_level: Option<Level>,
+    ) -> ParagraphBidiInfo<'a> {
+        // Here we could create a ParagraphInitialInfo struct to parallel the one
+        // used by BidiInfo, but there doesn't seem to be any compelling reason for it.
+        let (original_classes, paragraph_level, is_pure_ltr) =
+            compute_initial_info(data_source, text, default_para_level, None);
+
+        let mut levels = Vec::<Level>::with_capacity(text.len());
+        let mut processing_classes = original_classes.clone(); // mutable working copy
+
+        let para_info = ParagraphInfo {
+            range: Range {
+                start: 0,
+                end: text.len(),
+            },
+            level: paragraph_level,
+        };
+
+        compute_bidi_info_for_para(
+            data_source,
+            &para_info,
+            is_pure_ltr,
+            text,
+            &original_classes,
+            &mut processing_classes,
+            &mut levels,
+        );
+
+        ParagraphBidiInfo {
+            text,
+            original_classes,
+            levels,
+            paragraph_level,
+            is_pure_ltr,
+        }
+    }
+
+    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
+    /// in the paragraph. The returned vector includes code units that are not included
+    /// in the `line`, but will not adjust them.
+    ///
+    /// See [`BidiInfo::reordered_levels`] for details.
+    ///
+    /// (This should be kept in sync with [`BidiInfo::reordered_levels`].)
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
+        assert!(line.start <= self.levels.len()); // panics if `line` starts past the end
+        assert!(line.end <= self.levels.len()); // panics if `line` ends past the end
+
+        let mut levels = self.levels.clone();
+        let line_classes = &self.original_classes[line.clone()];
+        let line_levels = &mut levels[line.clone()];
+
+        reorder_levels(
+            line_classes,
+            line_levels,
+            self.text.subrange(line),
+            self.paragraph_level,
+        );
+
+        levels
+    }
+
+    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
+    /// in the paragraph. The returned vector includes characters that are not included
+    /// in the `line`, but will not adjust them.
+    ///
+    /// See [`BidiInfo::reordered_levels_per_char`] for details.
+    ///
+    /// (This should be kept in sync with [`BidiInfo::reordered_levels_per_char`].)
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
+        let levels = self.reordered_levels(line);
+        self.text.char_indices().map(|(i, _)| levels[i]).collect() // level at each char's start index
+    }
+
+    /// Re-order a line by resolved levels into display order (unchanged if it has no RTL levels).
+    ///
+    /// See [`BidiInfo::reorder_line`] for details.
+    ///
+    /// (This should be kept in sync with [`BidiInfo::reorder_line`].)
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
+        if !level::has_rtl(&self.levels[line.clone()]) {
+            return self.text[line].into(); // fast path: hand back the original slice
+        }
+        let (levels, runs) = self.visual_runs(line.clone());
+        reorder_line(self.text, line, levels, runs)
+    }
+
+    /// Reorders pre-calculated levels of a sequence of characters, returning an index map.
+    ///
+    /// See [`BidiInfo::reorder_visual`] for details.
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[inline]
+    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
+        reorder_visual(levels)
+    }
+
+    /// Find the level runs within a line and return them in visual order.
+    ///
+    /// `line` is a range of code-unit indices within `levels`.
+    ///
+    /// See [`BidiInfo::visual_runs`] for details.
+    ///
+    /// (This should be kept in sync with [`BidiInfo::visual_runs`].)
+    #[cfg_attr(feature = "flame_it", flamer::flame)]
+    #[inline]
+    pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
+        let levels = self.reordered_levels(line.clone());
+        visual_runs_for_line(levels, &line)
+    }
+
+    /// Whether the text may contain RTL levels: `true` unless the paragraph was found to be pure LTR.
+    ///
+    /// This information is usually used to skip re-ordering of text when no RTL level is present.
+    #[inline]
+    pub fn has_rtl(&self) -> bool {
+        !self.is_pure_ltr
+    }
+
+    /// Returns the paragraph's [`Direction`] (Ltr, Rtl, or Mixed) based on its resolved levels.
+    #[inline]
+    pub fn direction(&self) -> Direction {
+        para_direction(&self.levels)
+    }
+}
+
+/// Return a line of the text in display order based on resolved levels.
+///
+/// `text`   the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
+/// `line`   a range of code-unit indices within `text` corresponding to one line
+/// `levels` array of `Level` values for the text, with `line`'s levels adjusted per Rule L1
+/// `runs`   array of `LevelRun`s in visual order
+///
+/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
+/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
+///
+/// Returns: the reordered text of the line.
+///
+/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
+///
+/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
+/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
+fn reorder_line<'text>(
+    text: &'text [u16],
+    line: Range<usize>,
+    levels: Vec<Level>,
+    runs: Vec<LevelRun>,
+) -> Cow<'text, [u16]> {
+    // If every level run is LTR, no reordering is needed: borrow the line as-is.
+    if runs.iter().all(|run| levels[run.start].is_ltr()) {
+        return text[line].into();
+    }
+
+    let mut result = Vec::<u16>::with_capacity(line.len());
+    for run in runs {
+        if levels[run.start].is_rtl() {
+            let mut buf = [0; 2]; // scratch space for re-encoding one char as UTF-16
+            for c in text[run].chars().rev() {
+                result.extend(c.encode_utf16(&mut buf).iter());
+            }
+        } else {
+            result.extend(text[run].iter());
+        }
+    }
+    result.into()
+}
+
+/// Holds a reference to a `BidiInfo` and to one of its `paragraphs`.
+/// It supports the paragraph operations that also need access to the
+/// `BidiInfo`, such as `direction`.
+#[derive(Debug)]
+pub struct Paragraph<'a, 'text> {
+    pub info: &'a BidiInfo<'text>,
+    pub para: &'a ParagraphInfo,
+}
+
+impl<'a, 'text> Paragraph<'a, 'text> {
+    #[inline]
+    pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
+        Paragraph { info, para }
+    }
+
+    /// Returns whether the paragraph is left-to-right, right-to-left, or mixed.
+    #[inline]
+    pub fn direction(&self) -> Direction {
+        para_direction(&self.info.levels[self.para.range.clone()])
+    }
+
+    /// Returns the `Level` at code-unit offset `pos` from the start of the paragraph.
+    #[inline]
+    pub fn level_at(&self, pos: usize) -> Level {
+        let actual_position = self.para.range.start + pos;
+        self.info.levels[actual_position]
+    }
+}
+
+/// Implementation of TextSource for UTF-16 text in a [u16] array.
+/// Note that there could be unpaired surrogates present!
+
+// Convenience functions to check whether a UTF-16 code unit is a high or a low surrogate.
+#[inline]
+fn is_high_surrogate(code: u16) -> bool {
+    (code & 0xFC00) == 0xD800 // top six bits are 110110: 0xD800..=0xDBFF
+}
+#[inline]
+fn is_low_surrogate(code: u16) -> bool {
+    (code & 0xFC00) == 0xDC00 // top six bits are 110111: 0xDC00..=0xDFFF
+}
+
+impl<'text> TextSource<'text> for [u16] {
+    type CharIter = Utf16CharIter<'text>;
+    type CharIndexIter = Utf16CharIndexIter<'text>;
+    type IndexLenIter = Utf16IndexLenIter<'text>;
+
+    #[inline]
+    fn len(&self) -> usize {
+        (self as &[u16]).len() // number of u16 code units
+    }
+    fn char_at(&self, index: usize) -> Option<(char, usize)> {
+        if index >= self.len() {
+            return None;
+        }
+        // Get the indicated code unit and try simply converting it to a char (length 1);
+        // this will fail if it is a surrogate code unit, whether paired or not.
+        let c = self[index];
+        if let Some(ch) = char::from_u32(c.into()) {
+            return Some((ch, 1));
+        }
+        // If it's a low surrogate, and was immediately preceded by a high surrogate,
+        // then we're in the middle of a (valid) character, and should return None.
+        if is_low_surrogate(c) && index > 0 && is_high_surrogate(self[index - 1]) {
+            return None;
+        }
+        // Otherwise, try to decode, returning REPLACEMENT_CHARACTER for errors.
+        if let Some(ch) = char::decode_utf16(self[index..].iter().cloned()).next() {
+            if let Ok(ch) = ch {
+                // This must be a surrogate pair, otherwise char::from_u32() above should
+                // have succeeded!
+                debug_assert!(ch.len_utf16() == 2, "BMP should have already been handled");
+                return Some((ch, ch.len_utf16()));
+            }
+        } else {
+            debug_assert!(
+                false,
+                "Why did decode_utf16 return None when we're not at the end?"
+            );
+            return None;
+        }
+        // Failed to decode UTF-16: we must have encountered an unpaired surrogate.
+        // Return REPLACEMENT_CHARACTER (not None), to continue processing the following text
+        // and keep indexing correct.
+        Some((char::REPLACEMENT_CHARACTER, 1))
+    }
+    #[inline]
+    fn subrange(&self, range: Range<usize>) -> &Self {
+        &(self as &[u16])[range] // plain slice indexing; panics if `range` is out of bounds
+    }
+    #[inline]
+    fn chars(&'text self) -> Self::CharIter {
+        Utf16CharIter::new(&self)
+    }
+    #[inline]
+    fn char_indices(&'text self) -> Self::CharIndexIter {
+        Utf16CharIndexIter::new(&self)
+    }
+    #[inline]
+    fn indices_lengths(&'text self) -> Self::IndexLenIter {
+        Utf16IndexLenIter::new(&self)
+    }
+    #[inline]
+    fn char_len(ch: char) -> usize {
+        ch.len_utf16() // 1 or 2 code units
+    }
+}
+
+/// Iterator over UTF-16 text in a `[u16]` slice, yielding `(index, char_len)` tuples in code units.
+#[derive(Debug)]
+pub struct Utf16IndexLenIter<'text> {
+    text: &'text [u16],
+    cur_pos: usize, // code-unit index of the next character to yield
+}
+
+impl<'text> Utf16IndexLenIter<'text> {
+    #[inline]
+    pub fn new(text: &'text [u16]) -> Self {
+        Utf16IndexLenIter { text, cur_pos: 0 } // iteration starts at the first code unit
+    }
+}
+
+impl Iterator for Utf16IndexLenIter<'_> {
+    type Item = (usize, usize);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some((_, char_len)) = self.text.char_at(self.cur_pos) {
+            let result = (self.cur_pos, char_len);
+            self.cur_pos += char_len; // advance past the whole character (1 or 2 code units)
+            return Some(result);
+        }
+        None
+    }
+}
+
+/// Iterator over UTF-16 text in a `[u16]` slice, yielding `(code-unit index, char)` tuples.
+#[derive(Debug)]
+pub struct Utf16CharIndexIter<'text> {
+    text: &'text [u16],
+    cur_pos: usize, // code-unit index of the next character to yield
+}
+
+impl<'text> Utf16CharIndexIter<'text> {
+    pub fn new(text: &'text [u16]) -> Self {
+        Utf16CharIndexIter { text, cur_pos: 0 } // iteration starts at the first code unit
+    }
+}
+
+impl Iterator for Utf16CharIndexIter<'_> {
+    type Item = (usize, char);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
+            let result = (self.cur_pos, ch);
+            self.cur_pos += char_len; // advance past the whole character (1 or 2 code units)
+            return Some(result);
+        }
+        None
+    }
+}
+
+/// Iterator over UTF-16 text in a `[u16]` slice, yielding Unicode chars.
+/// (Unlike the other iterators above, this also supports reverse iteration.)
+#[derive(Debug)]
+pub struct Utf16CharIter<'text> {
+    text: &'text [u16],
+    cur_pos: usize, // front cursor: code-unit index of the next char for `next()`
+    end_pos: usize, // back cursor (exclusive): `next_back()` yields the char ending just before it
+}
+
+impl<'text> Utf16CharIter<'text> {
+    pub fn new(text: &'text [u16]) -> Self {
+        Utf16CharIter {
+            text,
+            cur_pos: 0, // front cursor starts at the first code unit
+            end_pos: text.len(), // back cursor starts one past the last code unit
+        }
+    }
+}
+
+impl Iterator for Utf16CharIter<'_> {
+    type Item = char;
+    /// Yields the next char from the front; `None` once the front cursor meets `end_pos`.
+    fn next(&mut self) -> Option<Self::Item> {
+        // Decode only within `..end_pos`, so chars already taken by `next_back()` never repeat.
+        let remaining = &self.text[..self.end_pos];
+        let (ch, char_len) = remaining.char_at(self.cur_pos)?;
+        self.cur_pos += char_len;
+        Some(ch)
+    }
+}
+
+impl DoubleEndedIterator for Utf16CharIter<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.end_pos <= self.cur_pos {
+            return None; // front and back cursors have met
+        }
+        self.end_pos -= 1;
+        if let Some(ch) = char::from_u32(self.text[self.end_pos] as u32) {
+            return Some(ch); // not a surrogate: a complete char on its own
+        }
+        if self.end_pos > self.cur_pos {
+            if let Some((ch, char_len)) = self.text.char_at(self.end_pos - 1) {
+                if char_len == 2 {
+                    self.end_pos -= 1; // also step over the first code unit of the pair
+                    return Some(ch);
+                }
+            }
+        }
+        Some(char::REPLACEMENT_CHARACTER) // unpaired surrogate
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/unicode-bidi/src/utf16.rs
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip