summaryrefslogtreecommitdiffstats
path: root/vendor/line-index
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:59:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:59:24 +0000
commit023939b627b7dc93b01471f7d41fb8553ddb4ffa (patch)
tree60fc59477c605c72b0a1051409062ddecc43f877 /vendor/line-index
parentAdding debian version 1.72.1+dfsg1-1. (diff)
downloadrustc-023939b627b7dc93b01471f7d41fb8553ddb4ffa.tar.xz
rustc-023939b627b7dc93b01471f7d41fb8553ddb4ffa.zip
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/line-index')
-rw-r--r--vendor/line-index/.cargo-checksum.json1
-rw-r--r--vendor/line-index/Cargo.toml25
-rw-r--r--vendor/line-index/src/lib.rs237
-rw-r--r--vendor/line-index/src/tests.rs11
-rw-r--r--vendor/line-index/tests/it.rs62
5 files changed, 336 insertions, 0 deletions
diff --git a/vendor/line-index/.cargo-checksum.json b/vendor/line-index/.cargo-checksum.json
new file mode 100644
index 000000000..d96ced783
--- /dev/null
+++ b/vendor/line-index/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"a10220f394cb62baef1dbf0d96c188b21eef910259d21c7f7d60dd622306c961","src/lib.rs":"12d63393f2a07750ad7ad1a344b543792800518d9cf74955301ea33782640bae","src/tests.rs":"4741ca88d75c136fedf6c698cd58aaae7a2c092f754f0fecee80774e53f4e8e4","tests/it.rs":"aa3cc4fb79acd647d7d9f74134fd05b728039bd6eefea19b28eaff450e830b24"},"package":"2cad96769710c1745e11d4f940a8ff36000ade4bbada4285b001cb8aa2f745ce"} \ No newline at end of file
diff --git a/vendor/line-index/Cargo.toml b/vendor/line-index/Cargo.toml
new file mode 100644
index 000000000..06efc2071
--- /dev/null
+++ b/vendor/line-index/Cargo.toml
@@ -0,0 +1,25 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "line-index"
+version = "0.1.0-pre.1"
+description = "Maps flat `TextSize` offsets to/from `(line, column)` representation."
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index"
+resolver = "1"
+
+[dependencies.nohash-hasher]
+version = "0.2.0"
+
+[dependencies.text-size]
+version = "1.1.0"
diff --git a/vendor/line-index/src/lib.rs b/vendor/line-index/src/lib.rs
new file mode 100644
index 000000000..ad67d3f24
--- /dev/null
+++ b/vendor/line-index/src/lib.rs
@@ -0,0 +1,237 @@
+//! See [`LineIndex`].
+
+#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]
+
+#[cfg(test)]
+mod tests;
+
+use nohash_hasher::IntMap;
+
+pub use text_size::{TextRange, TextSize};
+
+/// A `(line, column)` position whose column is measured in the native, UTF-8 encoding.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct LineCol {
+ /// Zero-based line number.
+ pub line: u32,
+ /// Zero-based column, counted in UTF-8 code units (bytes) from the start of the line.
+ pub col: u32,
+}
+
+/// A kind of wide character encoding in which a column can be expressed instead of UTF-8.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[non_exhaustive]
+pub enum WideEncoding {
+ /// UTF-16: lengths and columns are counted in 16-bit code units.
+ Utf16,
+ /// UTF-32: lengths and columns are counted in `char`s, one code unit each.
+ Utf32,
+}
+
+impl WideEncoding {
+ /// Returns the number of code units it takes to encode `text` in this encoding.
+ pub fn measure(&self, text: &str) -> usize {
+ match self {
+ WideEncoding::Utf16 => text.encode_utf16().count(), // counts 16-bit units
+ WideEncoding::Utf32 => text.chars().count(), // counts `char`s
+ }
+ }
+}
+
+/// A `(line, column)` position whose column is measured in a wide encoding.
+///
+/// See [`WideEncoding`] for the kinds of wide encodings available.
+//
+// Deliberately a separate, non-generic type from `LineCol` (presumably so the two column kinds cannot be mixed up).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct WideLineCol {
+ /// Zero-based line number.
+ pub line: u32,
+ /// Zero-based column, counted in code units of the wide encoding in use.
+ pub col: u32,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct WideChar { // span of one non-ASCII character, relative to the start of its line
+ /// Start offset of the character inside its line, zero-based, in UTF-8 code units.
+ start: TextSize,
+ /// End offset (exclusive: `start` plus the UTF-8 length) of the character inside its line.
+ end: TextSize,
+}
+
+impl WideChar {
+ /// Returns the length of the character in 8-bit UTF-8 code units (`end - start`).
+ fn len(&self) -> TextSize {
+ self.end - self.start
+ }
+
+ /// Returns the length of the character in code units of `enc` (UTF-16 or UTF-32).
+ fn wide_len(&self, enc: WideEncoding) -> u32 {
+ match enc {
+ WideEncoding::Utf16 => {
+ if self.len() == TextSize::from(4) { // 4-byte UTF-8 characters take a surrogate pair in UTF-16
+ 2
+ } else {
+ 1
+ }
+ }
+ WideEncoding::Utf32 => 1, // every character is a single UTF-32 code unit
+ }
+ }
+}
+
+/// Maps flat [`TextSize`] offsets to/from `(line, column)` representation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct LineIndex {
+ /// Offset of the beginning of each line (except the first, which always has offset 0).
+ newlines: Box<[TextSize]>,
+ /// Non-ASCII characters of each line, keyed by zero-based line number; lines without any have no entry.
+ line_wide_chars: IntMap<u32, Box<[WideChar]>>,
+ /// The length of the entire text.
+ len: TextSize,
+}
+
+impl LineIndex {
+ /// Builds a `LineIndex` for the `text` in a single pass over its characters.
+ pub fn new(text: &str) -> LineIndex {
+ let mut newlines = Vec::<TextSize>::with_capacity(16); // start offsets of lines 1.. (line 0 starts at 0)
+ let mut line_wide_chars = IntMap::<u32, Box<[WideChar]>>::default();
+
+ let mut wide_chars = Vec::<WideChar>::new(); // non-ASCII characters seen so far on the current line
+ let mut cur_row = TextSize::from(0); // despite the name: running offset from the start of the text
+ let mut cur_col = TextSize::from(0); // running offset from the start of the current line
+ let mut line = 0u32;
+
+ for c in text.chars() {
+ let c_len = TextSize::of(c);
+ cur_row += c_len;
+ if c == '\n' {
+ newlines.push(cur_row); // `cur_row` already includes the newline, so this is where the next line starts
+
+ // Save any wide characters seen on the line that just ended
+ if !wide_chars.is_empty() {
+ let cs = std::mem::take(&mut wide_chars).into_boxed_slice();
+ line_wide_chars.insert(line, cs);
+ }
+
+ // Prepare for processing the next line
+ cur_col = TextSize::from(0);
+ line += 1;
+ continue;
+ }
+
+ if !c.is_ascii() {
+ wide_chars.push(WideChar { start: cur_col, end: cur_col + c_len });
+ }
+
+ cur_col += c_len;
+ }
+
+ // Save any wide characters seen on the last line, which no newline terminated
+ if !wide_chars.is_empty() {
+ line_wide_chars.insert(line, wide_chars.into_boxed_slice());
+ }
+
+ LineIndex {
+ newlines: newlines.into_boxed_slice(),
+ line_wide_chars,
+ len: TextSize::of(text),
+ }
+ }
+
+ /// Transforms the `TextSize` into a `LineCol`.
+ ///
+ /// # Panics
+ ///
+ /// If the offset is invalid. See [`Self::try_line_col`].
+ pub fn line_col(&self, offset: TextSize) -> LineCol {
+ self.try_line_col(offset).expect("invalid offset")
+ }
+
+ /// Transforms the `TextSize` into a `LineCol`.
+ ///
+ /// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or
+ /// points to the middle of a multi-byte character.
+ pub fn try_line_col(&self, offset: TextSize) -> Option<LineCol> {
+ if offset > self.len {
+ return None;
+ }
+ let line = self.newlines.partition_point(|&it| it <= offset); // number of stored line starts at or before `offset`
+ let start = self.start_offset(line)?;
+ let col = offset - start;
+ let ret = LineCol { line: line as u32, col: col.into() };
+ self.line_wide_chars
+ .get(&ret.line)
+ .into_iter()
+ .flat_map(|it| it.iter())
+ .all(|it| col <= it.start || it.end <= col) // fails for a column strictly inside a multi-byte character
+ .then_some(ret)
+ }
+
+ /// Transforms the `LineCol` into a `TextSize`; `None` if the line does not exist. The column is not validated.
+ pub fn offset(&self, line_col: LineCol) -> Option<TextSize> {
+ self.start_offset(line_col.line as usize).map(|start| start + TextSize::from(line_col.col))
+ }
+
+ fn start_offset(&self, line: usize) -> Option<TextSize> { // start offset of `line`, or `None` if there is no such line
+ match line.checked_sub(1) {
+ None => Some(TextSize::from(0)), // line 0 is not stored in `newlines`
+ Some(it) => self.newlines.get(it).copied(),
+ }
+ }
+
+ /// Converts a UTF-8 `LineCol` into a `WideLineCol` whose column is measured in `enc` code units.
+ pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option<WideLineCol> {
+ let mut col = line_col.col;
+ if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) {
+ for c in wide_chars.iter() {
+ if u32::from(c.end) <= line_col.col {
+ col = col.checked_sub(u32::from(c.len()) - c.wide_len(enc))?;
+ } else {
+ // From here on, all wide characters end *after* the column we are mapping,
+ // so they do not need to be taken into account
+ break;
+ }
+ }
+ }
+ Some(WideLineCol { line: line_col.line, col })
+ }
+
+ /// Converts a `WideLineCol` measured in `enc` code units back into a UTF-8 `LineCol`.
+ pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> Option<LineCol> {
+ let mut col = line_col.col;
+ if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) {
+ for c in wide_chars.iter() {
+ if col > u32::from(c.start) {
+ col = col.checked_add(u32::from(c.len()) - c.wide_len(enc))?;
+ } else {
+ // From here on, all wide characters start at or *after* the column we are mapping,
+ // so they do not need to be taken into account
+ break;
+ }
+ }
+ }
+ Some(LineCol { line: line_col.line, col })
+ }
+
+ /// Given a range [start, end), returns a sorted iterator of non-empty ranges [start, x1), [x1,
+ /// x2), ..., [xn, end) where all the xi, which are the stored line-start offsets, are inside the range
+ /// [start, end).
+ pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
+ let lo = self.newlines.partition_point(|&it| it < range.start()); // index of the first line start at or after `range.start()`
+ let hi = self.newlines.partition_point(|&it| it <= range.end()); // one past the last line start at or before `range.end()`
+ let all = std::iter::once(range.start())
+ .chain(self.newlines[lo..hi].iter().copied())
+ .chain(std::iter::once(range.end()));
+
+ all.clone()
+ .zip(all.skip(1))
+ .map(|(lo, hi)| TextRange::new(lo, hi))
+ .filter(|it| !it.is_empty()) // drops empty pieces, e.g. when a bound coincides with a line start
+ }
+
+ /// Returns the length of the original text.
+ pub fn len(&self) -> TextSize {
+ self.len
+ }
+}
diff --git a/vendor/line-index/src/tests.rs b/vendor/line-index/src/tests.rs
new file mode 100644
index 000000000..31c01c20e
--- /dev/null
+++ b/vendor/line-index/src/tests.rs
@@ -0,0 +1,11 @@
+use super::LineIndex;
+
+#[test]
+fn test_empty_index() { // ASCII-only text must not record wide characters for any line
+ let col_index = LineIndex::new(
+ "
+const C: char = 'x';
+",
+ );
+ assert_eq!(col_index.line_wide_chars.len(), 0);
+}
diff --git a/vendor/line-index/tests/it.rs b/vendor/line-index/tests/it.rs
new file mode 100644
index 000000000..ce1c0bc6f
--- /dev/null
+++ b/vendor/line-index/tests/it.rs
@@ -0,0 +1,62 @@
+use line_index::{LineCol, LineIndex, TextRange};
+
+#[test]
+fn test_line_index() { // checks offset -> (line, col) mapping, including offsets right after a newline
+ let text = "hello\nworld";
+ let table = [ // (offset, expected line, expected col)
+ (00, 0, 0),
+ (01, 0, 1),
+ (05, 0, 5),
+ (06, 1, 0),
+ (07, 1, 1),
+ (08, 1, 2),
+ (10, 1, 4),
+ (11, 1, 5),
+ ];
+
+ let index = LineIndex::new(text);
+ for (offset, line, col) in table {
+ assert_eq!(index.line_col(offset.into()), LineCol { line, col });
+ }
+
+ let text = "\nhello\nworld"; // same check with an empty first line
+ let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)];
+ let index = LineIndex::new(text);
+ for (offset, line, col) in table {
+ assert_eq!(index.line_col(offset.into()), LineCol { line, col });
+ }
+}
+
+#[test]
+fn test_char_len() { // sanity check of a character whose UTF-8 and UTF-16 lengths differ
+ assert_eq!('メ'.len_utf8(), 3);
+ assert_eq!('メ'.len_utf16(), 1);
+}
+
+#[test]
+fn test_splitlines() { // checks how `LineIndex::lines` splits a range at line starts
+ fn r(lo: u32, hi: u32) -> TextRange { // shorthand for building a `TextRange` from plain integers
+ TextRange::new(lo.into(), hi.into())
+ }
+
+ let text = "a\nbb\nccc\n";
+ let line_index = LineIndex::new(text);
+
+ let actual = line_index.lines(r(0, 9)).collect::<Vec<_>>();
+ let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; // each piece ends right after its newline
+ assert_eq!(actual, expected);
+
+ let text = "";
+ let line_index = LineIndex::new(text);
+
+ let actual = line_index.lines(r(0, 0)).collect::<Vec<_>>();
+ let expected = vec![]; // an empty range yields no pieces
+ assert_eq!(actual, expected);
+
+ let text = "\n";
+ let line_index = LineIndex::new(text);
+
+ let actual = line_index.lines(r(0, 1)).collect::<Vec<_>>();
+ let expected = vec![r(0, 1)];
+ assert_eq!(actual, expected)
+}