From 023939b627b7dc93b01471f7d41fb8553ddb4ffa Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 05:59:24 +0200 Subject: Merging upstream version 1.73.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/line-index/.cargo-checksum.json | 1 + vendor/line-index/Cargo.toml | 25 ++++ vendor/line-index/src/lib.rs | 237 +++++++++++++++++++++++++++++++++ vendor/line-index/src/tests.rs | 11 ++ vendor/line-index/tests/it.rs | 62 +++++++++ 5 files changed, 336 insertions(+) create mode 100644 vendor/line-index/.cargo-checksum.json create mode 100644 vendor/line-index/Cargo.toml create mode 100644 vendor/line-index/src/lib.rs create mode 100644 vendor/line-index/src/tests.rs create mode 100644 vendor/line-index/tests/it.rs (limited to 'vendor/line-index') diff --git a/vendor/line-index/.cargo-checksum.json b/vendor/line-index/.cargo-checksum.json new file mode 100644 index 000000000..d96ced783 --- /dev/null +++ b/vendor/line-index/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"a10220f394cb62baef1dbf0d96c188b21eef910259d21c7f7d60dd622306c961","src/lib.rs":"12d63393f2a07750ad7ad1a344b543792800518d9cf74955301ea33782640bae","src/tests.rs":"4741ca88d75c136fedf6c698cd58aaae7a2c092f754f0fecee80774e53f4e8e4","tests/it.rs":"aa3cc4fb79acd647d7d9f74134fd05b728039bd6eefea19b28eaff450e830b24"},"package":"2cad96769710c1745e11d4f940a8ff36000ade4bbada4285b001cb8aa2f745ce"} \ No newline at end of file diff --git a/vendor/line-index/Cargo.toml b/vendor/line-index/Cargo.toml new file mode 100644 index 000000000..06efc2071 --- /dev/null +++ b/vendor/line-index/Cargo.toml @@ -0,0 +1,25 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "line-index" +version = "0.1.0-pre.1" +description = "Maps flat `TextSize` offsets to/from `(line, column)` representation." +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/line-index" +resolver = "1" + +[dependencies.nohash-hasher] +version = "0.2.0" + +[dependencies.text-size] +version = "1.1.0" diff --git a/vendor/line-index/src/lib.rs b/vendor/line-index/src/lib.rs new file mode 100644 index 000000000..ad67d3f24 --- /dev/null +++ b/vendor/line-index/src/lib.rs @@ -0,0 +1,237 @@ +//! See [`LineIndex`]. + +#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)] + +#[cfg(test)] +mod tests; + +use nohash_hasher::IntMap; + +pub use text_size::{TextRange, TextSize}; + +/// `(line, column)` information in the native, UTF-8 encoding. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LineCol { + /// Zero-based. + pub line: u32, + /// Zero-based UTF-8 offset. + pub col: u32, +} + +/// A kind of wide character encoding. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum WideEncoding { + /// UTF-16. + Utf16, + /// UTF-32. + Utf32, +} + +impl WideEncoding { + /// Returns the number of code units it takes to encode `text` in this encoding. + pub fn measure(&self, text: &str) -> usize { + match self { + WideEncoding::Utf16 => text.encode_utf16().count(), + WideEncoding::Utf32 => text.chars().count(), + } + } +} + +/// `(line, column)` information in wide encodings. +/// +/// See [`WideEncoding`] for the kinds of wide encodings available. +// +// Deliberately not a generic type and different from `LineCol`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct WideLineCol { + /// Zero-based. + pub line: u32, + /// Zero-based. + pub col: u32, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct WideChar { + /// Start offset of a character inside a line, zero-based. + start: TextSize, + /// End offset of a character inside a line, zero-based. + end: TextSize, +} + +impl WideChar { + /// Returns the length in 8-bit UTF-8 code units. + fn len(&self) -> TextSize { + self.end - self.start + } + + /// Returns the length in UTF-16 or UTF-32 code units. + fn wide_len(&self, enc: WideEncoding) -> u32 { + match enc { + WideEncoding::Utf16 => { + if self.len() == TextSize::from(4) { + 2 + } else { + 1 + } + } + WideEncoding::Utf32 => 1, + } + } +} + +/// Maps flat [`TextSize`] offsets to/from `(line, column)` representation. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LineIndex { + /// Offset the beginning of each line (except the first, which always has offset 0). + newlines: Box<[TextSize]>, + /// List of non-ASCII characters on each line. + line_wide_chars: IntMap>, + /// The length of the entire text. + len: TextSize, +} + +impl LineIndex { + /// Returns a `LineIndex` for the `text`. + pub fn new(text: &str) -> LineIndex { + let mut newlines = Vec::::with_capacity(16); + let mut line_wide_chars = IntMap::>::default(); + + let mut wide_chars = Vec::::new(); + let mut cur_row = TextSize::from(0); + let mut cur_col = TextSize::from(0); + let mut line = 0u32; + + for c in text.chars() { + let c_len = TextSize::of(c); + cur_row += c_len; + if c == '\n' { + newlines.push(cur_row); + + // Save any wide characters seen in the previous line + if !wide_chars.is_empty() { + let cs = std::mem::take(&mut wide_chars).into_boxed_slice(); + line_wide_chars.insert(line, cs); + } + + // Prepare for processing the next line + cur_col = TextSize::from(0); + line += 1; + continue; + } + + if !c.is_ascii() { + wide_chars.push(WideChar { start: cur_col, end: cur_col + c_len }); + } + + cur_col += c_len; + } + + // Save any wide characters seen in the last line + if !wide_chars.is_empty() { + line_wide_chars.insert(line, wide_chars.into_boxed_slice()); + } + + LineIndex { + newlines: newlines.into_boxed_slice(), + line_wide_chars, + len: TextSize::of(text), + } + } + + /// Transforms the `TextSize` into a `LineCol`. + /// + /// # Panics + /// + /// If the offset is invalid. See [`Self::try_line_col`]. + pub fn line_col(&self, offset: TextSize) -> LineCol { + self.try_line_col(offset).expect("invalid offset") + } + + /// Transforms the `TextSize` into a `LineCol`. + /// + /// Returns `None` if the `offset` was invalid, e.g. if it extends past the end of the text or + /// points to the middle of a multi-byte character. + pub fn try_line_col(&self, offset: TextSize) -> Option { + if offset > self.len { + return None; + } + let line = self.newlines.partition_point(|&it| it <= offset); + let start = self.start_offset(line)?; + let col = offset - start; + let ret = LineCol { line: line as u32, col: col.into() }; + self.line_wide_chars + .get(&ret.line) + .into_iter() + .flat_map(|it| it.iter()) + .all(|it| col <= it.start || it.end <= col) + .then_some(ret) + } + + /// Transforms the `LineCol` into a `TextSize`. + pub fn offset(&self, line_col: LineCol) -> Option { + self.start_offset(line_col.line as usize).map(|start| start + TextSize::from(line_col.col)) + } + + fn start_offset(&self, line: usize) -> Option { + match line.checked_sub(1) { + None => Some(TextSize::from(0)), + Some(it) => self.newlines.get(it).copied(), + } + } + + /// Transforms the `LineCol` with the given `WideEncoding` into a `WideLineCol`. + pub fn to_wide(&self, enc: WideEncoding, line_col: LineCol) -> Option { + let mut col = line_col.col; + if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { + for c in wide_chars.iter() { + if u32::from(c.end) <= line_col.col { + col = col.checked_sub(u32::from(c.len()) - c.wide_len(enc))?; + } else { + // From here on, all utf16 characters come *after* the character we are mapping, + // so we don't need to take them into account + break; + } + } + } + Some(WideLineCol { line: line_col.line, col }) + } + + /// Transforms the `WideLineCol` with the given `WideEncoding` into a `LineCol`. + pub fn to_utf8(&self, enc: WideEncoding, line_col: WideLineCol) -> Option { + let mut col = line_col.col; + if let Some(wide_chars) = self.line_wide_chars.get(&line_col.line) { + for c in wide_chars.iter() { + if col > u32::from(c.start) { + col = col.checked_add(u32::from(c.len()) - c.wide_len(enc))?; + } else { + // From here on, all utf16 characters come *after* the character we are mapping, + // so we don't need to take them into account + break; + } + } + } + Some(LineCol { line: line_col.line, col }) + } + + /// Given a range [start, end), returns a sorted iterator of non-empty ranges [start, x1), [x1, + /// x2), ..., [xn, end) where all the xi, which are positions of newlines, are inside the range + /// [start, end). + pub fn lines(&self, range: TextRange) -> impl Iterator + '_ { + let lo = self.newlines.partition_point(|&it| it < range.start()); + let hi = self.newlines.partition_point(|&it| it <= range.end()); + let all = std::iter::once(range.start()) + .chain(self.newlines[lo..hi].iter().copied()) + .chain(std::iter::once(range.end())); + + all.clone() + .zip(all.skip(1)) + .map(|(lo, hi)| TextRange::new(lo, hi)) + .filter(|it| !it.is_empty()) + } + + /// Returns the length of the original text. + pub fn len(&self) -> TextSize { + self.len + } +} diff --git a/vendor/line-index/src/tests.rs b/vendor/line-index/src/tests.rs new file mode 100644 index 000000000..31c01c20e --- /dev/null +++ b/vendor/line-index/src/tests.rs @@ -0,0 +1,11 @@ +use super::LineIndex; + +#[test] +fn test_empty_index() { + let col_index = LineIndex::new( + " +const C: char = 'x'; +", + ); + assert_eq!(col_index.line_wide_chars.len(), 0); +} diff --git a/vendor/line-index/tests/it.rs b/vendor/line-index/tests/it.rs new file mode 100644 index 000000000..ce1c0bc6f --- /dev/null +++ b/vendor/line-index/tests/it.rs @@ -0,0 +1,62 @@ +use line_index::{LineCol, LineIndex, TextRange}; + +#[test] +fn test_line_index() { + let text = "hello\nworld"; + let table = [ + (00, 0, 0), + (01, 0, 1), + (05, 0, 5), + (06, 1, 0), + (07, 1, 1), + (08, 1, 2), + (10, 1, 4), + (11, 1, 5), + ]; + + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } + + let text = "\nhello\nworld"; + let table = [(0, 0, 0), (1, 1, 0), (2, 1, 1), (6, 1, 5), (7, 2, 0)]; + let index = LineIndex::new(text); + for (offset, line, col) in table { + assert_eq!(index.line_col(offset.into()), LineCol { line, col }); + } +} + +#[test] +fn test_char_len() { + assert_eq!('メ'.len_utf8(), 3); + assert_eq!('メ'.len_utf16(), 1); +} + +#[test] +fn test_splitlines() { + fn r(lo: u32, hi: u32) -> TextRange { + TextRange::new(lo.into(), hi.into()) + } + + let text = "a\nbb\nccc\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 9)).collect::>(); + let expected = vec![r(0, 2), r(2, 5), r(5, 9)]; + assert_eq!(actual, expected); + + let text = ""; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 0)).collect::>(); + let expected = vec![]; + assert_eq!(actual, expected); + + let text = "\n"; + let line_index = LineIndex::new(text); + + let actual = line_index.lines(r(0, 1)).collect::>(); + let expected = vec![r(0, 1)]; + assert_eq!(actual, expected) +} -- cgit v1.2.3