// Provenance: third_party/rust/codespan-reporting/src/files.rs, as introduced by commit
// 26a029d407be480d791972afb5975cf62c9360a6 ("Adding upstream version 124.0.1.",
// Daniel Baumann, Fri, 19 Apr 2024 02:47:55 +0200; 1 file changed, 443 insertions).

//! Source file support for diagnostic reporting.
//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
//! with the [`term::emit`] function.
//!
//! Simple implementations of this trait are implemented:
//!
//! - [`SimpleFile`]: For single-file use-cases
//! - [`SimpleFiles`]: For multi-file use-cases
//!
//! These data structures provide a pretty minimal API, however,
//! so end-users are encouraged to create their own implementations for their
//! own specific use-cases, such as an implementation that accesses the file
//! system directly (and caches the line start locations), or an implementation
//! using an incremental compilation library like [`salsa`].
//!
//! [`term::emit`]: crate::term::emit
//! [`Diagnostics`]: crate::diagnostic::Diagnostic
//! [`Files`]: Files
//! [`SimpleFile`]: SimpleFile
//! [`SimpleFiles`]: SimpleFiles
//!
//! [`salsa`]: https://crates.io/crates/salsa

use std::ops::Range;

/// An enum representing an error that happened while looking up a file or a piece of content in that file.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge {
        /// The byte index that was requested.
        given: usize,
        /// The maximum byte index, as reported in the error message.
        max: usize,
    },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge {
        /// The line index that was requested.
        given: usize,
        /// The maximum line index, as reported in the error message.
        max: usize,
    },
    /// The file is present and contains the specified line index, but the line does not contain the specified column index.
    ColumnTooLarge {
        /// The column index that was requested.
        given: usize,
        /// The maximum column index, as reported in the error message.
        max: usize,
    },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary {
        /// The byte index that was requested.
        given: usize,
    },
    /// There was an error while doing IO.
    Io(std::io::Error),
}

impl From<std::io::Error> for Error {
    /// Wraps an IO error so that `?` can be used on IO operations.
    fn from(err: std::io::Error) -> Error {
        Error::Io(err)
    }
}

impl std::fmt::Display for Error {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::FileMissing => write!(f, "file missing"),
            Error::IndexTooLarge { given, max } => {
                write!(f, "invalid index {}, maximum index is {}", given, max)
            }
            Error::LineTooLarge { given, max } => {
                write!(f, "invalid line {}, maximum line is {}", given, max)
            }
            Error::ColumnTooLarge { given, max } => {
                write!(f, "invalid column {}, maximum column {}", given, max)
            }
            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
            Error::Io(err) => write!(f, "{}", err),
        }
    }
}

impl std::error::Error for Error {
    /// Returns the underlying IO error for [`Error::Io`], and `None` for every other variant.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(err) => Some(err),
            _ => None,
        }
    }
}

/// A minimal interface for accessing source files when rendering diagnostics.
///
/// A lifetime parameter `'a` is provided to allow any of the returned values to be returned by reference.
/// This is to work around the lack of higher kinded lifetime parameters.
/// This can be ignored if this is not needed, however.
pub trait Files<'a> {
    /// A unique identifier for files in the file provider. This will be used
    /// for rendering `diagnostic::Label`s in the corresponding source files.
    type FileId: 'a + Copy + PartialEq;
    /// The user-facing name of a file, to be displayed in diagnostics.
    type Name: 'a + std::fmt::Display;
    /// The source code of a file.
    type Source: 'a + AsRef<str>;

    /// The user-facing name of a file.
    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;

    /// The source code of a file.
    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;

    /// The index of the line at the given byte index.
    /// If the byte index is past the end of the file, returns the maximum line index in the file.
    /// This means that this function only fails if the file is not present.
    ///
    /// # Note for trait implementors
    ///
    /// This can be implemented efficiently by performing a binary search over
    /// a list of line starts that was computed by calling the [`line_starts`]
    /// function that is exported from the [`files`] module. It might be useful
    /// to pre-compute and cache these line starts.
    ///
    /// [`line_starts`]: crate::files::line_starts
    /// [`files`]: crate::files
    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;

    /// The user-facing line number at the given line index.
    /// It is not necessarily checked that the specified line index
    /// is actually in the file.
    ///
    /// # Note for trait implementors
    ///
    /// This is usually 1-indexed from the beginning of the file, but
    /// can be useful for implementing something like the
    /// [C preprocessor's `#line` macro][line-macro].
    ///
    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
    // The default implementation does not need `id`; implementors overriding it may.
    #[allow(unused_variables)]
    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
        Ok(line_index + 1)
    }

    /// The user-facing column number at the given line index and byte index.
    ///
    /// # Note for trait implementors
    ///
    /// This is usually 1-indexed from the start of the line.
    /// A default implementation is provided, based on the [`column_index`]
    /// function that is exported from the [`files`] module.
    ///
    /// [`files`]: crate::files
    /// [`column_index`]: crate::files::column_index
    fn column_number(
        &'a self,
        id: Self::FileId,
        line_index: usize,
        byte_index: usize,
    ) -> Result<usize, Error> {
        let source = self.source(id)?;
        let line_range = self.line_range(id, line_index)?;
        let zero_based_column = column_index(source.as_ref(), line_range, byte_index);

        Ok(zero_based_column + 1)
    }

    /// Convenience method for returning line and column number at the given
    /// byte index in the file.
    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
        let line_index = self.line_index(id, byte_index)?;

        Ok(Location {
            line_number: self.line_number(id, line_index)?,
            column_number: self.column_number(id, line_index, byte_index)?,
        })
    }

    /// The byte range of the line in the source of the file.
    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
}

/// A user-facing location in a source file.
///
/// Returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}

/// The column index at the given byte index in the source file.
/// This is the number of characters to the given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never look past the requested byte, the end of the line, or the end of the source.
    let end_index = byte_index.min(line_range.end).min(source.len());

    // A byte completes a character exactly when the following index is a char
    // boundary, so counting those bytes counts whole characters.
    (line_range.start..end_index)
        .filter(|index| source.is_char_boundary(index + 1))
        .count()
}

/// Return the starting byte index of each line in the source string.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // The first line always starts at byte 0; every other line starts right after a `\n`.
    std::iter::once(0).chain(source.match_indices('\n').map(|(i, _)| i + 1))
}

/// A file database that contains a single source file.
///
/// Because there is only a single file in this database we use `()` as a [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte indices in the source code.
    line_starts: Vec<usize>,
}

impl<Name, Source> SimpleFile<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create a new source file.
    ///
    /// The line starts of `source` are computed once here and cached.
    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
        let line_starts = line_starts(source.as_ref()).collect();

        SimpleFile {
            name,
            source,
            line_starts,
        }
    }

    /// Return the name of the file.
    pub fn name(&self) -> &Name {
        &self.name
    }

    /// Return the source of the file.
    pub fn source(&self) -> &Source {
        &self.source
    }

    /// Return the starting byte index of the line with the specified line index.
    /// Convenience method that already generates errors if necessary.
    ///
    /// The index one past the last line maps to the length of the source, so
    /// that it can serve as the exclusive end of the last line's range.
    ///
    /// # Errors
    ///
    /// Returns [`Error::LineTooLarge`] if `line_index` is greater than the
    /// number of lines.
    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
        match self.line_starts.get(line_index) {
            Some(&start) => Ok(start),
            None if line_index == self.line_starts.len() => Ok(self.source.as_ref().len()),
            None => Err(Error::LineTooLarge {
                given: line_index,
                max: self.line_starts.len() - 1,
            }),
        }
    }
}

impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
where
    Name: 'a + std::fmt::Display + Clone,
    Source: 'a + AsRef<str>,
{
    type FileId = ();
    type Name = Name;
    type Source = &'a str;

    /// Returns a clone of the file name.
    fn name(&self, (): ()) -> Result<Name, Error> {
        Ok(self.name.clone())
    }

    /// Returns the source code as a string slice.
    fn source(&self, (): ()) -> Result<&str, Error> {
        Ok(self.source.as_ref())
    }

    /// Binary-searches the cached line starts for the line containing `byte_index`.
    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
        // `line_starts` always begins with 0, so a failed search has an
        // insertion point of at least 1 and the subtraction cannot underflow.
        Ok(self
            .line_starts
            .binary_search(&byte_index)
            .unwrap_or_else(|next_line| next_line - 1))
    }

    /// Returns the byte range of the line, including its line terminator.
    ///
    /// # Errors
    ///
    /// Returns [`Error::LineTooLarge`] carrying the requested `line_index` if
    /// the file has no such line.
    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
        // `line_starts` is never empty: `line_starts()` always yields the leading 0.
        let max = self.line_starts.len() - 1;
        // Reject out-of-range lines up front. Otherwise the index one past the
        // last line would only fail while looking up the *next* line start, and
        // the error would report `line_index + 1` instead of the requested line.
        if line_index > max {
            return Err(Error::LineTooLarge {
                given: line_index,
                max,
            });
        }

        let line_start = self.line_start(line_index)?;
        let next_line_start = self.line_start(line_index + 1)?;

        Ok(line_start..next_line_start)
    }
}

/// A file database that can store multiple source files.
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
#[derive(Debug, Clone)]
pub struct SimpleFiles<Name, Source> {
    files: Vec<SimpleFile<Name, Source>>,
}

// Written by hand rather than derived so that no `Default` bounds are imposed
// on `Name` and `Source`.
impl<Name, Source> Default for SimpleFiles<Name, Source> {
    /// Create an empty files database.
    fn default() -> Self {
        SimpleFiles { files: Vec::new() }
    }
}

impl<Name, Source> SimpleFiles<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create a new files database.
    pub fn new() -> SimpleFiles<Name, Source> {
        Self::default()
    }

    /// Add a file to the database, returning the handle that can be used to
    /// refer to it again.
    pub fn add(&mut self, name: Name, source: Source) -> usize {
        let file_id = self.files.len();
        self.files.push(SimpleFile::new(name, source));
        file_id
    }

    /// Get the file corresponding to the given id.
    ///
    /// # Errors
    ///
    /// Returns [`Error::FileMissing`] if no file was added under `file_id`.
    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
        self.files.get(file_id).ok_or(Error::FileMissing)
    }
}

impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
where
    Name: 'a + std::fmt::Display + Clone,
    Source: 'a + AsRef<str>,
{
    type FileId = usize;
    type Name = Name;
    type Source = &'a str;

    /// Returns a clone of the name of the file with the given id.
    fn name(&self, file_id: usize) -> Result<Name, Error> {
        Ok(self.get(file_id)?.name().clone())
    }

    /// Returns the source code of the file with the given id.
    fn source(&self, file_id: usize) -> Result<&str, Error> {
        Ok(self.get(file_id)?.source().as_ref())
    }

    /// Delegates to [`SimpleFile`]'s `line_index` for the file with the given id.
    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
        self.get(file_id)?.line_index((), byte_index)
    }

    /// Delegates to [`SimpleFile`]'s `line_range` for the file with the given id.
    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
        self.get(file_id)?.line_range((), line_index)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(
            file.line_starts,
            [
                0,  // "foo\n"
                4,  // "bar\r\n"
                9,  // ""
                10, // "baz"
            ],
        );
    }

    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let line_sources = (0..4)
            .map(|line| {
                let line_range = file.line_range((), line).unwrap();
                &file.source[line_range]
            })
            .collect::<Vec<_>>();

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}