//! Source file support for diagnostic reporting.
//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
//! with the [`term::emit`] function.
//!
//! Simple implementations of this trait are implemented:
//!
//! - [`SimpleFile`]: For single-file use-cases
//! - [`SimpleFiles`]: For multi-file use-cases
//!
//! These data structures provide a pretty minimal API, however,
//! so end-users are encouraged to create their own implementations for their
//! own specific use-cases, such as an implementation that accesses the file
//! system directly (and caches the line start locations), or an implementation
//! using an incremental compilation library like [`salsa`].
//!
//! [`term::emit`]: crate::term::emit
//! [`Diagnostics`]: crate::diagnostic::Diagnostic
//! [`Files`]: Files
//! [`SimpleFile`]: SimpleFile
//! [`SimpleFiles`]: SimpleFiles
//!
//! [`salsa`]: https://crates.io/crates/salsa

use std::ops::Range;

/// An enum representing an error that happened while looking up a file or a piece of content in that file.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge { given: usize, max: usize },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge { given: usize, max: usize },
    /// The file is present and contains the specified line index, but the line does not contain the specified column index.
    ColumnTooLarge { given: usize, max: usize },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary { given: usize },
    /// There was an error while doing IO.
    Io(std::io::Error),
}

impl From<std::io::Error> for Error {
    /// Wrap an IO error so that it can be propagated with `?`.
    fn from(err: std::io::Error) -> Error {
        Error::Io(err)
    }
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::FileMissing => write!(f, "file missing"),
            Error::IndexTooLarge { given, max } => {
                write!(f, "invalid index {}, maximum index is {}", given, max)
            }
            Error::LineTooLarge { given, max } => {
                write!(f, "invalid line {}, maximum line is {}", given, max)
            }
            Error::ColumnTooLarge { given, max } => {
                write!(f, "invalid column {}, maximum column {}", given, max)
            }
            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
            // IO errors are displayed transparently.
            Error::Io(err) => write!(f, "{}", err),
        }
    }
}

impl std::error::Error for Error {
    /// Only the [`Error::Io`] variant wraps an underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(err) => Some(err),
            _ => None,
        }
    }
}

/// A minimal interface for accessing source files when rendering diagnostics.
///
/// A lifetime parameter `'a` is provided to allow any of the returned values to be returned by reference.
/// This is to workaround the lack of higher kinded lifetime parameters.
/// This can be ignored if this is not needed, however.
pub trait Files<'a> {
    /// A unique identifier for files in the file provider. This will be used
    /// for rendering `diagnostic::Label`s in the corresponding source files.
    type FileId: 'a + Copy + PartialEq;
    /// The user-facing name of a file, to be displayed in diagnostics.
    type Name: 'a + std::fmt::Display;
    /// The source code of a file.
    type Source: 'a + AsRef<str>;

    /// The user-facing name of a file.
    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;

    /// The source code of a file.
    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;

    /// The index of the line at the given byte index.
    /// If the byte index is past the end of the file, returns the maximum line index in the file.
    /// This means that this function only fails if the file is not present.
    ///
    /// # Note for trait implementors
    ///
    /// This can be implemented efficiently by performing a binary search over
    /// a list of line starts that was computed by calling the [`line_starts`]
    /// function that is exported from the [`files`] module. It might be useful
    /// to pre-compute and cache these line starts.
    ///
    /// [`line_starts`]: crate::files::line_starts
    /// [`files`]: crate::files
    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;

    /// The user-facing line number at the given line index.
    /// It is not necessarily checked that the specified line index
    /// is actually in the file.
    ///
    /// # Note for trait implementors
    ///
    /// This is usually 1-indexed from the beginning of the file, but
    /// can be useful for implementing something like the
    /// [C preprocessor's `#line` macro][line-macro].
    ///
    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
    // `id` is unused by the default implementation, but implementors that
    // override this method may need it.
    #[allow(unused_variables)]
    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
        Ok(line_index + 1)
    }

    /// The user-facing column number at the given line index and byte index.
    ///
    /// # Note for trait implementors
    ///
    /// This is usually 1-indexed from the start of the line.
    /// A default implementation is provided, based on the [`column_index`]
    /// function that is exported from the [`files`] module.
    ///
    /// [`files`]: crate::files
    /// [`column_index`]: crate::files::column_index
    fn column_number(
        &'a self,
        id: Self::FileId,
        line_index: usize,
        byte_index: usize,
    ) -> Result<usize, Error> {
        let source = self.source(id)?;
        let line_range = self.line_range(id, line_index)?;
        let column_index = column_index(source.as_ref(), line_range, byte_index);

        Ok(column_index + 1)
    }

    /// Convenience method for returning line and column number at the given
    /// byte index in the file.
    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
        let line_index = self.line_index(id, byte_index)?;

        Ok(Location {
            line_number: self.line_number(id, line_index)?,
            column_number: self.column_number(id, line_index, byte_index)?,
        })
    }

    /// The byte range of the line in the source of the file.
    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
}

/// A user-facing location in a source file.
///
/// Returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}

/// The column index at the given byte index in the source file.
/// This is the number of characters to the given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Clamp to the end of the line and to the end of the source.
    let end_index = std::cmp::min(byte_index, std::cmp::min(line_range.end, source.len()));

    // A character is counted once the byte *after* its last byte is a
    // boundary, i.e. only characters that end at or before `end_index`.
    (line_range.start..end_index)
        .filter(|byte_index| source.is_char_boundary(byte_index + 1))
        .count()
}

/// Return the starting byte index of each line in the source string.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // The first line always starts at byte 0; every other line starts right
    // after a `'\n'`.
    std::iter::once(0).chain(source.match_indices('\n').map(|(i, _)| i + 1))
}

/// A file database that contains a single source file.
///
/// Because there is only a single file in this database we use `()` as a [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte indices in the source code.
    line_starts: Vec<usize>,
}

impl<Name, Source> SimpleFile<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create a new source file.
    ///
    /// The line starts of `source` are computed once here and cached.
    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
        SimpleFile {
            name,
            line_starts: line_starts(source.as_ref()).collect(),
            source,
        }
    }

    /// Return the name of the file.
    pub fn name(&self) -> &Name {
        &self.name
    }

    /// Return the source of the file.
    pub fn source(&self) -> &Source {
        &self.source
    }

    /// Return the starting byte index of the line with the specified line index.
    /// Convenience method that already generates errors if necessary.
    ///
    /// The index one past the last line is accepted and yields the length of
    /// the source, so that it can serve as the end of the last line's range.
    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
        match self.line_starts.get(line_index) {
            Some(&start) => Ok(start),
            None if line_index == self.line_starts.len() => Ok(self.source.as_ref().len()),
            None => Err(Error::LineTooLarge {
                given: line_index,
                // `line_starts` always contains at least the entry `0`.
                max: self.line_starts.len() - 1,
            }),
        }
    }
}

impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
where
    Name: 'a + std::fmt::Display + Clone,
    Source: 'a + AsRef<str>,
{
    type FileId = ();
    type Name = Name;
    type Source = &'a str;

    fn name(&self, (): ()) -> Result<Name, Error> {
        Ok(self.name.clone())
    }

    fn source(&self, (): ()) -> Result<&str, Error> {
        Ok(self.source.as_ref())
    }

    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
        // An exact hit is the start of a line; otherwise the byte belongs to
        // the line preceding the insertion point. The insertion point is never
        // `0` because `line_starts` always begins with `0`.
        Ok(self
            .line_starts
            .binary_search(&byte_index)
            .unwrap_or_else(|next_line| next_line - 1))
    }

    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
        let line_start = self.line_start(line_index)?;
        let next_line_start = self.line_start(line_index + 1)?;

        Ok(line_start..next_line_start)
    }
}

/// A file database that can store multiple source files.
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
#[derive(Debug, Clone)]
pub struct SimpleFiles<Name, Source> {
    files: Vec<SimpleFile<Name, Source>>,
}

impl<Name, Source> Default for SimpleFiles<Name, Source> {
    /// Create an empty files database.
    fn default() -> Self {
        SimpleFiles { files: Vec::new() }
    }
}

impl<Name, Source> SimpleFiles<Name, Source>
where
    Name: std::fmt::Display,
    Source: AsRef<str>,
{
    /// Create a new files database.
    pub fn new() -> SimpleFiles<Name, Source> {
        SimpleFiles::default()
    }

    /// Add a file to the database, returning the handle that can be used to
    /// refer to it again.
    pub fn add(&mut self, name: Name, source: Source) -> usize {
        // The handle is the position of the file in the backing vector.
        let file_id = self.files.len();
        self.files.push(SimpleFile::new(name, source));
        file_id
    }

    /// Get the file corresponding to the given id.
    ///
    /// Returns [`Error::FileMissing`] if no file was added under that id.
    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
        self.files.get(file_id).ok_or(Error::FileMissing)
    }
}

impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
where
    Name: 'a + std::fmt::Display + Clone,
    Source: 'a + AsRef<str>,
{
    type FileId = usize;
    type Name = Name;
    type Source = &'a str;

    fn name(&self, file_id: usize) -> Result<Name, Error> {
        Ok(self.get(file_id)?.name().clone())
    }

    fn source(&self, file_id: usize) -> Result<&str, Error> {
        Ok(self.get(file_id)?.source().as_ref())
    }

    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
        self.get(file_id)?.line_index((), byte_index)
    }

    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
        self.get(file_id)?.line_range((), line_index)
    }
}

#[cfg(test)]
mod test {
    use super::*;

    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(
            file.line_starts,
            [
                0,  // "foo\n"
                4,  // "bar\r\n"
                9,  // ""
                10, // "baz"
            ],
        );
    }

    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let line_sources = (0..4)
            .map(|line| {
                let line_range = file.line_range((), line).unwrap();
                &file.source[line_range]
            })
            .collect::<Vec<_>>();

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}