diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /src/tools/rust-analyzer/crates/vfs | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/rust-analyzer/crates/vfs')
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/Cargo.toml | 17 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/anchored_path.rs | 49 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/file_set.rs | 218 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/file_set/tests.rs | 42 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/lib.rs | 221 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/loader.rs | 215 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/path_interner.rs | 48 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/vfs_path.rs | 406 | ||||
-rw-r--r-- | src/tools/rust-analyzer/crates/vfs/src/vfs_path/tests.rs | 30 |
9 files changed, 1246 insertions, 0 deletions
diff --git a/src/tools/rust-analyzer/crates/vfs/Cargo.toml b/src/tools/rust-analyzer/crates/vfs/Cargo.toml new file mode 100644 index 000000000..c63773487 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "vfs" +version = "0.0.0" +description = "TBD" +license = "MIT OR Apache-2.0" +edition = "2021" +rust-version = "1.57" + +[lib] +doctest = false + +[dependencies] +rustc-hash = "1.1.0" +fst = "0.4.7" + +paths = { path = "../paths", version = "0.0.0" } +indexmap = "1.9.1" diff --git a/src/tools/rust-analyzer/crates/vfs/src/anchored_path.rs b/src/tools/rust-analyzer/crates/vfs/src/anchored_path.rs new file mode 100644 index 000000000..db15a2a21 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/anchored_path.rs @@ -0,0 +1,49 @@ +//! Analysis-level representation of file-system paths. +//! +//! The primary goal of this is to losslessly represent paths like +//! +//! ``` +//! #[path = "./bar.rs"] +//! mod foo; +//! ``` +//! +//! The first approach one might reach for is to use `PathBuf`. The problem here +//! is that `PathBuf` depends on host target (windows or linux), but +//! rust-analyzer should be capable to process `#[path = r"C:\bar.rs"]` on Unix. +//! +//! The second try is to use a `String`. This also fails, however. Consider a +//! hypothetical scenario, where rust-analyzer operates in a +//! networked/distributed mode. There's one global instance of rust-analyzer, +//! which processes requests from different machines. Now, the semantics of +//! `#[path = "/abs/path.rs"]` actually depends on which file-system we are at! +//! That is, even absolute paths exist relative to a file system! +//! +//! A more realistic scenario here is virtual VFS paths we use for testing. More +//! generally, there can be separate "universes" of VFS paths. +//! +//! That's why we use anchored representation -- each path carries an info about +//! a file this path originates from. We can fetch fs/"universe" information +//! 
from the anchor then. +use crate::FileId; + +/// Path relative to a file. +/// +/// Owned version of [`AnchoredPath`]. +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct AnchoredPathBuf { + /// File that this path is relative to. + pub anchor: FileId, + /// Path relative to `anchor`'s containing directory. + pub path: String, +} + +/// Path relative to a file. +/// +/// Borrowed version of [`AnchoredPathBuf`]. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub struct AnchoredPath<'a> { + /// File that this path is relative to. + pub anchor: FileId, + /// Path relative to `anchor`'s containing directory. + pub path: &'a str, +} diff --git a/src/tools/rust-analyzer/crates/vfs/src/file_set.rs b/src/tools/rust-analyzer/crates/vfs/src/file_set.rs new file mode 100644 index 000000000..6a89263e5 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/file_set.rs @@ -0,0 +1,218 @@ +//! Partitions a list of files into disjoint subsets. +//! +//! Files which do not belong to any explicitly configured `FileSet` belong to +//! the default `FileSet`. +use std::fmt; + +use fst::{IntoStreamer, Streamer}; +use rustc_hash::FxHashMap; + +use crate::{AnchoredPath, FileId, Vfs, VfsPath}; + +/// A set of [`VfsPath`]s identified by [`FileId`]s. +#[derive(Default, Clone, Eq, PartialEq)] +pub struct FileSet { + files: FxHashMap<VfsPath, FileId>, + paths: FxHashMap<FileId, VfsPath>, +} + +impl FileSet { + /// Returns the number of stored paths. + pub fn len(&self) -> usize { + self.files.len() + } + + /// Get the id of the file corresponding to `path`. + /// + /// If either `path`'s [`anchor`](AnchoredPath::anchor) or the resolved path is not in + /// the set, returns [`None`]. + pub fn resolve_path(&self, path: AnchoredPath<'_>) -> Option<FileId> { + let mut base = self.paths[&path.anchor].clone(); + base.pop(); + let path = base.join(path.path)?; + self.files.get(&path).copied() + } + + /// Get the id corresponding to `path` if it exists in the set. 
+ pub fn file_for_path(&self, path: &VfsPath) -> Option<&FileId> { + self.files.get(path) + } + + /// Get the path corresponding to `file` if it exists in the set. + pub fn path_for_file(&self, file: &FileId) -> Option<&VfsPath> { + self.paths.get(file) + } + + /// Insert the `file_id, path` pair into the set. + /// + /// # Note + /// Multiple [`FileId`] can be mapped to the same [`VfsPath`], and vice-versa. + pub fn insert(&mut self, file_id: FileId, path: VfsPath) { + self.files.insert(path.clone(), file_id); + self.paths.insert(file_id, path); + } + + /// Iterate over this set's ids. + pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ { + self.paths.keys().copied() + } +} + +impl fmt::Debug for FileSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FileSet").field("n_files", &self.files.len()).finish() + } +} + +/// This contains path prefixes to partition a [`Vfs`] into [`FileSet`]s. +/// +/// # Example +/// ```rust +/// # use vfs::{file_set::FileSetConfigBuilder, VfsPath, Vfs}; +/// let mut builder = FileSetConfigBuilder::default(); +/// builder.add_file_set(vec![VfsPath::new_virtual_path("/src".to_string())]); +/// let config = builder.build(); +/// let mut file_system = Vfs::default(); +/// file_system.set_file_contents(VfsPath::new_virtual_path("/src/main.rs".to_string()), Some(vec![])); +/// file_system.set_file_contents(VfsPath::new_virtual_path("/src/lib.rs".to_string()), Some(vec![])); +/// file_system.set_file_contents(VfsPath::new_virtual_path("/build.rs".to_string()), Some(vec![])); +/// // contains the sets : +/// // { "/src/main.rs", "/src/lib.rs" } +/// // { "build.rs" } +/// let sets = config.partition(&file_system); +/// ``` +#[derive(Debug)] +pub struct FileSetConfig { + /// Number of sets that `self` can partition a [`Vfs`] into. + /// + /// This should be the number of sets in `self.map` + 1 for files that don't fit in any + /// defined set. 
+ n_file_sets: usize, + /// Map from encoded paths to the set they belong to. + map: fst::Map<Vec<u8>>, +} + +impl Default for FileSetConfig { + fn default() -> Self { + FileSetConfig::builder().build() + } +} + +impl FileSetConfig { + /// Returns a builder for `FileSetConfig`. + pub fn builder() -> FileSetConfigBuilder { + FileSetConfigBuilder::default() + } + + /// Partition `vfs` into `FileSet`s. + /// + /// Creates a new [`FileSet`] for every set of prefixes in `self`. + pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> { + let mut scratch_space = Vec::new(); + let mut res = vec![FileSet::default(); self.len()]; + for (file_id, path) in vfs.iter() { + let root = self.classify(path, &mut scratch_space); + res[root].insert(file_id, path.clone()); + } + res + } + + /// Number of sets that `self` can partition a [`Vfs`] into. + fn len(&self) -> usize { + self.n_file_sets + } + + /// Returns the set index for the given `path`. + /// + /// `scratch_space` is used as a buffer and will be entirely replaced. + fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize { + scratch_space.clear(); + path.encode(scratch_space); + let automaton = PrefixOf::new(scratch_space.as_slice()); + let mut longest_prefix = self.len() - 1; + let mut stream = self.map.search(automaton).into_stream(); + while let Some((_, v)) = stream.next() { + longest_prefix = v as usize; + } + longest_prefix + } +} + +/// Builder for [`FileSetConfig`]. +pub struct FileSetConfigBuilder { + roots: Vec<Vec<VfsPath>>, +} + +impl Default for FileSetConfigBuilder { + fn default() -> Self { + FileSetConfigBuilder { roots: Vec::new() } + } +} + +impl FileSetConfigBuilder { + /// Returns the number of sets currently held. + pub fn len(&self) -> usize { + self.roots.len() + } + + /// Add a new set of paths prefixes. + pub fn add_file_set(&mut self, roots: Vec<VfsPath>) { + self.roots.push(roots); + } + + /// Build the `FileSetConfig`. 
+ pub fn build(self) -> FileSetConfig { + let n_file_sets = self.roots.len() + 1; + let map = { + let mut entries = Vec::new(); + for (i, paths) in self.roots.into_iter().enumerate() { + for p in paths { + let mut buf = Vec::new(); + p.encode(&mut buf); + entries.push((buf, i as u64)); + } + } + entries.sort(); + entries.dedup_by(|(a, _), (b, _)| a == b); + fst::Map::from_iter(entries).unwrap() + }; + FileSetConfig { n_file_sets, map } + } +} + +/// Implements [`fst::Automaton`] +/// +/// It will match if `prefix_of` is a prefix of the given data. +struct PrefixOf<'a> { + prefix_of: &'a [u8], +} + +impl<'a> PrefixOf<'a> { + /// Creates a new `PrefixOf` from the given slice. + fn new(prefix_of: &'a [u8]) -> Self { + Self { prefix_of } + } +} + +impl fst::Automaton for PrefixOf<'_> { + type State = usize; + fn start(&self) -> usize { + 0 + } + fn is_match(&self, &state: &usize) -> bool { + state != !0 + } + fn can_match(&self, &state: &usize) -> bool { + state != !0 + } + fn accept(&self, &state: &usize, byte: u8) -> usize { + if self.prefix_of.get(state) == Some(&byte) { + state + 1 + } else { + !0 + } + } +} + +#[cfg(test)] +mod tests; diff --git a/src/tools/rust-analyzer/crates/vfs/src/file_set/tests.rs b/src/tools/rust-analyzer/crates/vfs/src/file_set/tests.rs new file mode 100644 index 000000000..2146df185 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/file_set/tests.rs @@ -0,0 +1,42 @@ +use super::*; + +#[test] +fn path_prefix() { + let mut file_set = FileSetConfig::builder(); + file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo".into())]); + file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo/bar/baz".into())]); + let file_set = file_set.build(); + + let mut vfs = Vfs::default(); + vfs.set_file_contents(VfsPath::new_virtual_path("/foo/src/lib.rs".into()), Some(Vec::new())); + vfs.set_file_contents( + VfsPath::new_virtual_path("/foo/src/bar/baz/lib.rs".into()), + Some(Vec::new()), + ); + vfs.set_file_contents( + 
VfsPath::new_virtual_path("/foo/bar/baz/lib.rs".into()), + Some(Vec::new()), + ); + vfs.set_file_contents(VfsPath::new_virtual_path("/quux/lib.rs".into()), Some(Vec::new())); + + let partition = file_set.partition(&vfs).into_iter().map(|it| it.len()).collect::<Vec<_>>(); + assert_eq!(partition, vec![2, 1, 1]); +} + +#[test] +fn name_prefix() { + let mut file_set = FileSetConfig::builder(); + file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo".into())]); + file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo-things".into())]); + let file_set = file_set.build(); + + let mut vfs = Vfs::default(); + vfs.set_file_contents(VfsPath::new_virtual_path("/foo/src/lib.rs".into()), Some(Vec::new())); + vfs.set_file_contents( + VfsPath::new_virtual_path("/foo-things/src/lib.rs".into()), + Some(Vec::new()), + ); + + let partition = file_set.partition(&vfs).into_iter().map(|it| it.len()).collect::<Vec<_>>(); + assert_eq!(partition, vec![1, 1, 0]); +} diff --git a/src/tools/rust-analyzer/crates/vfs/src/lib.rs b/src/tools/rust-analyzer/crates/vfs/src/lib.rs new file mode 100644 index 000000000..10fae41d0 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/lib.rs @@ -0,0 +1,221 @@ +//! # Virtual File System +//! +//! VFS stores all files read by rust-analyzer. Reading file contents from VFS +//! always returns the same contents, unless VFS was explicitly modified with +//! [`set_file_contents`]. All changes to VFS are logged, and can be retrieved via +//! [`take_changes`] method. The pack of changes is then pushed to `salsa` and +//! triggers incremental recomputation. +//! +//! Files in VFS are identified with [`FileId`]s -- interned paths. The notion of +//! the path, [`VfsPath`] is somewhat abstract: at the moment, it is represented +//! as an [`std::path::PathBuf`] internally, but this is an implementation detail. +//! +//! VFS doesn't do IO or file watching itself. For that, see the [`loader`] +//! module. 
[`loader::Handle`] is an object-safe trait which abstracts both file +//! loading and file watching. [`Handle`] is dynamically configured with a set of +//! directory entries which should be scanned and watched. [`Handle`] then +//! asynchronously pushes file changes. Directory entries are configured in +//! free-form via list of globs, it's up to the [`Handle`] to interpret the globs +//! in any specific way. +//! +//! VFS stores a flat list of files. [`file_set::FileSet`] can partition this list +//! of files into disjoint sets of files. Traversal-like operations (including +//! getting the neighbor file by the relative path) are handled by the [`FileSet`]. +//! [`FileSet`]s are also pushed to salsa and cause it to re-check `mod foo;` +//! declarations when files are created or deleted. +//! +//! [`FileSet`] and [`loader::Entry`] play similar, but different roles. +//! Both specify the "set of paths/files", one is geared towards file watching, +//! the other towards salsa changes. In particular, single [`FileSet`] +//! may correspond to several [`loader::Entry`]. For example, a crate from +//! crates.io which uses code generation would have two [`Entries`] -- for sources +//! in `~/.cargo`, and for generated code in `./target/debug/build`. It will +//! have a single [`FileSet`] which unions the two sources. +//! +//! [`set_file_contents`]: Vfs::set_file_contents +//! [`take_changes`]: Vfs::take_changes +//! [`FileSet`]: file_set::FileSet +//! [`Handle`]: loader::Handle +//! 
[`Entries`]: loader::Entry + +#![warn(rust_2018_idioms, unused_lifetimes, semicolon_in_expressions_from_macros)] + +mod anchored_path; +pub mod file_set; +pub mod loader; +mod path_interner; +mod vfs_path; + +use std::{fmt, mem}; + +use crate::path_interner::PathInterner; + +pub use crate::{ + anchored_path::{AnchoredPath, AnchoredPathBuf}, + vfs_path::VfsPath, +}; +pub use paths::{AbsPath, AbsPathBuf}; + +/// Handle to a file in [`Vfs`] +/// +/// Most functions in rust-analyzer use this when they need to refer to a file. +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct FileId(pub u32); + +/// Storage for all files read by rust-analyzer. +/// +/// For more informations see the [crate-level](crate) documentation. +#[derive(Default)] +pub struct Vfs { + interner: PathInterner, + data: Vec<Option<Vec<u8>>>, + changes: Vec<ChangedFile>, +} + +/// Changed file in the [`Vfs`]. +pub struct ChangedFile { + /// Id of the changed file + pub file_id: FileId, + /// Kind of change + pub change_kind: ChangeKind, +} + +impl ChangedFile { + /// Returns `true` if the change is not [`Delete`](ChangeKind::Delete). + pub fn exists(&self) -> bool { + self.change_kind != ChangeKind::Delete + } + + /// Returns `true` if the change is [`Create`](ChangeKind::Create) or + /// [`Delete`](ChangeKind::Delete). + pub fn is_created_or_deleted(&self) -> bool { + matches!(self.change_kind, ChangeKind::Create | ChangeKind::Delete) + } +} + +/// Kind of [file change](ChangedFile). +#[derive(Eq, PartialEq, Copy, Clone, Debug)] +pub enum ChangeKind { + /// The file was (re-)created + Create, + /// The file was modified + Modify, + /// The file was deleted + Delete, +} + +impl Vfs { + /// Amount of files currently stored. + /// + /// Note that this includes deleted files. + pub fn len(&self) -> usize { + self.data.len() + } + + /// Id of the given path if it exists in the `Vfs` and is not deleted. 
+ pub fn file_id(&self, path: &VfsPath) -> Option<FileId> { + self.interner.get(path).filter(|&it| self.get(it).is_some()) + } + + /// File path corresponding to the given `file_id`. + /// + /// # Panics + /// + /// Panics if the id is not present in the `Vfs`. + pub fn file_path(&self, file_id: FileId) -> VfsPath { + self.interner.lookup(file_id).clone() + } + + /// File content corresponding to the given `file_id`. + /// + /// # Panics + /// + /// Panics if the id is not present in the `Vfs`, or if the corresponding file is + /// deleted. + pub fn file_contents(&self, file_id: FileId) -> &[u8] { + self.get(file_id).as_deref().unwrap() + } + + /// Returns an iterator over the stored ids and their corresponding paths. + /// + /// This will skip deleted files. + pub fn iter(&self) -> impl Iterator<Item = (FileId, &VfsPath)> + '_ { + (0..self.data.len()) + .map(|it| FileId(it as u32)) + .filter(move |&file_id| self.get(file_id).is_some()) + .map(move |file_id| { + let path = self.interner.lookup(file_id); + (file_id, path) + }) + } + + /// Update the `path` with the given `contents`. `None` means the file was deleted. + /// + /// Returns `true` if the file was modified, and saves the [change](ChangedFile). + /// + /// If the path does not currently exists in the `Vfs`, allocates a new + /// [`FileId`] for it. + pub fn set_file_contents(&mut self, path: VfsPath, contents: Option<Vec<u8>>) -> bool { + let file_id = self.alloc_file_id(path); + let change_kind = match (&self.get(file_id), &contents) { + (None, None) => return false, + (None, Some(_)) => ChangeKind::Create, + (Some(_), None) => ChangeKind::Delete, + (Some(old), Some(new)) if old == new => return false, + (Some(_), Some(_)) => ChangeKind::Modify, + }; + + *self.get_mut(file_id) = contents; + self.changes.push(ChangedFile { file_id, change_kind }); + true + } + + /// Returns `true` if the `Vfs` contains [changes](ChangedFile). 
+ pub fn has_changes(&self) -> bool { + !self.changes.is_empty() + } + + /// Drain and returns all the changes in the `Vfs`. + pub fn take_changes(&mut self) -> Vec<ChangedFile> { + mem::take(&mut self.changes) + } + + /// Returns the id associated with `path` + /// + /// - If `path` does not exists in the `Vfs`, allocate a new id for it, associated with a + /// deleted file; + /// - Else, returns `path`'s id. + /// + /// Does not record a change. + fn alloc_file_id(&mut self, path: VfsPath) -> FileId { + let file_id = self.interner.intern(path); + let idx = file_id.0 as usize; + let len = self.data.len().max(idx + 1); + self.data.resize_with(len, || None); + file_id + } + + /// Returns the content associated with the given `file_id`. + /// + /// # Panics + /// + /// Panics if no file is associated to that id. + fn get(&self, file_id: FileId) -> &Option<Vec<u8>> { + &self.data[file_id.0 as usize] + } + + /// Mutably returns the content associated with the given `file_id`. + /// + /// # Panics + /// + /// Panics if no file is associated to that id. + fn get_mut(&mut self, file_id: FileId) -> &mut Option<Vec<u8>> { + &mut self.data[file_id.0 as usize] + } +} + +impl fmt::Debug for Vfs { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Vfs").field("n_files", &self.data.len()).finish() + } +} diff --git a/src/tools/rust-analyzer/crates/vfs/src/loader.rs b/src/tools/rust-analyzer/crates/vfs/src/loader.rs new file mode 100644 index 000000000..e2d74782a --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/loader.rs @@ -0,0 +1,215 @@ +//! Object safe interface for file watching and reading. +use std::fmt; + +use paths::{AbsPath, AbsPathBuf}; + +/// A set of files on the file system. +#[derive(Debug, Clone)] +pub enum Entry { + /// The `Entry` is represented by a raw set of files. + Files(Vec<AbsPathBuf>), + /// The `Entry` is represented by `Directories`. + Directories(Directories), +} + +/// Specifies a set of files on the file system. 
+/// +/// A file is included if: +/// * it has included extension +/// * it is under an `include` path +/// * it is not under `exclude` path +/// +/// If many include/exclude paths match, the longest one wins. +/// +/// If a path is in both `include` and `exclude`, the `exclude` one wins. +#[derive(Debug, Clone, Default)] +pub struct Directories { + pub extensions: Vec<String>, + pub include: Vec<AbsPathBuf>, + pub exclude: Vec<AbsPathBuf>, +} + +/// [`Handle`]'s configuration. +#[derive(Debug)] +pub struct Config { + /// Version number to associate progress updates to the right config + /// version. + pub version: u32, + /// Set of initially loaded files. + pub load: Vec<Entry>, + /// Index of watched entries in `load`. + /// + /// If a path in a watched entry is modified,the [`Handle`] should notify it. + pub watch: Vec<usize>, +} + +/// Message about an action taken by a [`Handle`]. +pub enum Message { + /// Indicate a gradual progress. + /// + /// This is supposed to be the number of loaded files. + Progress { n_total: usize, n_done: usize, config_version: u32 }, + /// The handle loaded the following files' content. + Loaded { files: Vec<(AbsPathBuf, Option<Vec<u8>>)> }, +} + +/// Type that will receive [`Messages`](Message) from a [`Handle`]. +pub type Sender = Box<dyn Fn(Message) + Send>; + +/// Interface for reading and watching files. +pub trait Handle: fmt::Debug { + /// Spawn a new handle with the given `sender`. + fn spawn(sender: Sender) -> Self + where + Self: Sized; + + /// Set this handle's configuration. + fn set_config(&mut self, config: Config); + + /// The file's content at `path` has been modified, and should be reloaded. + fn invalidate(&mut self, path: AbsPathBuf); + + /// Load the content of the given file, returning [`None`] if it does not + /// exists. 
+ fn load_sync(&mut self, path: &AbsPath) -> Option<Vec<u8>>; +} + +impl Entry { + /// Returns: + /// ```text + /// Entry::Directories(Directories { + /// extensions: ["rs"], + /// include: [base], + /// exclude: [base/.git], + /// }) + /// ``` + pub fn rs_files_recursively(base: AbsPathBuf) -> Entry { + Entry::Directories(dirs(base, &[".git"])) + } + + /// Returns: + /// ```text + /// Entry::Directories(Directories { + /// extensions: ["rs"], + /// include: [base], + /// exclude: [base/.git, base/target], + /// }) + /// ``` + pub fn local_cargo_package(base: AbsPathBuf) -> Entry { + Entry::Directories(dirs(base, &[".git", "target"])) + } + + /// Returns: + /// ```text + /// Entry::Directories(Directories { + /// extensions: ["rs"], + /// include: [base], + /// exclude: [base/.git, /tests, /examples, /benches], + /// }) + /// ``` + pub fn cargo_package_dependency(base: AbsPathBuf) -> Entry { + Entry::Directories(dirs(base, &[".git", "/tests", "/examples", "/benches"])) + } + + /// Returns `true` if `path` is included in `self`. + /// + /// See [`Directories::contains_file`]. + pub fn contains_file(&self, path: &AbsPath) -> bool { + match self { + Entry::Files(files) => files.iter().any(|it| it == path), + Entry::Directories(dirs) => dirs.contains_file(path), + } + } + + /// Returns `true` if `path` is included in `self`. + /// + /// - If `self` is `Entry::Files`, returns `false` + /// - Else, see [`Directories::contains_dir`]. + pub fn contains_dir(&self, path: &AbsPath) -> bool { + match self { + Entry::Files(_) => false, + Entry::Directories(dirs) => dirs.contains_dir(path), + } + } +} + +impl Directories { + /// Returns `true` if `path` is included in `self`. + pub fn contains_file(&self, path: &AbsPath) -> bool { + // First, check the file extension... + let ext = path.extension().unwrap_or_default(); + if self.extensions.iter().all(|it| it.as_str() != ext) { + return false; + } + + // Then, check for path inclusion... 
+ self.includes_path(path) + } + + /// Returns `true` if `path` is included in `self`. + /// + /// Since `path` is supposed to be a directory, this will not take extension + /// into account. + pub fn contains_dir(&self, path: &AbsPath) -> bool { + self.includes_path(path) + } + + /// Returns `true` if `path` is included in `self`. + /// + /// It is included if + /// - An element in `self.include` is a prefix of `path`. + /// - This path is longer than any element in `self.exclude` that is a prefix + /// of `path`. In case of equality, exclusion wins. + fn includes_path(&self, path: &AbsPath) -> bool { + let mut include: Option<&AbsPathBuf> = None; + for incl in &self.include { + if path.starts_with(incl) { + include = Some(match include { + Some(prev) if prev.starts_with(incl) => prev, + _ => incl, + }); + } + } + + let include = match include { + Some(it) => it, + None => return false, + }; + + !self.exclude.iter().any(|excl| path.starts_with(excl) && excl.starts_with(include)) + } +} + +/// Returns : +/// ```text +/// Directories { +/// extensions: ["rs"], +/// include: [base], +/// exclude: [base/<exclude>], +/// } +/// ``` +fn dirs(base: AbsPathBuf, exclude: &[&str]) -> Directories { + let exclude = exclude.iter().map(|it| base.join(it)).collect::<Vec<_>>(); + Directories { extensions: vec!["rs".to_string()], include: vec![base], exclude } +} + +impl fmt::Debug for Message { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Message::Loaded { files } => { + f.debug_struct("Loaded").field("n_files", &files.len()).finish() + } + Message::Progress { n_total, n_done, config_version } => f + .debug_struct("Progress") + .field("n_total", n_total) + .field("n_done", n_done) + .field("config_version", config_version) + .finish(), + } + } +} + +#[test] +fn handle_is_object_safe() { + fn _assert(_: &dyn Handle) {} +} diff --git a/src/tools/rust-analyzer/crates/vfs/src/path_interner.rs 
b/src/tools/rust-analyzer/crates/vfs/src/path_interner.rs new file mode 100644 index 000000000..6e049f0d4 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/path_interner.rs @@ -0,0 +1,48 @@ +//! Maps paths to compact integer ids. We don't care about clearing paths which +//! no longer exist -- the assumption is total size of paths we ever look at is +//! not too big. +use std::hash::BuildHasherDefault; + +use indexmap::IndexSet; +use rustc_hash::FxHasher; + +use crate::{FileId, VfsPath}; + +/// Structure to map between [`VfsPath`] and [`FileId`]. +pub(crate) struct PathInterner { + map: IndexSet<VfsPath, BuildHasherDefault<FxHasher>>, +} + +impl Default for PathInterner { + fn default() -> Self { + Self { map: IndexSet::default() } + } +} + +impl PathInterner { + /// Get the id corresponding to `path`. + /// + /// If `path` does not exist in `self`, returns [`None`]. + pub(crate) fn get(&self, path: &VfsPath) -> Option<FileId> { + self.map.get_index_of(path).map(|i| FileId(i as u32)) + } + + /// Insert `path` in `self`. + /// + /// - If `path` already exists in `self`, returns its associated id; + /// - Else, returns a newly allocated id. + pub(crate) fn intern(&mut self, path: VfsPath) -> FileId { + let (id, _added) = self.map.insert_full(path); + assert!(id < u32::MAX as usize); + FileId(id as u32) + } + + /// Returns the path corresponding to `id`. + /// + /// # Panics + /// + /// Panics if `id` does not exist in `self`. + pub(crate) fn lookup(&self, id: FileId) -> &VfsPath { + self.map.get_index(id.0 as usize).unwrap() + } +} diff --git a/src/tools/rust-analyzer/crates/vfs/src/vfs_path.rs b/src/tools/rust-analyzer/crates/vfs/src/vfs_path.rs new file mode 100644 index 000000000..668c7320d --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/vfs_path.rs @@ -0,0 +1,406 @@ +//! Abstract-ish representation of paths for VFS. +use std::fmt; + +use paths::{AbsPath, AbsPathBuf}; + +/// Path in [`Vfs`]. 
+/// +/// Long-term, we want to support files which do not reside in the file-system, +/// so we treat `VfsPath`s as opaque identifiers. +/// +/// [`Vfs`]: crate::Vfs +#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct VfsPath(VfsPathRepr); + +impl VfsPath { + /// Creates an "in-memory" path from `/`-separated string. + /// + /// This is most useful for testing, to avoid windows/linux differences + /// + /// # Panics + /// + /// Panics if `path` does not start with `'/'`. + pub fn new_virtual_path(path: String) -> VfsPath { + assert!(path.starts_with('/')); + VfsPath(VfsPathRepr::VirtualPath(VirtualPath(path))) + } + + /// Create a path from string. Input should be a string representation of + /// an absolute path inside filesystem + pub fn new_real_path(path: String) -> VfsPath { + VfsPath::from(AbsPathBuf::assert(path.into())) + } + + /// Returns the `AbsPath` representation of `self` if `self` is on the file system. + pub fn as_path(&self) -> Option<&AbsPath> { + match &self.0 { + VfsPathRepr::PathBuf(it) => Some(it.as_path()), + VfsPathRepr::VirtualPath(_) => None, + } + } + + /// Creates a new `VfsPath` with `path` adjoined to `self`. + pub fn join(&self, path: &str) -> Option<VfsPath> { + match &self.0 { + VfsPathRepr::PathBuf(it) => { + let res = it.join(path).normalize(); + Some(VfsPath(VfsPathRepr::PathBuf(res))) + } + VfsPathRepr::VirtualPath(it) => { + let res = it.join(path)?; + Some(VfsPath(VfsPathRepr::VirtualPath(res))) + } + } + } + + /// Remove the last component of `self` if there is one. + /// + /// If `self` has no component, returns `false`; else returns `true`. 
+ /// + /// # Example + /// + /// ``` + /// # use vfs::{AbsPathBuf, VfsPath}; + /// let mut path = VfsPath::from(AbsPathBuf::assert("/foo/bar".into())); + /// assert!(path.pop()); + /// assert_eq!(path, VfsPath::from(AbsPathBuf::assert("/foo".into()))); + /// assert!(path.pop()); + /// assert_eq!(path, VfsPath::from(AbsPathBuf::assert("/".into()))); + /// assert!(!path.pop()); + /// ``` + pub fn pop(&mut self) -> bool { + match &mut self.0 { + VfsPathRepr::PathBuf(it) => it.pop(), + VfsPathRepr::VirtualPath(it) => it.pop(), + } + } + + /// Returns `true` if `other` is a prefix of `self`. + pub fn starts_with(&self, other: &VfsPath) -> bool { + match (&self.0, &other.0) { + (VfsPathRepr::PathBuf(lhs), VfsPathRepr::PathBuf(rhs)) => lhs.starts_with(rhs), + (VfsPathRepr::VirtualPath(lhs), VfsPathRepr::VirtualPath(rhs)) => lhs.starts_with(rhs), + (VfsPathRepr::PathBuf(_) | VfsPathRepr::VirtualPath(_), _) => false, + } + } + + /// Returns the `VfsPath` without its final component, if there is one. + /// + /// Returns [`None`] if the path is a root or prefix. + pub fn parent(&self) -> Option<VfsPath> { + let mut parent = self.clone(); + if parent.pop() { + Some(parent) + } else { + None + } + } + + /// Returns `self`'s base name and file extension. + pub fn name_and_extension(&self) -> Option<(&str, Option<&str>)> { + match &self.0 { + VfsPathRepr::PathBuf(p) => Some(( + p.file_stem()?.to_str()?, + p.extension().and_then(|extension| extension.to_str()), + )), + VfsPathRepr::VirtualPath(p) => p.name_and_extension(), + } + } + + /// **Don't make this `pub`** + /// + /// Encode the path in the given buffer. + /// + /// The encoding will be `0` if [`AbsPathBuf`], `1` if [`VirtualPath`], followed + /// by `self`'s representation. + /// + /// Note that this encoding is dependent on the operating system. 
pub(crate) fn encode(&self, buf: &mut Vec<u8>) {
    // The first byte is a tag identifying the representation, so the two
    // kinds of path always differ in the first byte written here.
    let tag = match &self.0 {
        VfsPathRepr::PathBuf(_) => 0,
        VfsPathRepr::VirtualPath(_) => 1,
    };
    buf.push(tag);
    match &self.0 {
        VfsPathRepr::PathBuf(path) => {
            // Windows: re-encode the path component by component instead of
            // copying it verbatim. The tests in `windows_paths` expect the
            // result to be the same regardless of drive-letter casing and of
            // which separator was used in the input.
            #[cfg(windows)]
            {
                use windows_paths::Encode;
                let path: &std::path::Path = path.as_ref();
                let components = path.components();
                // `true` when the previous component wrote at least one byte,
                // meaning the next component must be preceded by a separator.
                let mut add_sep = false;
                for component in components {
                    if add_sep {
                        windows_paths::SEP.encode(buf);
                    }
                    // Measured *after* the separator, so that only bytes written
                    // by the component itself count below.
                    let len_before = buf.len();
                    match component {
                        std::path::Component::Prefix(prefix) => {
                            // kind() returns a normalized and comparable path prefix.
                            prefix.kind().encode(buf);
                        }
                        std::path::Component::RootDir => {
                            // If a separator was just emitted above it already
                            // stands in for the root directory; write the root
                            // itself only when nothing preceded it.
                            if !add_sep {
                                component.as_os_str().encode(buf);
                            }
                        }
                        _ => component.as_os_str().encode(buf),
                    }

                    // some components may be encoded empty
                    // (e.g. the skipped root directory above); in that case no
                    // separator is added before the next component.
                    add_sep = len_before != buf.len();
                }
            }
            // Unix: append the raw bytes of the path.
            #[cfg(unix)]
            {
                use std::os::unix::ffi::OsStrExt;
                buf.extend(path.as_os_str().as_bytes());
            }
            // Any other target: append the path's lossy string conversion.
            #[cfg(not(any(windows, unix)))]
            {
                buf.extend(path.as_os_str().to_string_lossy().as_bytes());
            }
        }
        // Virtual paths are plain strings; append their bytes as they are.
        VfsPathRepr::VirtualPath(VirtualPath(s)) => buf.extend(s.as_bytes()),
    }
}
} // closes the enclosing `impl` block, whose header lies above this chunk

/// Helpers used by the Windows branch of `encode` to write paths into a
/// byte buffer as little-endian 16-bit units.
#[cfg(windows)]
mod windows_paths {
    /// Something that can append its encoded form to a byte buffer.
    pub(crate) trait Encode {
        /// Appends the encoding of `self` to `buf`.
        fn encode(&self, buf: &mut Vec<u8>);
    }

    /// Writes every `u16` produced by `encode_wide()` as two little-endian bytes.
    impl Encode for std::ffi::OsStr {
        fn encode(&self, buf: &mut Vec<u8>) {
            use std::os::windows::ffi::OsStrExt;
            for wchar in self.encode_wide() {
                buf.extend(wchar.to_le_bytes().iter().copied());
            }
        }
    }

    /// Widens the byte to `u16` and writes it as two little-endian bytes, so a
    /// single byte takes the same width as one unit of an encoded `OsStr`.
    impl Encode for u8 {
        fn encode(&self, buf: &mut Vec<u8>) {
            let wide = *self as u16;
            buf.extend(wide.to_le_bytes().iter().copied())
        }
    }

    /// Encodes the string byte by byte through the `u8` implementation.
    ///
    /// Only meant for ASCII text; this is checked with a `debug_assert!`.
    impl Encode for &str {
        fn encode(&self, buf: &mut Vec<u8>) {
            debug_assert!(self.is_ascii());
            for b in self.as_bytes() {
                b.encode(buf)
            }
        }
    }

    // Textual building blocks of the encoded prefixes.
    /// Component separator: `\`.
    pub(crate) const SEP: &str = "\\";
    /// Start of the verbatim prefixes: `\\?\`.
    const VERBATIM: &str = "\\\\?\\";
    /// Marker written after `VERBATIM` for verbatim UNC prefixes.
    const UNC: &str = "UNC";
    /// Start of the device namespace prefix: `\\.\`.
    const DEVICE: &str = "\\\\.\\";
    /// Written after a drive letter.
    const COLON: &str = ":";

    /// Writes each kind of prefix in a fixed textual form built from the
    /// constants above.
    impl Encode for std::path::Prefix<'_> {
        fn encode(&self, buf: &mut Vec<u8>) {
            match self {
                // `\\?\` followed by the component.
                std::path::Prefix::Verbatim(c) => {
                    VERBATIM.encode(buf);
                    c.encode(buf);
                }
                // `\\?\UNC\` + server + `\` + share.
                std::path::Prefix::VerbatimUNC(server, share) => {
                    VERBATIM.encode(buf);
                    UNC.encode(buf);
                    SEP.encode(buf);
                    server.encode(buf);
                    SEP.encode(buf);
                    share.encode(buf);
                }
                // `\\?\` + drive letter + `:`.
                std::path::Prefix::VerbatimDisk(d) => {
                    VERBATIM.encode(buf);
                    d.encode(buf);
                    COLON.encode(buf);
                }
                // `\\.\` + device name.
                std::path::Prefix::DeviceNS(device) => {
                    DEVICE.encode(buf);
                    device.encode(buf);
                }
                // `\\` + server + `\` + share.
                std::path::Prefix::UNC(server, share) => {
                    SEP.encode(buf);
                    SEP.encode(buf);
                    server.encode(buf);
                    SEP.encode(buf);
                    share.encode(buf);
                }
                // Drive letter + `:`.
                std::path::Prefix::Disk(d) => {
                    d.encode(buf);
                    COLON.encode(buf);
                }
            }
        }
    }
    /// Paths that differ only in drive-letter casing or in the separator used
    /// must produce identical encodings.
    #[test]
    fn paths_encoding() {
        // drive letter casing agnostic
        test_eq("C:/x.rs", "c:/x.rs");
        // separator agnostic
        test_eq("C:/x/y.rs", "C:\\x\\y.rs");

        // Encodes both paths and asserts that the resulting buffers are equal.
        fn test_eq(a: &str, b: &str) {
            let mut b1 = Vec::new();
            let mut b2 = Vec::new();
            vfs(a).encode(&mut b1);
            vfs(b).encode(&mut b2);
            assert_eq!(b1, b2);
        }
    }

    /// Pins the exact bytes produced for `C:/x/y`.
    #[test]
    fn test_sep_root_dir_encoding() {
        let mut buf = Vec::new();
        vfs("C:/x/y").encode(&mut buf);
        // Tag `0`, then `C`, `:`, `\`, `x`, `\`, `y`, each as two little-endian
        // bytes: exactly one separator between the drive and `x`.
        assert_eq!(&buf, &[0, 67, 0, 58, 0, 92, 0, 120, 0, 92, 0, 121, 0])
    }

    /// Test helper: builds a `VfsPath` from `str`, panicking if the conversion
    /// to `AbsPathBuf` fails.
    #[cfg(test)]
    fn vfs(str: &str) -> super::VfsPath {
        use super::{AbsPathBuf, VfsPath};
        VfsPath::from(AbsPathBuf::try_from(str).unwrap())
    }
}

/// Internal, private representation of [`VfsPath`].
+#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +enum VfsPathRepr { + PathBuf(AbsPathBuf), + VirtualPath(VirtualPath), +} + +impl From<AbsPathBuf> for VfsPath { + fn from(v: AbsPathBuf) -> Self { + VfsPath(VfsPathRepr::PathBuf(v.normalize())) + } +} + +impl fmt::Display for VfsPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.0 { + VfsPathRepr::PathBuf(it) => fmt::Display::fmt(&it.display(), f), + VfsPathRepr::VirtualPath(VirtualPath(it)) => fmt::Display::fmt(it, f), + } + } +} + +impl fmt::Debug for VfsPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } +} + +impl fmt::Debug for VfsPathRepr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + VfsPathRepr::PathBuf(it) => fmt::Debug::fmt(&it.display(), f), + VfsPathRepr::VirtualPath(VirtualPath(it)) => fmt::Debug::fmt(&it, f), + } + } +} + +/// `/`-separated virtual path. +/// +/// This is used to describe files that do not reside on the file system. +#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +struct VirtualPath(String); + +impl VirtualPath { + /// Returns `true` if `other` is a prefix of `self` (as strings). + fn starts_with(&self, other: &VirtualPath) -> bool { + self.0.starts_with(&other.0) + } + + /// Remove the last component of `self`. + /// + /// This will find the last `'/'` in `self`, and remove everything after it, + /// including the `'/'`. + /// + /// If `self` contains no `'/'`, returns `false`; else returns `true`. + /// + /// # Example + /// + /// ```rust,ignore + /// let mut path = VirtualPath("/foo/bar".to_string()); + /// path.pop(); + /// assert_eq!(path.0, "/foo"); + /// path.pop(); + /// assert_eq!(path.0, ""); + /// ``` + fn pop(&mut self) -> bool { + let pos = match self.0.rfind('/') { + Some(pos) => pos, + None => return false, + }; + self.0 = self.0[..pos].to_string(); + true + } + + /// Append the given *relative* path `path` to `self`. 
+ /// + /// This will resolve any leading `"../"` in `path` before appending it. + /// + /// Returns [`None`] if `path` has more leading `"../"` than the number of + /// components in `self`. + /// + /// # Notes + /// + /// In practice, appending here means `self/path` as strings. + fn join(&self, mut path: &str) -> Option<VirtualPath> { + let mut res = self.clone(); + while path.starts_with("../") { + if !res.pop() { + return None; + } + path = &path["../".len()..]; + } + path = path.trim_start_matches("./"); + res.0 = format!("{}/{}", res.0, path); + Some(res) + } + + /// Returns `self`'s base name and file extension. + /// + /// # Returns + /// - `None` if `self` ends with `"//"`. + /// - `Some((name, None))` if `self`'s base contains no `.`, or only one `.` at + /// the start. + /// - `Some((name, Some(extension))` else. + /// + /// # Note + /// The extension will not contains `.`. This means `"/foo/bar.baz.rs"` will + /// return `Some(("bar.baz", Some("rs"))`. + fn name_and_extension(&self) -> Option<(&str, Option<&str>)> { + let file_path = if self.0.ends_with('/') { &self.0[..&self.0.len() - 1] } else { &self.0 }; + let file_name = match file_path.rfind('/') { + Some(position) => &file_path[position + 1..], + None => file_path, + }; + + if file_name.is_empty() { + None + } else { + let mut file_stem_and_extension = file_name.rsplitn(2, '.'); + let extension = file_stem_and_extension.next(); + let file_stem = file_stem_and_extension.next(); + + match (file_stem, extension) { + (None, None) => None, + (None | Some(""), Some(_)) => Some((file_name, None)), + (Some(file_stem), extension) => Some((file_stem, extension)), + } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/src/tools/rust-analyzer/crates/vfs/src/vfs_path/tests.rs b/src/tools/rust-analyzer/crates/vfs/src/vfs_path/tests.rs new file mode 100644 index 000000000..510e021e8 --- /dev/null +++ b/src/tools/rust-analyzer/crates/vfs/src/vfs_path/tests.rs @@ -0,0 +1,30 @@ +use super::*; + +#[test] +fn 
virtual_path_extensions() { + assert_eq!(VirtualPath("/".to_string()).name_and_extension(), None); + assert_eq!( + VirtualPath("/directory".to_string()).name_and_extension(), + Some(("directory", None)) + ); + assert_eq!( + VirtualPath("/directory/".to_string()).name_and_extension(), + Some(("directory", None)) + ); + assert_eq!( + VirtualPath("/directory/file".to_string()).name_and_extension(), + Some(("file", None)) + ); + assert_eq!( + VirtualPath("/directory/.file".to_string()).name_and_extension(), + Some((".file", None)) + ); + assert_eq!( + VirtualPath("/directory/.file.rs".to_string()).name_and_extension(), + Some((".file", Some("rs"))) + ); + assert_eq!( + VirtualPath("/directory/file.rs".to_string()).name_and_extension(), + Some(("file", Some("rs"))) + ); +} |