summaryrefslogtreecommitdiffstats
path: root/vendor/ignore/src/types.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/ignore/src/types.rs
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ignore/src/types.rs')
-rw-r--r--vendor/ignore/src/types.rs591
1 files changed, 591 insertions, 0 deletions
diff --git a/vendor/ignore/src/types.rs b/vendor/ignore/src/types.rs
new file mode 100644
index 000000000..efb9a8d9b
--- /dev/null
+++ b/vendor/ignore/src/types.rs
@@ -0,0 +1,591 @@
+/*!
+The types module provides a way of associating globs on file names to file
+types.
+
+This can be used to match specific types of files. For example, among
+the default file types provided, the Rust file type is defined to be `*.rs`
+with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
+name `c`.
+
+Note that the set of default types may change over time.
+
+# Example
+
+This shows how to create and use a simple file type matcher using the default
+file types defined in this crate.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.select("rust");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("foo.rs", false).is_whitelist());
+assert!(matcher.matched("foo.c", false).is_ignore());
+```
+
+# Example: negation
+
+This is like the previous example, but shows how negating a file type works.
+That is, this will let us match file paths that *don't* correspond to a
+particular file type.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.negate("c");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("foo.rs", false).is_none());
+assert!(matcher.matched("foo.c", false).is_ignore());
+```
+
+# Example: custom file type definitions
+
+This shows how to extend this library's default file type definitions with
+your own.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.add("foo", "*.foo");
+// Another way of adding a file type definition.
+// This is useful when accepting input from an end user.
+builder.add_def("bar:*.bar");
+// Note: we only select `foo`, not `bar`.
+builder.select("foo");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("x.foo", false).is_whitelist());
+// This is ignored because we only selected the `foo` file type.
+assert!(matcher.matched("x.bar", false).is_ignore());
+```
+
+We can also add file type definitions based on other definitions.
+
+```
+use ignore::types::TypesBuilder;
+
+let mut builder = TypesBuilder::new();
+builder.add_defaults();
+builder.add("foo", "*.foo");
+builder.add_def("bar:include:foo,cpp");
+builder.select("bar");
+let matcher = builder.build().unwrap();
+
+assert!(matcher.matched("x.foo", false).is_whitelist());
+assert!(matcher.matched("y.cpp", false).is_whitelist());
+```
+*/
+
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::Arc;
+
+use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
+use regex::Regex;
+use thread_local::ThreadLocal;
+
+use crate::default_types::DEFAULT_TYPES;
+use crate::pathutil::file_name;
+use crate::{Error, Match};
+
+/// Glob represents a single glob in a set of file type definitions.
+///
+/// There may be more than one glob for a particular file type.
+///
+/// This is used to report information about the highest-precedence glob
+/// that matched (the last matching glob among the selections).
+///
+/// Note that not all matches necessarily correspond to a specific glob.
+/// For example, if there are one or more selections and a file path doesn't
+/// match any of those selections, then the file path is considered to be
+/// ignored; `file_type_def` returns `None` for such a match.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying file type
+/// definition, which corresponds to the lifetime of the file type matcher.
+#[derive(Clone, Debug)]
+pub struct Glob<'a>(GlobInner<'a>);
+
+#[derive(Clone, Debug)]
+enum GlobInner<'a> { // Private representation wrapped by the public `Glob` newtype.
+ /// No glob matched, but the file path should still be ignored (this is what `Glob::unmatched` builds).
+ UnmatchedIgnore,
+ /// A glob belonging to one of the selected or negated file types matched.
+ Matched {
+ /// The file type definition which provided the glob.
+ def: &'a FileTypeDef,
+ /// The index of the glob that matched inside the file type definition's list of globs.
+ which: usize,
+ /// Whether the selection was negated or not (a negated selection yields an ignore match).
+ negated: bool,
+ },
+}
+
+impl<'a> Glob<'a> {
+    /// Builds the glob reported when a path is ignored even though no
+    /// specific glob matched it.
+    fn unmatched() -> Glob<'a> {
+        Glob(GlobInner::UnmatchedIgnore)
+    }
+
+    /// Return the file type definition that matched, if one exists. A file type
+    /// definition always exists when a specific definition matches a file
+    /// path.
+    ///
+    /// The returned reference is tied to the lifetime `'a` carried by this
+    /// `Glob` rather than to the borrow of the `Glob` itself, so the
+    /// definition can still be used after the `Glob` value has been dropped.
+    pub fn file_type_def(&self) -> Option<&'a FileTypeDef> {
+        // `def` is a shared reference (and therefore `Copy`), so it can be
+        // copied out of the borrowed enum with its full `'a` lifetime.
+        match self.0 {
+            GlobInner::UnmatchedIgnore => None,
+            GlobInner::Matched { def, .. } => Some(def),
+        }
+    }
+}
+
+/// A single file type definition.
+///
+/// File type definitions can be retrieved in aggregate from a file type
+/// matcher. A file type definition is also reported when it is responsible
+/// for a match.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FileTypeDef {
+ name: String, // The name of the file type, exposed through `name()`.
+ globs: Vec<String>, // The glob patterns recognizing this file type, exposed through `globs()`.
+}
+
+impl FileTypeDef {
+    /// The name under which this file type is known.
+    pub fn name(&self) -> &str {
+        self.name.as_str()
+    }
+
+    /// The glob patterns by which files of this type are recognized.
+    pub fn globs(&self) -> &[String] {
+        self.globs.as_slice()
+    }
+}
+
+/// Types is a file type matcher, produced by `TypesBuilder::build` (or `Types::empty`).
+#[derive(Clone, Debug)]
+pub struct Types {
+ /// All of the file type definitions, sorted lexicographically by name.
+ defs: Vec<FileTypeDef>,
+ /// All of the selections made by the user, in the order they were made; each carries its own copy of the definition it names.
+ selections: Vec<Selection<FileTypeDef>>,
+ /// Whether there is at least one Selection::Select in our selections.
+ /// When this is true, a Match::None is converted to Match::Ignore.
+ has_selected: bool,
+ /// A mapping from glob index in the set to two indices. The first is an
+ /// index into `selections` and the second is an index into the
+ /// corresponding file type definition's list of globs.
+ glob_to_selection: Vec<(usize, usize)>,
+ /// The set of all glob selections, used for actual matching.
+ set: GlobSet,
+ /// Per-thread scratch buffer for the indices of the globs in `set` that matched, so `matched` needs no fresh `Vec` per call.
+ matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
+}
+
+/// Indicates the type of a selection for a particular file type. Both variants hold the file type name and a payload `T`.
+#[derive(Clone, Debug)]
+enum Selection<T> {
+ Select(String, T), // The named file type is selected: paths matching it are whitelisted.
+ Negate(String, T), // The named file type is negated: paths matching it are ignored.
+}
+
+impl<T> Selection<T> {
+    /// Reports whether this selection is a negation rather than a plain
+    /// selection.
+    fn is_negated(&self) -> bool {
+        matches!(*self, Selection::Negate(..))
+    }
+
+    /// The name of the file type this selection refers to.
+    fn name(&self) -> &str {
+        match self {
+            Selection::Select(name, _) | Selection::Negate(name, _) => name,
+        }
+    }
+
+    /// Converts the payload with `f`, leaving the kind of selection and the
+    /// file type name untouched.
+    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
+        let (negated, name, inner) = match self {
+            Selection::Select(name, inner) => (false, name, inner),
+            Selection::Negate(name, inner) => (true, name, inner),
+        };
+        let mapped = f(inner);
+        if negated {
+            Selection::Negate(name, mapped)
+        } else {
+            Selection::Select(name, mapped)
+        }
+    }
+
+    /// Borrows the payload carried by this selection.
+    fn inner(&self) -> &T {
+        match self {
+            Selection::Select(_, inner) | Selection::Negate(_, inner) => inner,
+        }
+    }
+}
+
+impl Types {
+    /// Creates a file type matcher with no definitions and no selections;
+    /// such a matcher never matches any path.
+    pub fn empty() -> Types {
+        let set = GlobSetBuilder::new().build().unwrap();
+        Types {
+            defs: Vec::new(),
+            selections: Vec::new(),
+            has_selected: false,
+            glob_to_selection: Vec::new(),
+            set,
+            matches: Arc::new(ThreadLocal::default()),
+        }
+    }
+
+    /// Returns true if and only if no selections were made on this matcher.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns how many selections this matcher was built from.
+    pub fn len(&self) -> usize {
+        self.selections.len()
+    }
+
+    /// Return the file type definitions known to this matcher.
+    ///
+    /// Definitions and globs are sorted.
+    pub fn definitions(&self) -> &[FileTypeDef] {
+        self.defs.as_slice()
+    }
+
+    /// Matches `path` against the selected and negated file types.
+    ///
+    /// A path matching a selected file type is whitelisted and a path
+    /// matching a negated file type is ignored. A path matching nothing is
+    /// ignored when at least one file type was selected, and yields no match
+    /// otherwise. Directories never match.
+    pub fn matched<'a, P: AsRef<Path>>(
+        &'a self,
+        path: P,
+        is_dir: bool,
+    ) -> Match<Glob<'a>> {
+        // File types say nothing about directories, and an empty glob set
+        // cannot match anything.
+        if is_dir || self.set.is_empty() {
+            return Match::None;
+        }
+        // The verdict for a path that no glob accounts for: ignored if the
+        // user selected at least one type, otherwise no opinion.
+        let unmatched = || {
+            if self.has_selected {
+                Match::Ignore(Glob::unmatched())
+            } else {
+                Match::None
+            }
+        };
+        // Only the file name takes part in matching; a path without one
+        // cannot be matched by any glob.
+        let name = match file_name(path.as_ref()) {
+            Some(name) => name,
+            None => return unmatched(),
+        };
+        let mut hits = self.matches.get_or_default().borrow_mut();
+        self.set.matches_into(name, &mut *hits);
+        // The last matching glob has the highest precedence.
+        let winner = match hits.last() {
+            Some(&index) => index,
+            None => return unmatched(),
+        };
+        let (isel, iglob) = self.glob_to_selection[winner];
+        let selection = &self.selections[isel];
+        let negated = selection.is_negated();
+        let glob = Glob(GlobInner::Matched {
+            def: selection.inner(),
+            which: iglob,
+            negated,
+        });
+        if negated {
+            Match::Ignore(glob)
+        } else {
+            Match::Whitelist(glob)
+        }
+    }
+}
+
+/// TypesBuilder builds a type matcher from a set of file type definitions and
+/// a set of file type selections.
+pub struct TypesBuilder {
+ types: HashMap<String, FileTypeDef>, // File type definitions, keyed by file type name.
+ selections: Vec<Selection<()>>, // Selections and negations in the order requested; resolved against `types` by `build`.
+}
+
+impl TypesBuilder {
+    /// Create a new builder for a file type matcher.
+    ///
+    /// The builder contains *no* type definitions to start with. A set
+    /// of default type definitions can be added with `add_defaults`, and
+    /// additional type definitions can be added with `add` and `add_def`.
+    /// Definitions only influence matching once they are chosen with
+    /// `select` or `negate`.
+    pub fn new() -> TypesBuilder {
+        TypesBuilder { types: HashMap::new(), selections: vec![] }
+    }
+
+    /// Build the current set of file type definitions *and* selections into
+    /// a file type matcher.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::UnrecognizedFileType` if a selection names a file type
+    /// that has no definition, and `Error::Glob` if a glob of a selected file
+    /// type cannot be compiled.
+    pub fn build(&self) -> Result<Types, Error> {
+        let has_selected = self.selections.iter().any(|s| !s.is_negated());
+
+        let mut selections = vec![];
+        let mut glob_to_selection = vec![];
+        let mut build_set = GlobSetBuilder::new();
+        for (isel, selection) in self.selections.iter().enumerate() {
+            let def = self.types.get(selection.name()).cloned().ok_or_else(
+                || Error::UnrecognizedFileType(selection.name().to_string()),
+            )?;
+            for (iglob, glob) in def.globs.iter().enumerate() {
+                let compiled = GlobBuilder::new(glob)
+                    .literal_separator(true)
+                    .build()
+                    .map_err(|err| Error::Glob {
+                        glob: Some(glob.to_string()),
+                        err: err.kind().to_string(),
+                    })?;
+                build_set.add(compiled);
+                // Globs are added in selection order, so the position of
+                // this entry equals the glob's index inside the final set.
+                glob_to_selection.push((isel, iglob));
+            }
+            selections.push(selection.clone().map(move |_| def));
+        }
+        let set = build_set
+            .build()
+            .map_err(|err| Error::Glob { glob: None, err: err.to_string() })?;
+        Ok(Types {
+            // Cloning and sorting every definition is deferred until all
+            // fallible steps have succeeded.
+            defs: self.definitions(),
+            selections,
+            has_selected,
+            glob_to_selection,
+            set,
+            matches: Arc::new(ThreadLocal::default()),
+        })
+    }
+
+    /// Return the set of current file type definitions.
+    ///
+    /// Definitions and globs are sorted.
+    pub fn definitions(&self) -> Vec<FileTypeDef> {
+        let mut defs: Vec<FileTypeDef> = self
+            .types
+            .values()
+            .map(|def| {
+                let mut def = def.clone();
+                def.globs.sort();
+                def
+            })
+            .collect();
+        defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
+        defs
+    }
+
+    /// Select the file type given by `name`.
+    ///
+    /// If `name` is `all`, then all file types currently defined are selected.
+    ///
+    /// Whether `name` is actually defined is only checked by `build`.
+    pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
+        if name == "all" {
+            self.selections.extend(
+                self.types
+                    .keys()
+                    .map(|name| Selection::Select(name.to_string(), ())),
+            );
+        } else {
+            self.selections.push(Selection::Select(name.to_string(), ()));
+        }
+        self
+    }
+
+    /// Ignore the file type given by `name`.
+    ///
+    /// If `name` is `all`, then all file types currently defined are negated.
+    ///
+    /// Whether `name` is actually defined is only checked by `build`.
+    pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
+        if name == "all" {
+            self.selections.extend(
+                self.types
+                    .keys()
+                    .map(|name| Selection::Negate(name.to_string(), ())),
+            );
+        } else {
+            self.selections.push(Selection::Negate(name.to_string(), ()));
+        }
+        self
+    }
+
+    /// Clear any file type definitions for the type name given.
+    ///
+    /// Selections already made for `name` are left in place.
+    pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
+        self.types.remove(name);
+        self
+    }
+
+    /// Add a new file type definition. `name` can be arbitrary and `glob`
+    /// should be a glob recognizing file paths belonging to the `name` type.
+    ///
+    /// Adding a glob to a name that is already defined appends the glob to
+    /// the existing definition.
+    ///
+    /// If `name` is `all` or otherwise contains any character that is not a
+    /// Unicode letter or number, then an error is returned.
+    pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
+        lazy_static::lazy_static! {
+            static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
+        };
+        // `all` is reserved: `select` and `negate` treat it as a wildcard.
+        if name == "all" || !RE.is_match(name) {
+            return Err(Error::InvalidDefinition);
+        }
+        self.types
+            .entry(name.to_string())
+            .or_insert_with(|| FileTypeDef {
+                name: name.to_string(),
+                globs: vec![],
+            })
+            .globs
+            .push(glob.to_string());
+        Ok(())
+    }
+
+    /// Add a new file type definition specified in string form. There are two
+    /// valid formats:
+    /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the
+    ///    given name with the given glob.
+    /// 2. `{name}:include:{comma-separated list of already defined names}`.
+    ///    This defines an 'include' definition that associates the given name
+    ///    with the definitions of the given existing types.
+    /// Names may not include any characters that are not
+    /// Unicode letters or numbers.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::InvalidDefinition` if `def` has neither format, if any
+    /// component is empty, if the name is invalid, or if an included type is
+    /// not defined. In the last case no definition is modified.
+    pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
+        let parts: Vec<&str> = def.split(':').collect();
+        match parts.as_slice() {
+            [name, glob] => {
+                if name.is_empty() || glob.is_empty() {
+                    return Err(Error::InvalidDefinition);
+                }
+                self.add(name, glob)
+            }
+            [name, directive, types_string] => {
+                if name.is_empty()
+                    || *directive != "include"
+                    || types_string.is_empty()
+                {
+                    return Err(Error::InvalidDefinition);
+                }
+                let included: Vec<&str> = types_string.split(',').collect();
+                // Check ahead of time to ensure that all types specified are
+                // present and fail fast if not, so that a partially valid
+                // list never modifies the builder.
+                if included.iter().any(|t| !self.types.contains_key(*t)) {
+                    return Err(Error::InvalidDefinition);
+                }
+                for type_name in included {
+                    // The globs are copied first because `add` needs
+                    // mutable access to the map they live in.
+                    let globs = self
+                        .types
+                        .get(type_name)
+                        .map(|included_def| included_def.globs.clone())
+                        .ok_or(Error::InvalidDefinition)?;
+                    for glob in globs {
+                        self.add(name, &glob)?;
+                    }
+                }
+                Ok(())
+            }
+            _ => Err(Error::InvalidDefinition),
+        }
+    }
+
+    /// Add a set of default file type definitions.
+    ///
+    /// # Panics
+    ///
+    /// Panics if one of the built-in definitions is rejected by `add`.
+    pub fn add_defaults(&mut self) -> &mut TypesBuilder {
+        const MSG: &str = "adding a default type should never fail";
+        for &(name, exts) in DEFAULT_TYPES {
+            for ext in exts {
+                self.add(name, ext).expect(MSG);
+            }
+        }
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::TypesBuilder;
+
+ macro_rules! matched { // Generates one #[test] fn per invocation; arm order matters because `not` would also parse as `$name:ident`.
+ ($name:ident, $types:expr, $sel:expr, $selnot:expr,
+ $path:expr) => { // Positive form: expects the path NOT to be ignored.
+ matched!($name, $types, $sel, $selnot, $path, true);
+ };
+ (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
+ $path:expr) => { // Negative form: expects the path to be ignored.
+ matched!($name, $types, $sel, $selnot, $path, false);
+ };
+ ($name:ident, $types:expr, $sel:expr, $selnot:expr,
+ $path:expr, $matched:expr) => { // Worker arm: defines the types, applies selections then negations, and checks the verdict.
+ #[test]
+ fn $name() {
+ let mut btypes = TypesBuilder::new();
+ for tydef in $types {
+ btypes.add_def(tydef).unwrap();
+ }
+ for sel in $sel {
+ btypes.select(sel);
+ }
+ for selnot in $selnot {
+ btypes.negate(selnot);
+ }
+ let types = btypes.build().unwrap();
+ let mat = types.matched($path, false);
+ assert_eq!($matched, !mat.is_ignore()); // "Matched" means anything but Ignore, so Match::None counts as matched too.
+ }
+ };
+ }
+
+ fn types() -> Vec<&'static str> { // Definitions shared by every test, in `add_def` string form.
+ vec![
+ "html:*.html",
+ "html:*.htm",
+ "rust:*.rs",
+ "js:*.js",
+ "foo:*.{rs,foo}",
+ "combo:include:html,rust",
+ ]
+ }
+
+ matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
+ matched!(match2, types(), vec!["html"], vec![], "index.html");
+ matched!(match3, types(), vec!["html"], vec![], "index.htm");
+ matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
+ matched!(match5, types(), vec![], vec![], "index.html");
+ matched!(match6, types(), vec![], vec!["rust"], "index.html");
+ matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
+ matched!(match8, types(), vec!["combo"], vec![], "index.html");
+ matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
+
+ matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
+ matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
+ matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
+ matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
+ matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
+ matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
+
+ #[test]
+ fn test_invalid_defs() {
+ let mut btypes = TypesBuilder::new();
+ for tydef in types() {
+ btypes.add_def(tydef).unwrap();
+ }
+ // Snapshot the definitions so we can verify that rejected input leaves them untouched.
+ let original_defs = btypes.definitions();
+ let bad_defs = vec![
+ // Include of a type (`python`) that has not been defined
+ "combo:include:html,python",
+ // Bad format: unknown directive in the middle, and an empty definition
+ "combo:foobar:html,rust",
+ "",
+ ];
+ for def in bad_defs {
+ assert!(btypes.add_def(def).is_err());
+ // Ensure that nothing changed, even if some of the includes were valid.
+ assert_eq!(btypes.definitions(), original_defs);
+ }
+ }
+}