diff options
Diffstat (limited to 'third_party/rust/glob')
-rw-r--r-- | third_party/rust/glob/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/glob/Cargo.toml | 30 | ||||
-rw-r--r-- | third_party/rust/glob/LICENSE-APACHE | 201 | ||||
-rw-r--r-- | third_party/rust/glob/LICENSE-MIT | 25 | ||||
-rw-r--r-- | third_party/rust/glob/README.md | 38 | ||||
-rw-r--r-- | third_party/rust/glob/src/lib.rs | 1434 | ||||
-rw-r--r-- | third_party/rust/glob/tests/glob-std.rs | 377 | ||||
-rw-r--r-- | third_party/rust/glob/triagebot.toml | 1 |
8 files changed, 2107 insertions, 0 deletions
diff --git a/third_party/rust/glob/.cargo-checksum.json b/third_party/rust/glob/.cargo-checksum.json new file mode 100644 index 0000000000..e31f7a6130 --- /dev/null +++ b/third_party/rust/glob/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"6791dcdd292ecaeeec9535afae7189026dca9d8486211acd27208673edfa995a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"3482ed957e7e22efdf5f368d92c497b932c665bce3ad17db8ec31c1007164539","src/lib.rs":"b2f3c3f48eb07f1767a7e5bb02847d10e02655f1dbf57281eb63fd001427e28b","tests/glob-std.rs":"b0a0aa11d7b8cc2cc0216fb117a543d6e584eec8b51173994dfd19761ab00a62","triagebot.toml":"a135e10c777cd13459559bdf74fb704c1379af7c9b0f70bc49fa6f5a837daa81"},"package":"d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"}
\ No newline at end of file diff --git a/third_party/rust/glob/Cargo.toml b/third_party/rust/glob/Cargo.toml new file mode 100644 index 0000000000..53977177e0 --- /dev/null +++ b/third_party/rust/glob/Cargo.toml @@ -0,0 +1,30 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +name = "glob" +version = "0.3.1" +authors = ["The Rust Project Developers"] +description = """ +Support for matching file paths against Unix shell style patterns. +""" +homepage = "https://github.com/rust-lang/glob" +documentation = "https://docs.rs/glob/0.3.1" +readme = "README.md" +categories = ["filesystem"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/glob" + +[dev-dependencies.doc-comment] +version = "0.3" + +[dev-dependencies.tempdir] +version = "0.3" diff --git a/third_party/rust/glob/LICENSE-APACHE b/third_party/rust/glob/LICENSE-APACHE new file mode 100644 index 0000000000..16fe87b06e --- /dev/null +++ b/third_party/rust/glob/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/third_party/rust/glob/LICENSE-MIT b/third_party/rust/glob/LICENSE-MIT new file mode 100644 index 0000000000..39d4bdb5ac --- /dev/null +++ b/third_party/rust/glob/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/glob/README.md b/third_party/rust/glob/README.md new file mode 100644 index 0000000000..b68a40e402 --- /dev/null +++ b/third_party/rust/glob/README.md @@ -0,0 +1,38 @@ +glob +==== + +Support for matching file paths against Unix shell style patterns. + +[![Continuous integration](https://github.com/rust-lang/glob/actions/workflows/rust.yml/badge.svg)](https://github.com/rust-lang/glob/actions/workflows/rust.yml) + +[Documentation](https://docs.rs/glob) + +## Usage + +To use `glob`, add this to your `Cargo.toml`: + +```toml +[dependencies] +glob = "0.3.1" +``` + +And add this to your crate root: + +```rust +extern crate glob; +``` + +## Examples + +Print all jpg files in /media/ and all of its subdirectories. + +```rust +use glob::glob; + +for entry in glob("/media/**/*.jpg").expect("Failed to read glob pattern") { + match entry { + Ok(path) => println!("{:?}", path.display()), + Err(e) => println!("{:?}", e), + } +} +``` diff --git a/third_party/rust/glob/src/lib.rs b/third_party/rust/glob/src/lib.rs new file mode 100644 index 0000000000..744cabf380 --- /dev/null +++ b/third_party/rust/glob/src/lib.rs @@ -0,0 +1,1434 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Support for matching file paths against Unix shell style patterns. +//! +//! The `glob` and `glob_with` functions allow querying the filesystem for all +//! files that match a particular pattern (similar to the libc `glob` function). +//! The methods on the `Pattern` type provide functionality for checking if +//! individual paths match a particular pattern (similar to the libc `fnmatch` +//! function). +//! +//! For consistency across platforms, and for Windows support, this module +//! is implemented entirely in Rust rather than deferring to the libc +//! `glob`/`fnmatch` functions. +//! +//! # Examples +//! +//! To print all jpg files in `/media/` and all of its subdirectories. +//! +//! ```rust,no_run +//! use glob::glob; +//! +//! for entry in glob("/media/**/*.jpg").expect("Failed to read glob pattern") { +//! match entry { +//! Ok(path) => println!("{:?}", path.display()), +//! Err(e) => println!("{:?}", e), +//! } +//! } +//! ``` +//! +//! To print all files containing the letter "a", case insensitive, in a `local` +//! directory relative to the current working directory. This ignores errors +//! instead of printing them. +//! +//! ```rust,no_run +//! use glob::glob_with; +//! use glob::MatchOptions; +//! +//! let options = MatchOptions { +//! case_sensitive: false, +//! require_literal_separator: false, +//! require_literal_leading_dot: false, +//! }; +//! for entry in glob_with("local/*a*", options).unwrap() { +//! if let Ok(path) = entry { +//! println!("{:?}", path.display()) +//! } +//! } +//! ``` + +#![doc( + html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", + html_favicon_url = "https://www.rust-lang.org/favicon.ico", + html_root_url = "https://docs.rs/glob/0.3.1" +)] +#![deny(missing_docs)] + +#[cfg(test)] +#[macro_use] +extern crate doc_comment; + +#[cfg(test)] +doctest!("../README.md"); + +use std::cmp; +use std::error::Error; +use std::fmt; +use std::fs; +use std::io; +use std::path::{self, Component, Path, PathBuf}; +use std::str::FromStr; + +use CharSpecifier::{CharRange, SingleChar}; +use MatchResult::{EntirePatternDoesntMatch, Match, SubPatternDoesntMatch}; +use PatternToken::AnyExcept; +use PatternToken::{AnyChar, AnyRecursiveSequence, AnySequence, AnyWithin, Char}; + +/// An iterator that yields `Path`s from the filesystem that match a particular +/// pattern. +/// +/// Note that it yields `GlobResult` in order to report any `IoErrors` that may +/// arise during iteration. If a directory matches but is unreadable, +/// thereby preventing its contents from being checked for matches, a +/// `GlobError` is returned to express this. +/// +/// See the `glob` function for more details. +#[derive(Debug)] +pub struct Paths { + dir_patterns: Vec<Pattern>, + require_dir: bool, + options: MatchOptions, + todo: Vec<Result<(PathBuf, usize), GlobError>>, + scope: Option<PathBuf>, +} + +/// Return an iterator that produces all the `Path`s that match the given +/// pattern using default match options, which may be absolute or relative to +/// the current working directory. +/// +/// This may return an error if the pattern is invalid. +/// +/// This method uses the default match options and is equivalent to calling +/// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you +/// want to use non-default match options. +/// +/// When iterating, each result is a `GlobResult` which expresses the +/// possibility that there was an `IoError` when attempting to read the contents +/// of the matched path. In other words, each item returned by the iterator +/// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if +/// the path (partially) matched _but_ its contents could not be read in order +/// to determine if its contents matched. +/// +/// See the `Paths` documentation for more information. +/// +/// # Examples +/// +/// Consider a directory `/media/pictures` containing only the files +/// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`: +/// +/// ```rust,no_run +/// use glob::glob; +/// +/// for entry in glob("/media/pictures/*.jpg").unwrap() { +/// match entry { +/// Ok(path) => println!("{:?}", path.display()), +/// +/// // if the path matched but was unreadable, +/// // thereby preventing its contents from matching +/// Err(e) => println!("{:?}", e), +/// } +/// } +/// ``` +/// +/// The above code will print: +/// +/// ```ignore +/// /media/pictures/kittens.jpg +/// /media/pictures/puppies.jpg +/// ``` +/// +/// If you want to ignore unreadable paths, you can use something like +/// `filter_map`: +/// +/// ```rust +/// use glob::glob; +/// use std::result::Result; +/// +/// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) { +/// println!("{}", path.display()); +/// } +/// ``` +/// Paths are yielded in alphabetical order. +pub fn glob(pattern: &str) -> Result<Paths, PatternError> { + glob_with(pattern, MatchOptions::new()) +} + +/// Return an iterator that produces all the `Path`s that match the given +/// pattern using the specified match options, which may be absolute or relative +/// to the current working directory. +/// +/// This may return an error if the pattern is invalid. +/// +/// This function accepts Unix shell style patterns as described by +/// `Pattern::new(..)`. The options given are passed through unchanged to +/// `Pattern::matches_with(..)` with the exception that +/// `require_literal_separator` is always set to `true` regardless of the value +/// passed to this function. +/// +/// Paths are yielded in alphabetical order. +pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternError> { + #[cfg(windows)] + fn check_windows_verbatim(p: &Path) -> bool { + match p.components().next() { + Some(Component::Prefix(ref p)) => p.kind().is_verbatim(), + _ => false, + } + } + #[cfg(not(windows))] + fn check_windows_verbatim(_: &Path) -> bool { + false + } + + #[cfg(windows)] + fn to_scope(p: &Path) -> PathBuf { + // FIXME handle volume relative paths here + p.to_path_buf() + } + #[cfg(not(windows))] + fn to_scope(p: &Path) -> PathBuf { + p.to_path_buf() + } + + // make sure that the pattern is valid first, else early return with error + if let Err(err) = Pattern::new(pattern) { + return Err(err); + } + + let mut components = Path::new(pattern).components().peekable(); + loop { + match components.peek() { + Some(&Component::Prefix(..)) | Some(&Component::RootDir) => { + components.next(); + } + _ => break, + } + } + let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>(); + let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>(); + let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len(); + let root = if root_len > 0 { + Some(Path::new(&pattern[..root_len])) + } else { + None + }; + + if root_len > 0 && check_windows_verbatim(root.unwrap()) { + // FIXME: How do we want to handle verbatim paths? I'm inclined to + // return nothing, since we can't very well find all UNC shares with a + // 1-letter server name. + return Ok(Paths { + dir_patterns: Vec::new(), + require_dir: false, + options, + todo: Vec::new(), + scope: None, + }); + } + + let scope = root.map_or_else(|| PathBuf::from("."), to_scope); + + let mut dir_patterns = Vec::new(); + let components = + pattern[cmp::min(root_len, pattern.len())..].split_terminator(path::is_separator); + + for component in components { + dir_patterns.push(Pattern::new(component)?); + } + + if root_len == pattern.len() { + dir_patterns.push(Pattern { + original: "".to_string(), + tokens: Vec::new(), + is_recursive: false, + }); + } + + let last_is_separator = pattern.chars().next_back().map(path::is_separator); + let require_dir = last_is_separator == Some(true); + let todo = Vec::new(); + + Ok(Paths { + dir_patterns, + require_dir, + options, + todo, + scope: Some(scope), + }) +} + +/// A glob iteration error. +/// +/// This is typically returned when a particular path cannot be read +/// to determine if its contents match the glob pattern. This is possible +/// if the program lacks the appropriate permissions, for example. +#[derive(Debug)] +pub struct GlobError { + path: PathBuf, + error: io::Error, +} + +impl GlobError { + /// The Path that the error corresponds to. + pub fn path(&self) -> &Path { + &self.path + } + + /// The error in question. + pub fn error(&self) -> &io::Error { + &self.error + } + + /// Consumes self, returning the _raw_ underlying `io::Error` + pub fn into_error(self) -> io::Error { + self.error + } +} + +impl Error for GlobError { + #[allow(deprecated)] + fn description(&self) -> &str { + self.error.description() + } + + #[allow(unknown_lints, bare_trait_objects)] + fn cause(&self) -> Option<&Error> { + Some(&self.error) + } +} + +impl fmt::Display for GlobError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "attempting to read `{}` resulted in an error: {}", + self.path.display(), + self.error + ) + } +} + +fn is_dir(p: &Path) -> bool { + fs::metadata(p).map(|m| m.is_dir()).unwrap_or(false) +} + +/// An alias for a glob iteration result. +/// +/// This represents either a matched path or a glob iteration error, +/// such as failing to read a particular directory's contents. +pub type GlobResult = Result<PathBuf, GlobError>; + +impl Iterator for Paths { + type Item = GlobResult; + + fn next(&mut self) -> Option<GlobResult> { + // the todo buffer hasn't been initialized yet, so it's done at this + // point rather than in glob() so that the errors are unified that is, + // failing to fill the buffer is an iteration error construction of the + // iterator (i.e. glob()) only fails if it fails to compile the Pattern + if let Some(scope) = self.scope.take() { + if !self.dir_patterns.is_empty() { + // Shouldn't happen, but we're using -1 as a special index. + assert!(self.dir_patterns.len() < !0 as usize); + + fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, self.options); + } + } + + loop { + if self.dir_patterns.is_empty() || self.todo.is_empty() { + return None; + } + + let (path, mut idx) = match self.todo.pop().unwrap() { + Ok(pair) => pair, + Err(e) => return Some(Err(e)), + }; + + // idx -1: was already checked by fill_todo, maybe path was '.' or + // '..' that we can't match here because of normalization. + if idx == !0 as usize { + if self.require_dir && !is_dir(&path) { + continue; + } + return Some(Ok(path)); + } + + if self.dir_patterns[idx].is_recursive { + let mut next = idx; + + // collapse consecutive recursive patterns + while (next + 1) < self.dir_patterns.len() + && self.dir_patterns[next + 1].is_recursive + { + next += 1; + } + + if is_dir(&path) { + // the path is a directory, so it's a match + + // push this directory's contents + fill_todo( + &mut self.todo, + &self.dir_patterns, + next, + &path, + self.options, + ); + + if next == self.dir_patterns.len() - 1 { + // pattern ends in recursive pattern, so return this + // directory as a result + return Some(Ok(path)); + } else { + // advanced to the next pattern for this path + idx = next + 1; + } + } else if next == self.dir_patterns.len() - 1 { + // not a directory and it's the last pattern, meaning no + // match + continue; + } else { + // advanced to the next pattern for this path + idx = next + 1; + } + } + + // not recursive, so match normally + if self.dir_patterns[idx].matches_with( + { + match path.file_name().and_then(|s| s.to_str()) { + // FIXME (#9639): How do we handle non-utf8 filenames? + // Ignore them for now; ideally we'd still match them + // against a * + None => continue, + Some(x) => x, + } + }, + self.options, + ) { + if idx == self.dir_patterns.len() - 1 { + // it is not possible for a pattern to match a directory + // *AND* its children so we don't need to check the + // children + + if !self.require_dir || is_dir(&path) { + return Some(Ok(path)); + } + } else { + fill_todo( + &mut self.todo, + &self.dir_patterns, + idx + 1, + &path, + self.options, + ); + } + } + } + } +} + +/// A pattern parsing error. +#[derive(Debug)] +#[allow(missing_copy_implementations)] +pub struct PatternError { + /// The approximate character index of where the error occurred. + pub pos: usize, + + /// A message describing the error. + pub msg: &'static str, +} + +impl Error for PatternError { + fn description(&self) -> &str { + self.msg + } +} + +impl fmt::Display for PatternError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Pattern syntax error near position {}: {}", + self.pos, self.msg + ) + } +} + +/// A compiled Unix shell style pattern. +/// +/// - `?` matches any single character. +/// +/// - `*` matches any (possibly empty) sequence of characters. +/// +/// - `**` matches the current directory and arbitrary subdirectories. This +/// sequence **must** form a single path component, so both `**a` and `b**` +/// are invalid and will result in an error. A sequence of more than two +/// consecutive `*` characters is also invalid. +/// +/// - `[...]` matches any character inside the brackets. Character sequences +/// can also specify ranges of characters, as ordered by Unicode, so e.g. +/// `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed +/// bracket is invalid. +/// +/// - `[!...]` is the negation of `[...]`, i.e. it matches any characters +/// **not** in the brackets. +/// +/// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets +/// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then it +/// is interpreted as being part of, rather then ending, the character set, so +/// `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively. The `-` +/// character can be specified inside a character sequence pattern by placing +/// it at the start or the end, e.g. `[abc-]`. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] +pub struct Pattern { + original: String, + tokens: Vec<PatternToken>, + is_recursive: bool, +} + +/// Show the original glob pattern. +impl fmt::Display for Pattern { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.original.fmt(f) + } +} + +impl FromStr for Pattern { + type Err = PatternError; + + fn from_str(s: &str) -> Result<Self, PatternError> { + Self::new(s) + } +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +enum PatternToken { + Char(char), + AnyChar, + AnySequence, + AnyRecursiveSequence, + AnyWithin(Vec<CharSpecifier>), + AnyExcept(Vec<CharSpecifier>), +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +enum CharSpecifier { + SingleChar(char), + CharRange(char, char), +} + +#[derive(Copy, Clone, PartialEq)] +enum MatchResult { + Match, + SubPatternDoesntMatch, + EntirePatternDoesntMatch, +} + +const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`"; +const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path \ + component"; +const ERROR_INVALID_RANGE: &str = "invalid range pattern"; + +impl Pattern { + /// This function compiles Unix shell style patterns. + /// + /// An invalid glob pattern will yield a `PatternError`. + pub fn new(pattern: &str) -> Result<Self, PatternError> { + let chars = pattern.chars().collect::<Vec<_>>(); + let mut tokens = Vec::new(); + let mut is_recursive = false; + let mut i = 0; + + while i < chars.len() { + match chars[i] { + '?' => { + tokens.push(AnyChar); + i += 1; + } + '*' => { + let old = i; + + while i < chars.len() && chars[i] == '*' { + i += 1; + } + + let count = i - old; + + if count > 2 { + return Err(PatternError { + pos: old + 2, + msg: ERROR_WILDCARDS, + }); + } else if count == 2 { + // ** can only be an entire path component + // i.e. a/**/b is valid, but a**/b or a/**b is not + // invalid matches are treated literally + let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) { + // it ends in a '/' + if i < chars.len() && path::is_separator(chars[i]) { + i += 1; + true + // or the pattern ends here + // this enables the existing globbing mechanism + } else if i == chars.len() { + true + // `**` ends in non-separator + } else { + return Err(PatternError { + pos: i, + msg: ERROR_RECURSIVE_WILDCARDS, + }); + } + // `**` begins with non-separator + } else { + return Err(PatternError { + pos: old - 1, + msg: ERROR_RECURSIVE_WILDCARDS, + }); + }; + + if is_valid { + // collapse consecutive AnyRecursiveSequence to a + // single one + + let tokens_len = tokens.len(); + + if !(tokens_len > 1 && tokens[tokens_len - 1] == AnyRecursiveSequence) { + is_recursive = true; + tokens.push(AnyRecursiveSequence); + } + } + } else { + tokens.push(AnySequence); + } + } + '[' => { + if i + 4 <= chars.len() && chars[i + 1] == '!' { + match chars[i + 3..].iter().position(|x| *x == ']') { + None => (), + Some(j) => { + let chars = &chars[i + 2..i + 3 + j]; + let cs = parse_char_specifiers(chars); + tokens.push(AnyExcept(cs)); + i += j + 4; + continue; + } + } + } else if i + 3 <= chars.len() && chars[i + 1] != '!' { + match chars[i + 2..].iter().position(|x| *x == ']') { + None => (), + Some(j) => { + let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]); + tokens.push(AnyWithin(cs)); + i += j + 3; + continue; + } + } + } + + // if we get here then this is not a valid range pattern + return Err(PatternError { + pos: i, + msg: ERROR_INVALID_RANGE, + }); + } + c => { + tokens.push(Char(c)); + i += 1; + } + } + } + + Ok(Self { + tokens, + original: pattern.to_string(), + is_recursive, + }) + } + + /// Escape metacharacters within the given string by surrounding them in + /// brackets. The resulting string will, when compiled into a `Pattern`, + /// match the input string and nothing else. + pub fn escape(s: &str) -> String { + let mut escaped = String::new(); + for c in s.chars() { + match c { + // note that ! does not need escaping because it is only special + // inside brackets + '?' | '*' | '[' | ']' => { + escaped.push('['); + escaped.push(c); + escaped.push(']'); + } + c => { + escaped.push(c); + } + } + } + escaped + } + + /// Return if the given `str` matches this `Pattern` using the default + /// match options (i.e. `MatchOptions::new()`). + /// + /// # Examples + /// + /// ```rust + /// use glob::Pattern; + /// + /// assert!(Pattern::new("c?t").unwrap().matches("cat")); + /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh")); + /// assert!(Pattern::new("d*g").unwrap().matches("doog")); + /// ``` + pub fn matches(&self, str: &str) -> bool { + self.matches_with(str, MatchOptions::new()) + } + + /// Return if the given `Path`, when converted to a `str`, matches this + /// `Pattern` using the default match options (i.e. `MatchOptions::new()`). + pub fn matches_path(&self, path: &Path) -> bool { + // FIXME (#9639): This needs to handle non-utf8 paths + path.to_str().map_or(false, |s| self.matches(s)) + } + + /// Return if the given `str` matches this `Pattern` using the specified + /// match options. + pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool { + self.matches_from(true, str.chars(), 0, options) == Match + } + + /// Return if the given `Path`, when converted to a `str`, matches this + /// `Pattern` using the specified match options. + pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool { + // FIXME (#9639): This needs to handle non-utf8 paths + path.to_str() + .map_or(false, |s| self.matches_with(s, options)) + } + + /// Access the original glob pattern. + pub fn as_str(&self) -> &str { + &self.original + } + + fn matches_from( + &self, + mut follows_separator: bool, + mut file: std::str::Chars, + i: usize, + options: MatchOptions, + ) -> MatchResult { + for (ti, token) in self.tokens[i..].iter().enumerate() { + match *token { + AnySequence | AnyRecursiveSequence => { + // ** must be at the start. + debug_assert!(match *token { + AnyRecursiveSequence => follows_separator, + _ => true, + }); + + // Empty match + match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) { + SubPatternDoesntMatch => (), // keep trying + m => return m, + }; + + while let Some(c) = file.next() { + if follows_separator && options.require_literal_leading_dot && c == '.' { + return SubPatternDoesntMatch; + } + follows_separator = path::is_separator(c); + match *token { + AnyRecursiveSequence if !follows_separator => continue, + AnySequence + if options.require_literal_separator && follows_separator => + { + return SubPatternDoesntMatch + } + _ => (), + } + match self.matches_from( + follows_separator, + file.clone(), + i + ti + 1, + options, + ) { + SubPatternDoesntMatch => (), // keep trying + m => return m, + } + } + } + _ => { + let c = match file.next() { + Some(c) => c, + None => return EntirePatternDoesntMatch, + }; + + let is_sep = path::is_separator(c); + + if !match *token { + AnyChar | AnyWithin(..) | AnyExcept(..) + if (options.require_literal_separator && is_sep) + || (follows_separator + && options.require_literal_leading_dot + && c == '.') => + { + false + } + AnyChar => true, + AnyWithin(ref specifiers) => in_char_specifiers(&specifiers, c, options), + AnyExcept(ref specifiers) => !in_char_specifiers(&specifiers, c, options), + Char(c2) => chars_eq(c, c2, options.case_sensitive), + AnySequence | AnyRecursiveSequence => unreachable!(), + } { + return SubPatternDoesntMatch; + } + follows_separator = is_sep; + } + } + } + + // Iter is fused. + if file.next().is_none() { + Match + } else { + SubPatternDoesntMatch + } + } +} + +// Fills `todo` with paths under `path` to be matched by `patterns[idx]`, +// special-casing patterns to match `.` and `..`, and avoiding `readdir()` +// calls when there are no metacharacters in the pattern. +fn fill_todo( + todo: &mut Vec<Result<(PathBuf, usize), GlobError>>, + patterns: &[Pattern], + idx: usize, + path: &Path, + options: MatchOptions, +) { + // convert a pattern that's just many Char(_) to a string + fn pattern_as_str(pattern: &Pattern) -> Option<String> { + let mut s = String::new(); + for token in &pattern.tokens { + match *token { + Char(c) => s.push(c), + _ => return None, + } + } + + Some(s) + } + + let add = |todo: &mut Vec<_>, next_path: PathBuf| { + if idx + 1 == patterns.len() { + // We know it's good, so don't make the iterator match this path + // against the pattern again. In particular, it can't match + // . or .. globs since these never show up as path components. + todo.push(Ok((next_path, !0 as usize))); + } else { + fill_todo(todo, patterns, idx + 1, &next_path, options); + } + }; + + let pattern = &patterns[idx]; + let is_dir = is_dir(path); + let curdir = path == Path::new("."); + match pattern_as_str(pattern) { + Some(s) => { + // This pattern component doesn't have any metacharacters, so we + // don't need to read the current directory to know where to + // continue. So instead of passing control back to the iterator, + // we can just check for that one entry and potentially recurse + // right away. + let special = "." == s || ".." == s; + let next_path = if curdir { + PathBuf::from(s) + } else { + path.join(&s) + }; + if (special && is_dir) || (!special && fs::metadata(&next_path).is_ok()) { + add(todo, next_path); + } + } + None if is_dir => { + let dirs = fs::read_dir(path).and_then(|d| { + d.map(|e| { + e.map(|e| { + if curdir { + PathBuf::from(e.path().file_name().unwrap()) + } else { + e.path() + } + }) + }) + .collect::<Result<Vec<_>, _>>() + }); + match dirs { + Ok(mut children) => { + children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name())); + todo.extend(children.into_iter().map(|x| Ok((x, idx)))); + + // Matching the special directory entries . and .. that + // refer to the current and parent directory respectively + // requires that the pattern has a leading dot, even if the + // `MatchOptions` field `require_literal_leading_dot` is not + // set. + if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') { + for &special in &[".", ".."] { + if pattern.matches_with(special, options) { + add(todo, path.join(special)); + } + } + } + } + Err(e) => { + todo.push(Err(GlobError { + path: path.to_path_buf(), + error: e, + })); + } + } + } + None => { + // not a directory, nothing more to find + } + } +} + +fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> { + let mut cs = Vec::new(); + let mut i = 0; + while i < s.len() { + if i + 3 <= s.len() && s[i + 1] == '-' { + cs.push(CharRange(s[i], s[i + 2])); + i += 3; + } else { + cs.push(SingleChar(s[i])); + i += 1; + } + } + cs +} + +fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool { + for &specifier in specifiers.iter() { + match specifier { + SingleChar(sc) => { + if chars_eq(c, sc, options.case_sensitive) { + return true; + } + } + CharRange(start, end) => { + // FIXME: work with non-ascii chars properly (issue #1347) + if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() { + let start = start.to_ascii_lowercase(); + let end = end.to_ascii_lowercase(); + + let start_up = start.to_uppercase().next().unwrap(); + let end_up = end.to_uppercase().next().unwrap(); + + // only allow case insensitive matching when + // both start and end are within a-z or A-Z + if start != start_up && end != end_up { + let c = c.to_ascii_lowercase(); + if c >= start && c <= end { + return true; + } + } + } + + if c >= start && c <= end { + return true; + } + } + } + } + + false +} + +/// A helper function to determine if two chars are (possibly case-insensitively) equal. +fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool { + if cfg!(windows) && path::is_separator(a) && path::is_separator(b) { + true + } else if !case_sensitive && a.is_ascii() && b.is_ascii() { + // FIXME: work with non-ascii chars properly (issue #9084) + a.to_ascii_lowercase() == b.to_ascii_lowercase() + } else { + a == b + } +} + +/// Configuration options to modify the behaviour of `Pattern::matches_with(..)`. +#[allow(missing_copy_implementations)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct MatchOptions { + /// Whether or not patterns should be matched in a case-sensitive manner. + /// This currently only considers upper/lower case relationships between + /// ASCII characters, but in future this might be extended to work with + /// Unicode. + pub case_sensitive: bool, + + /// Whether or not path-component separator characters (e.g. `/` on + /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or + /// `[...]`. + pub require_literal_separator: bool, + + /// Whether or not paths that contain components that start with a `.` + /// will require that `.` appears literally in the pattern; `*`, `?`, `**`, + /// or `[...]` will not match. This is useful because such files are + /// conventionally considered hidden on Unix systems and it might be + /// desirable to skip them when listing files. + pub require_literal_leading_dot: bool, +} + +impl MatchOptions { + /// Constructs a new `MatchOptions` with default field values. This is used + /// when calling functions that do not take an explicit `MatchOptions` + /// parameter. + /// + /// This function always returns this value: + /// + /// ```rust,ignore + /// MatchOptions { + /// case_sensitive: true, + /// require_literal_separator: false, + /// require_literal_leading_dot: false + /// } + /// ``` + /// + /// # Note + /// The behavior of this method doesn't match `default()`'s. This returns + /// `case_sensitive` as `true` while `default()` does it as `false`. + // FIXME: Consider unity the behavior with `default()` in a next major release. + pub fn new() -> Self { + Self { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: false, + } + } +} + +#[cfg(test)] +mod test { + use super::{glob, MatchOptions, Pattern}; + use std::path::Path; + + #[test] + fn test_pattern_from_str() { + assert!("a*b".parse::<Pattern>().unwrap().matches("a_b")); + assert!("a/**b".parse::<Pattern>().unwrap_err().pos == 4); + } + + #[test] + fn test_wildcard_errors() { + assert!(Pattern::new("a/**b").unwrap_err().pos == 4); + assert!(Pattern::new("a/bc**").unwrap_err().pos == 3); + assert!(Pattern::new("a/*****").unwrap_err().pos == 4); + assert!(Pattern::new("a/b**c**d").unwrap_err().pos == 2); + assert!(Pattern::new("a**b").unwrap_err().pos == 0); + } + + #[test] + fn test_unclosed_bracket_errors() { + assert!(Pattern::new("abc[def").unwrap_err().pos == 3); + assert!(Pattern::new("abc[!def").unwrap_err().pos == 3); + assert!(Pattern::new("abc[").unwrap_err().pos == 3); + assert!(Pattern::new("abc[!").unwrap_err().pos == 3); + assert!(Pattern::new("abc[d").unwrap_err().pos == 3); + assert!(Pattern::new("abc[!d").unwrap_err().pos == 3); + assert!(Pattern::new("abc[]").unwrap_err().pos == 3); + assert!(Pattern::new("abc[!]").unwrap_err().pos == 3); + } + + #[test] + fn test_glob_errors() { + assert!(glob("a/**b").err().unwrap().pos == 4); + assert!(glob("abc[def").err().unwrap().pos == 3); + } + + // this test assumes that there is a /root directory and that + // the user running this test is not root or otherwise doesn't + // have permission to read its contents + #[cfg(all(unix, not(target_os = "macos")))] + #[test] + fn test_iteration_errors() { + use std::io; + let mut iter = glob("/root/*").unwrap(); + + // GlobErrors shouldn't halt iteration + let next = iter.next(); + assert!(next.is_some()); + + let err = next.unwrap(); + assert!(err.is_err()); + + let err = err.err().unwrap(); + assert!(err.path() == Path::new("/root")); + assert!(err.error().kind() == io::ErrorKind::PermissionDenied); + } + + #[test] + fn test_absolute_pattern() { + assert!(glob("/").unwrap().next().is_some()); + assert!(glob("//").unwrap().next().is_some()); + + // assume that the filesystem is not empty! + assert!(glob("/*").unwrap().next().is_some()); + + #[cfg(not(windows))] + fn win() {} + + #[cfg(windows)] + fn win() { + use std::env::current_dir; + use std::path::Component; + + // check windows absolute paths with host/device components + let root_with_device = current_dir() + .ok() + .and_then(|p| { + match p.components().next().unwrap() { + Component::Prefix(prefix_component) => { + let path = Path::new(prefix_component.as_os_str()); + path.join("*"); + Some(path.to_path_buf()) + } + _ => panic!("no prefix in this path"), + } + }) + .unwrap(); + // FIXME (#9639): This needs to handle non-utf8 paths + assert!(glob(root_with_device.as_os_str().to_str().unwrap()) + .unwrap() + .next() + .is_some()); + } + win() + } + + #[test] + fn test_wildcards() { + assert!(Pattern::new("a*b").unwrap().matches("a_b")); + assert!(Pattern::new("a*b*c").unwrap().matches("abc")); + assert!(!Pattern::new("a*b*c").unwrap().matches("abcd")); + assert!(Pattern::new("a*b*c").unwrap().matches("a_b_c")); + assert!(Pattern::new("a*b*c").unwrap().matches("a___b___c")); + assert!(Pattern::new("abc*abc*abc") + .unwrap() + .matches("abcabcabcabcabcabcabc")); + assert!(!Pattern::new("abc*abc*abc") + .unwrap() + .matches("abcabcabcabcabcabcabca")); + assert!(Pattern::new("a*a*a*a*a*a*a*a*a") + .unwrap() + .matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); + assert!(Pattern::new("a*b[xyz]c*d").unwrap().matches("abxcdbxcddd")); + } + + #[test] + fn test_recursive_wildcards() { + let pat = Pattern::new("some/**/needle.txt").unwrap(); + assert!(pat.matches("some/needle.txt")); + assert!(pat.matches("some/one/needle.txt")); + assert!(pat.matches("some/one/two/needle.txt")); + assert!(pat.matches("some/other/needle.txt")); + assert!(!pat.matches("some/other/notthis.txt")); + + // a single ** should be valid, for globs + // Should accept anything + let pat = Pattern::new("**").unwrap(); + assert!(pat.is_recursive); + assert!(pat.matches("abcde")); + assert!(pat.matches("")); + assert!(pat.matches(".asdf")); + assert!(pat.matches("/x/.asdf")); + + // collapse consecutive wildcards + let pat = Pattern::new("some/**/**/needle.txt").unwrap(); + assert!(pat.matches("some/needle.txt")); + assert!(pat.matches("some/one/needle.txt")); + assert!(pat.matches("some/one/two/needle.txt")); + assert!(pat.matches("some/other/needle.txt")); + assert!(!pat.matches("some/other/notthis.txt")); + + // ** can begin the pattern + let pat = Pattern::new("**/test").unwrap(); + assert!(pat.matches("one/two/test")); + assert!(pat.matches("one/test")); + assert!(pat.matches("test")); + + // /** can begin the pattern + let pat = Pattern::new("/**/test").unwrap(); + assert!(pat.matches("/one/two/test")); + assert!(pat.matches("/one/test")); + assert!(pat.matches("/test")); + assert!(!pat.matches("/one/notthis")); + assert!(!pat.matches("/notthis")); + + // Only start sub-patterns on start of path segment. + let pat = Pattern::new("**/.*").unwrap(); + assert!(pat.matches(".abc")); + assert!(pat.matches("abc/.abc")); + assert!(!pat.matches("ab.c")); + assert!(!pat.matches("abc/ab.c")); + } + + #[test] + fn test_lots_of_files() { + // this is a good test because it touches lots of differently named files + glob("/*/*/*/*").unwrap().skip(10000).next(); + } + + #[test] + fn test_range_pattern() { + let pat = Pattern::new("a[0-9]b").unwrap(); + for i in 0..10 { + assert!(pat.matches(&format!("a{}b", i))); + } + assert!(!pat.matches("a_b")); + + let pat = Pattern::new("a[!0-9]b").unwrap(); + for i in 0..10 { + assert!(!pat.matches(&format!("a{}b", i))); + } + assert!(pat.matches("a_b")); + + let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"]; + for &p in pats.iter() { + let pat = Pattern::new(p).unwrap(); + for c in "abcdefghijklmnopqrstuvwxyz".chars() { + assert!(pat.matches(&c.to_string())); + } + for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() { + let options = MatchOptions { + case_sensitive: false, + ..MatchOptions::new() + }; + assert!(pat.matches_with(&c.to_string(), options)); + } + assert!(pat.matches("1")); + assert!(pat.matches("2")); + assert!(pat.matches("3")); + } + + let pats = ["[abc-]", "[-abc]", "[a-c-]"]; + for &p in pats.iter() { + let pat = Pattern::new(p).unwrap(); + assert!(pat.matches("a")); + assert!(pat.matches("b")); + assert!(pat.matches("c")); + assert!(pat.matches("-")); + assert!(!pat.matches("d")); + } + + let pat = Pattern::new("[2-1]").unwrap(); + assert!(!pat.matches("1")); + assert!(!pat.matches("2")); + + assert!(Pattern::new("[-]").unwrap().matches("-")); + assert!(!Pattern::new("[!-]").unwrap().matches("-")); + } + + #[test] + fn test_pattern_matches() { + let txt_pat = Pattern::new("*hello.txt").unwrap(); + assert!(txt_pat.matches("hello.txt")); + assert!(txt_pat.matches("gareth_says_hello.txt")); + assert!(txt_pat.matches("some/path/to/hello.txt")); + assert!(txt_pat.matches("some\\path\\to\\hello.txt")); + assert!(txt_pat.matches("/an/absolute/path/to/hello.txt")); + assert!(!txt_pat.matches("hello.txt-and-then-some")); + assert!(!txt_pat.matches("goodbye.txt")); + + let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap(); + assert!(dir_pat.matches("some/path/to/hello.txt")); + assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt")); + assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some")); + assert!(!dir_pat.matches("some/other/path/to/hello.txt")); + } + + #[test] + fn test_pattern_escape() { + let s = "_[_]_?_*_!_"; + assert_eq!(Pattern::escape(s), "_[[]_[]]_[?]_[*]_!_".to_string()); + assert!(Pattern::new(&Pattern::escape(s)).unwrap().matches(s)); + } + + #[test] + fn test_pattern_matches_case_insensitive() { + let pat = Pattern::new("aBcDeFg").unwrap(); + let options = MatchOptions { + case_sensitive: false, + require_literal_separator: false, + require_literal_leading_dot: false, + }; + + assert!(pat.matches_with("aBcDeFg", options)); + assert!(pat.matches_with("abcdefg", options)); + assert!(pat.matches_with("ABCDEFG", options)); + assert!(pat.matches_with("AbCdEfG", options)); + } + + #[test] + fn test_pattern_matches_case_insensitive_range() { + let pat_within = Pattern::new("[a]").unwrap(); + let pat_except = Pattern::new("[!a]").unwrap(); + + let options_case_insensitive = MatchOptions { + case_sensitive: false, + require_literal_separator: false, + require_literal_leading_dot: false, + }; + let options_case_sensitive = MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: false, + }; + + assert!(pat_within.matches_with("a", options_case_insensitive)); + assert!(pat_within.matches_with("A", options_case_insensitive)); + assert!(!pat_within.matches_with("A", options_case_sensitive)); + + assert!(!pat_except.matches_with("a", options_case_insensitive)); + assert!(!pat_except.matches_with("A", options_case_insensitive)); + assert!(pat_except.matches_with("A", options_case_sensitive)); + } + + #[test] + fn test_pattern_matches_require_literal_separator() { + let options_require_literal = MatchOptions { + case_sensitive: true, + require_literal_separator: true, + require_literal_leading_dot: false, + }; + let options_not_require_literal = MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: false, + }; + + assert!(Pattern::new("abc/def") + .unwrap() + .matches_with("abc/def", options_require_literal)); + assert!(!Pattern::new("abc?def") + .unwrap() + .matches_with("abc/def", options_require_literal)); + assert!(!Pattern::new("abc*def") + .unwrap() + .matches_with("abc/def", options_require_literal)); + assert!(!Pattern::new("abc[/]def") + .unwrap() + .matches_with("abc/def", options_require_literal)); + + assert!(Pattern::new("abc/def") + .unwrap() + .matches_with("abc/def", options_not_require_literal)); + assert!(Pattern::new("abc?def") + .unwrap() + .matches_with("abc/def", options_not_require_literal)); + assert!(Pattern::new("abc*def") + .unwrap() + .matches_with("abc/def", options_not_require_literal)); + assert!(Pattern::new("abc[/]def") + .unwrap() + .matches_with("abc/def", options_not_require_literal)); + } + + #[test] + fn test_pattern_matches_require_literal_leading_dot() { + let options_require_literal_leading_dot = MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: true, + }; + let options_not_require_literal_leading_dot = MatchOptions { + case_sensitive: true, + require_literal_separator: false, + require_literal_leading_dot: false, + }; + + let f = |options| { + Pattern::new("*.txt") + .unwrap() + .matches_with(".hello.txt", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(!f(options_require_literal_leading_dot)); + + let f = |options| { + Pattern::new(".*.*") + .unwrap() + .matches_with(".hello.txt", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(f(options_require_literal_leading_dot)); + + let f = |options| { + Pattern::new("aaa/bbb/*") + .unwrap() + .matches_with("aaa/bbb/.ccc", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(!f(options_require_literal_leading_dot)); + + let f = |options| { + Pattern::new("aaa/bbb/*") + .unwrap() + .matches_with("aaa/bbb/c.c.c.", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(f(options_require_literal_leading_dot)); + + let f = |options| { + Pattern::new("aaa/bbb/.*") + .unwrap() + .matches_with("aaa/bbb/.ccc", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(f(options_require_literal_leading_dot)); + + let f = |options| { + Pattern::new("aaa/?bbb") + .unwrap() + .matches_with("aaa/.bbb", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(!f(options_require_literal_leading_dot)); + + let f = |options| { + Pattern::new("aaa/[.]bbb") + .unwrap() + .matches_with("aaa/.bbb", options) + }; + assert!(f(options_not_require_literal_leading_dot)); + assert!(!f(options_require_literal_leading_dot)); + + let f = |options| Pattern::new("**/*").unwrap().matches_with(".bbb", options); + assert!(f(options_not_require_literal_leading_dot)); + assert!(!f(options_require_literal_leading_dot)); + } + + #[test] + fn test_matches_path() { + // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this + // tests that / and \ are considered equivalent on windows + assert!(Pattern::new("a/b").unwrap().matches_path(&Path::new("a/b"))); + } + + #[test] + fn test_path_join() { + let pattern = Path::new("one").join(&Path::new("**/*.rs")); + assert!(Pattern::new(pattern.to_str().unwrap()).is_ok()); + } +} diff --git a/third_party/rust/glob/tests/glob-std.rs b/third_party/rust/glob/tests/glob-std.rs new file mode 100644 index 0000000000..085eb6d2ca --- /dev/null +++ b/third_party/rust/glob/tests/glob-std.rs @@ -0,0 +1,377 @@ +// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// ignore-windows TempDir may cause IoError on windows: #10462 + +#![cfg_attr(test, deny(warnings))] + +extern crate glob; +extern crate tempdir; + +use glob::glob; +use std::env; +use std::fs; +use std::path::PathBuf; +use tempdir::TempDir; + +#[test] +fn main() { + fn mk_file(path: &str, directory: bool) { + if directory { + fs::create_dir(path).unwrap(); + } else { + fs::File::create(path).unwrap(); + } + } + + fn glob_vec(pattern: &str) -> Vec<PathBuf> { + glob(pattern).unwrap().map(|r| r.unwrap()).collect() + } + + let root = TempDir::new("glob-tests"); + let root = root.ok().expect("Should have created a temp directory"); + assert!(env::set_current_dir(root.path()).is_ok()); + + mk_file("aaa", true); + mk_file("aaa/apple", true); + mk_file("aaa/orange", true); + mk_file("aaa/tomato", true); + mk_file("aaa/tomato/tomato.txt", false); + mk_file("aaa/tomato/tomoto.txt", false); + mk_file("bbb", true); + mk_file("bbb/specials", true); + mk_file("bbb/specials/!", false); + + // windows does not allow `*` or `?` characters to exist in filenames + if env::consts::FAMILY != "windows" { + mk_file("bbb/specials/*", false); + mk_file("bbb/specials/?", false); + } + + mk_file("bbb/specials/[", false); + mk_file("bbb/specials/]", false); + mk_file("ccc", true); + mk_file("xyz", true); + mk_file("xyz/x", false); + mk_file("xyz/y", false); + mk_file("xyz/z", false); + + mk_file("r", true); + mk_file("r/current_dir.md", false); + mk_file("r/one", true); + mk_file("r/one/a.md", false); + mk_file("r/one/another", true); + mk_file("r/one/another/a.md", false); + mk_file("r/one/another/deep", true); + mk_file("r/one/another/deep/spelunking.md", false); + mk_file("r/another", true); + mk_file("r/another/a.md", false); + mk_file("r/two", true); + mk_file("r/two/b.md", false); + mk_file("r/three", true); + mk_file("r/three/c.md", false); + + // all recursive entities + assert_eq!( + glob_vec("r/**"), + vec!( + PathBuf::from("r/another"), + PathBuf::from("r/one"), + PathBuf::from("r/one/another"), + PathBuf::from("r/one/another/deep"), + PathBuf::from("r/three"), + PathBuf::from("r/two") + ) + ); + + // collapse consecutive recursive patterns + assert_eq!( + glob_vec("r/**/**"), + vec!( + PathBuf::from("r/another"), + PathBuf::from("r/one"), + PathBuf::from("r/one/another"), + PathBuf::from("r/one/another/deep"), + PathBuf::from("r/three"), + PathBuf::from("r/two") + ) + ); + + assert_eq!( + glob_vec("r/**/*"), + vec!( + PathBuf::from("r/another"), + PathBuf::from("r/another/a.md"), + PathBuf::from("r/current_dir.md"), + PathBuf::from("r/one"), + PathBuf::from("r/one/a.md"), + PathBuf::from("r/one/another"), + PathBuf::from("r/one/another/a.md"), + PathBuf::from("r/one/another/deep"), + PathBuf::from("r/one/another/deep/spelunking.md"), + PathBuf::from("r/three"), + PathBuf::from("r/three/c.md"), + PathBuf::from("r/two"), + PathBuf::from("r/two/b.md") + ) + ); + + // followed by a wildcard + assert_eq!( + glob_vec("r/**/*.md"), + vec!( + PathBuf::from("r/another/a.md"), + PathBuf::from("r/current_dir.md"), + PathBuf::from("r/one/a.md"), + PathBuf::from("r/one/another/a.md"), + PathBuf::from("r/one/another/deep/spelunking.md"), + PathBuf::from("r/three/c.md"), + PathBuf::from("r/two/b.md") + ) + ); + + // followed by a precise pattern + assert_eq!( + glob_vec("r/one/**/a.md"), + vec!( + PathBuf::from("r/one/a.md"), + PathBuf::from("r/one/another/a.md") + ) + ); + + // followed by another recursive pattern + // collapses consecutive recursives into one + assert_eq!( + glob_vec("r/one/**/**/a.md"), + vec!( + PathBuf::from("r/one/a.md"), + PathBuf::from("r/one/another/a.md") + ) + ); + + // followed by two precise patterns + assert_eq!( + glob_vec("r/**/another/a.md"), + vec!( + PathBuf::from("r/another/a.md"), + PathBuf::from("r/one/another/a.md") + ) + ); + + assert_eq!(glob_vec(""), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("."), vec!(PathBuf::from("."))); + assert_eq!(glob_vec(".."), vec!(PathBuf::from(".."))); + + assert_eq!(glob_vec("aaa"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aaa/"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("a"), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("aa"), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("aaaa"), Vec::<PathBuf>::new()); + + assert_eq!(glob_vec("aaa/apple"), vec!(PathBuf::from("aaa/apple"))); + assert_eq!(glob_vec("aaa/apple/nope"), Vec::<PathBuf>::new()); + + // windows should support both / and \ as directory separators + if env::consts::FAMILY == "windows" { + assert_eq!(glob_vec("aaa\\apple"), vec!(PathBuf::from("aaa/apple"))); + } + + assert_eq!( + glob_vec("???/"), + vec!( + PathBuf::from("aaa"), + PathBuf::from("bbb"), + PathBuf::from("ccc"), + PathBuf::from("xyz") + ) + ); + + assert_eq!( + glob_vec("aaa/tomato/tom?to.txt"), + vec!( + PathBuf::from("aaa/tomato/tomato.txt"), + PathBuf::from("aaa/tomato/tomoto.txt") + ) + ); + + assert_eq!( + glob_vec("xyz/?"), + vec!( + PathBuf::from("xyz/x"), + PathBuf::from("xyz/y"), + PathBuf::from("xyz/z") + ) + ); + + assert_eq!(glob_vec("a*"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("*a*"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("a*a"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aaa*"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("*aaa"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("*aaa*"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("*a*a*a*"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aaa*/"), vec!(PathBuf::from("aaa"))); + + assert_eq!( + glob_vec("aaa/*"), + vec!( + PathBuf::from("aaa/apple"), + PathBuf::from("aaa/orange"), + PathBuf::from("aaa/tomato") + ) + ); + + assert_eq!( + glob_vec("aaa/*a*"), + vec!( + PathBuf::from("aaa/apple"), + PathBuf::from("aaa/orange"), + PathBuf::from("aaa/tomato") + ) + ); + + assert_eq!( + glob_vec("*/*/*.txt"), + vec!( + PathBuf::from("aaa/tomato/tomato.txt"), + PathBuf::from("aaa/tomato/tomoto.txt") + ) + ); + + assert_eq!( + glob_vec("*/*/t[aob]m?to[.]t[!y]t"), + vec!( + PathBuf::from("aaa/tomato/tomato.txt"), + PathBuf::from("aaa/tomato/tomoto.txt") + ) + ); + + assert_eq!(glob_vec("./aaa"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("./*"), glob_vec("*")); + assert_eq!(glob_vec("*/..").pop().unwrap(), PathBuf::from("xyz/..")); + assert_eq!(glob_vec("aaa/../bbb"), vec!(PathBuf::from("aaa/../bbb"))); + assert_eq!(glob_vec("nonexistent/../bbb"), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("aaa/tomato/tomato.txt/.."), Vec::<PathBuf>::new()); + + assert_eq!(glob_vec("aaa/tomato/tomato.txt/"), Vec::<PathBuf>::new()); + + assert_eq!(glob_vec("aa[a]"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aa[abc]"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("a[bca]a"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aa[b]"), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("aa[xyz]"), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("aa[]]"), Vec::<PathBuf>::new()); + + assert_eq!(glob_vec("aa[!b]"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aa[!bcd]"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("a[!bcd]a"), vec!(PathBuf::from("aaa"))); + assert_eq!(glob_vec("aa[!a]"), Vec::<PathBuf>::new()); + assert_eq!(glob_vec("aa[!abc]"), Vec::<PathBuf>::new()); + + assert_eq!( + glob_vec("bbb/specials/[[]"), + vec!(PathBuf::from("bbb/specials/[")) + ); + assert_eq!( + glob_vec("bbb/specials/!"), + vec!(PathBuf::from("bbb/specials/!")) + ); + assert_eq!( + glob_vec("bbb/specials/[]]"), + vec!(PathBuf::from("bbb/specials/]")) + ); + + if env::consts::FAMILY != "windows" { + assert_eq!( + glob_vec("bbb/specials/[*]"), + vec!(PathBuf::from("bbb/specials/*")) + ); + assert_eq!( + glob_vec("bbb/specials/[?]"), + vec!(PathBuf::from("bbb/specials/?")) + ); + } + + if env::consts::FAMILY == "windows" { + assert_eq!( + glob_vec("bbb/specials/[![]"), + vec!( + PathBuf::from("bbb/specials/!"), + PathBuf::from("bbb/specials/]") + ) + ); + + assert_eq!( + glob_vec("bbb/specials/[!]]"), + vec!( + PathBuf::from("bbb/specials/!"), + PathBuf::from("bbb/specials/[") + ) + ); + + assert_eq!( + glob_vec("bbb/specials/[!!]"), + vec!( + PathBuf::from("bbb/specials/["), + PathBuf::from("bbb/specials/]") + ) + ); + } else { + assert_eq!( + glob_vec("bbb/specials/[![]"), + vec!( + PathBuf::from("bbb/specials/!"), + PathBuf::from("bbb/specials/*"), + PathBuf::from("bbb/specials/?"), + PathBuf::from("bbb/specials/]") + ) + ); + + assert_eq!( + glob_vec("bbb/specials/[!]]"), + vec!( + PathBuf::from("bbb/specials/!"), + PathBuf::from("bbb/specials/*"), + PathBuf::from("bbb/specials/?"), + PathBuf::from("bbb/specials/[") + ) + ); + + assert_eq!( + glob_vec("bbb/specials/[!!]"), + vec!( + PathBuf::from("bbb/specials/*"), + PathBuf::from("bbb/specials/?"), + PathBuf::from("bbb/specials/["), + PathBuf::from("bbb/specials/]") + ) + ); + + assert_eq!( + glob_vec("bbb/specials/[!*]"), + vec!( + PathBuf::from("bbb/specials/!"), + PathBuf::from("bbb/specials/?"), + PathBuf::from("bbb/specials/["), + PathBuf::from("bbb/specials/]") + ) + ); + + assert_eq!( + glob_vec("bbb/specials/[!?]"), + vec!( + PathBuf::from("bbb/specials/!"), + PathBuf::from("bbb/specials/*"), + PathBuf::from("bbb/specials/["), + PathBuf::from("bbb/specials/]") + ) + ); + } +} diff --git a/third_party/rust/glob/triagebot.toml b/third_party/rust/glob/triagebot.toml new file mode 100644 index 0000000000..fa0824ac53 --- /dev/null +++ b/third_party/rust/glob/triagebot.toml @@ -0,0 +1 @@ +[assign] |