From cf7da1843c45a4c2df7a749f7886a2d2ba0ee92a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 19:25:40 +0200 Subject: Adding upstream version 7.2.6. Signed-off-by: Daniel Baumann --- sphinx/util/matching.py | 169 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 sphinx/util/matching.py (limited to 'sphinx/util/matching.py') diff --git a/sphinx/util/matching.py b/sphinx/util/matching.py new file mode 100644 index 0000000..dd91905 --- /dev/null +++ b/sphinx/util/matching.py @@ -0,0 +1,169 @@ +"""Pattern-matching utility functions for Sphinx.""" + +from __future__ import annotations + +import os.path +import re +from typing import TYPE_CHECKING, Callable + +from sphinx.util.osutil import canon_path, path_stabilize + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator + + +def _translate_pattern(pat: str) -> str: + """Translate a shell-style glob pattern to a regular expression. + + Adapted from the fnmatch module, but enhanced so that single stars don't + match slashes. + """ + i, n = 0, len(pat) + res = '' + while i < n: + c = pat[i] + i += 1 + if c == '*': + if i < n and pat[i] == '*': + # double star matches slashes too + i += 1 + res = res + '.*' + else: + # single star doesn't match slashes + res = res + '[^/]*' + elif c == '?': + # question mark doesn't match slashes too + res = res + '[^/]' + elif c == '[': + j = i + if j < n and pat[j] == '!': + j += 1 + if j < n and pat[j] == ']': + j += 1 + while j < n and pat[j] != ']': + j += 1 + if j >= n: + res = res + '\\[' + else: + stuff = pat[i:j].replace('\\', '\\\\') + i = j + 1 + if stuff[0] == '!': + # negative pattern mustn't match slashes too + stuff = '^/' + stuff[1:] + elif stuff[0] == '^': + stuff = '\\' + stuff + res = f'{res}[{stuff}]' + else: + res += re.escape(c) + return res + '$' + + +def compile_matchers( + patterns: Iterable[str], +) -> list[Callable[[str], re.Match[str] | None]]: + return [re.compile(_translate_pattern(pat)).match for pat in patterns] + + +class Matcher: + """A pattern matcher for Multiple shell-style glob patterns. + + Note: this modifies the patterns to work with copy_asset(). + For example, "**/index.rst" matches with "index.rst" + """ + + def __init__(self, exclude_patterns: Iterable[str]) -> None: + expanded = [pat[3:] for pat in exclude_patterns if pat.startswith('**/')] + self.patterns = compile_matchers(list(exclude_patterns) + expanded) + + def __call__(self, string: str) -> bool: + return self.match(string) + + def match(self, string: str) -> bool: + string = canon_path(string) + return any(pat(string) for pat in self.patterns) + + +DOTFILES = Matcher(['**/.*']) + + +_pat_cache: dict[str, re.Pattern[str]] = {} + + +def patmatch(name: str, pat: str) -> re.Match[str] | None: + """Return if name matches the regular expression (pattern) + ``pat```. Adapted from fnmatch module.""" + if pat not in _pat_cache: + _pat_cache[pat] = re.compile(_translate_pattern(pat)) + return _pat_cache[pat].match(name) + + +def patfilter(names: Iterable[str], pat: str) -> list[str]: + """Return the subset of the list ``names`` that match + the regular expression (pattern) ``pat``. + + Adapted from fnmatch module. + """ + if pat not in _pat_cache: + _pat_cache[pat] = re.compile(_translate_pattern(pat)) + match = _pat_cache[pat].match + return list(filter(match, names)) + + +def get_matching_files( + dirname: str | os.PathLike[str], + include_patterns: Iterable[str] = ("**",), + exclude_patterns: Iterable[str] = (), +) -> Iterator[str]: + """Get all file names in a directory, recursively. + + Filter file names by the glob-style include_patterns and exclude_patterns. + The default values include all files ("**") and exclude nothing (""). + + Only files matching some pattern in *include_patterns* are included, and + exclusions from *exclude_patterns* take priority over inclusions. + + """ + # dirname is a normalized absolute path. + dirname = os.path.normpath(os.path.abspath(dirname)) + + exclude_matchers = compile_matchers(exclude_patterns) + include_matchers = compile_matchers(include_patterns) + + for root, dirs, files in os.walk(dirname, followlinks=True): + relative_root = os.path.relpath(root, dirname) + if relative_root == ".": + relative_root = "" # suppress dirname for files on the target dir + + # Filter files + included_files = [] + for entry in sorted(files): + entry = path_stabilize(os.path.join(relative_root, entry)) + keep = False + for matcher in include_matchers: + if matcher(entry): + keep = True + break # break the inner loop + + for matcher in exclude_matchers: + if matcher(entry): + keep = False + break # break the inner loop + + if keep: + included_files.append(entry) + + # Filter directories + filtered_dirs = [] + for dir_name in sorted(dirs): + normalised = path_stabilize(os.path.join(relative_root, dir_name)) + for matcher in exclude_matchers: + if matcher(normalised): + break # break the inner loop + else: + # if the loop didn't break + filtered_dirs.append(dir_name) + + dirs[:] = filtered_dirs + + # Yield filtered files + yield from included_files -- cgit v1.2.3