"""Pattern-matching utility functions for Sphinx.""" from __future__ import annotations import os.path import re from typing import TYPE_CHECKING, Callable from sphinx.util.osutil import canon_path, path_stabilize if TYPE_CHECKING: from collections.abc import Iterable, Iterator def _translate_pattern(pat: str) -> str: """Translate a shell-style glob pattern to a regular expression. Adapted from the fnmatch module, but enhanced so that single stars don't match slashes. """ i, n = 0, len(pat) res = '' while i < n: c = pat[i] i += 1 if c == '*': if i < n and pat[i] == '*': # double star matches slashes too i += 1 res = res + '.*' else: # single star doesn't match slashes res = res + '[^/]*' elif c == '?': # question mark doesn't match slashes too res = res + '[^/]' elif c == '[': j = i if j < n and pat[j] == '!': j += 1 if j < n and pat[j] == ']': j += 1 while j < n and pat[j] != ']': j += 1 if j >= n: res = res + '\\[' else: stuff = pat[i:j].replace('\\', '\\\\') i = j + 1 if stuff[0] == '!': # negative pattern mustn't match slashes too stuff = '^/' + stuff[1:] elif stuff[0] == '^': stuff = '\\' + stuff res = f'{res}[{stuff}]' else: res += re.escape(c) return res + '$' def compile_matchers( patterns: Iterable[str], ) -> list[Callable[[str], re.Match[str] | None]]: return [re.compile(_translate_pattern(pat)).match for pat in patterns] class Matcher: """A pattern matcher for Multiple shell-style glob patterns. Note: this modifies the patterns to work with copy_asset(). For example, "**/index.rst" matches with "index.rst" """ def __init__(self, exclude_patterns: Iterable[str]) -> None: expanded = [pat[3:] for pat in exclude_patterns if pat.startswith('**/')] self.patterns = compile_matchers(list(exclude_patterns) + expanded) def __call__(self, string: str) -> bool: return self.match(string) def match(self, string: str) -> bool: string = canon_path(string) return any(pat(string) for pat in self.patterns) DOTFILES = Matcher(['**/.*']) _pat_cache: dict[str, re.Pattern[str]] = {} def patmatch(name: str, pat: str) -> re.Match[str] | None: """Return if name matches the regular expression (pattern) ``pat```. Adapted from fnmatch module.""" if pat not in _pat_cache: _pat_cache[pat] = re.compile(_translate_pattern(pat)) return _pat_cache[pat].match(name) def patfilter(names: Iterable[str], pat: str) -> list[str]: """Return the subset of the list ``names`` that match the regular expression (pattern) ``pat``. Adapted from fnmatch module. """ if pat not in _pat_cache: _pat_cache[pat] = re.compile(_translate_pattern(pat)) match = _pat_cache[pat].match return list(filter(match, names)) def get_matching_files( dirname: str | os.PathLike[str], include_patterns: Iterable[str] = ("**",), exclude_patterns: Iterable[str] = (), ) -> Iterator[str]: """Get all file names in a directory, recursively. Filter file names by the glob-style include_patterns and exclude_patterns. The default values include all files ("**") and exclude nothing (""). Only files matching some pattern in *include_patterns* are included, and exclusions from *exclude_patterns* take priority over inclusions. """ # dirname is a normalized absolute path. dirname = os.path.normpath(os.path.abspath(dirname)) exclude_matchers = compile_matchers(exclude_patterns) include_matchers = compile_matchers(include_patterns) for root, dirs, files in os.walk(dirname, followlinks=True): relative_root = os.path.relpath(root, dirname) if relative_root == ".": relative_root = "" # suppress dirname for files on the target dir # Filter files included_files = [] for entry in sorted(files): entry = path_stabilize(os.path.join(relative_root, entry)) keep = False for matcher in include_matchers: if matcher(entry): keep = True break # break the inner loop for matcher in exclude_matchers: if matcher(entry): keep = False break # break the inner loop if keep: included_files.append(entry) # Filter directories filtered_dirs = [] for dir_name in sorted(dirs): normalised = path_stabilize(os.path.join(relative_root, dir_name)) for matcher in exclude_matchers: if matcher(normalised): break # break the inner loop else: # if the loop didn't break filtered_dirs.append(dir_name) dirs[:] = filtered_dirs # Yield filtered files yield from included_files