| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
| commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
| tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /testing/web-platform/tests/tools/gitignore | |
| parent | Initial commit. (diff) | |
Adding upstream version 1:115.7.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'testing/web-platform/tests/tools/gitignore')
4 files changed, 375 insertions(+), 0 deletions(-)
diff --git a/testing/web-platform/tests/tools/gitignore/__init__.py b/testing/web-platform/tests/tools/gitignore/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testing/web-platform/tests/tools/gitignore/__init__.py
diff --git a/testing/web-platform/tests/tools/gitignore/gitignore.py b/testing/web-platform/tests/tools/gitignore/gitignore.py
new file mode 100644
index 0000000000..68c2a0835e
--- /dev/null
+++ b/testing/web-platform/tests/tools/gitignore/gitignore.py
@@ -0,0 +1,268 @@
+import re
+import os
+import itertools
+from collections import defaultdict
+from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Pattern, Tuple, TypeVar,
+                    Union, cast)
+
+
+T = TypeVar('T')
+
+end_space = re.compile(r"([^\\]\s)*$")
+
+
+def fnmatch_translate(pat: bytes) -> Tuple[bool, Pattern[bytes]]:
+    parts = []
+    seq: Optional[int] = None
+    i = 0
+    any_char = b"[^/]"
+    if pat[0:1] == b"/":
+        parts.append(b"^")
+        pat = pat[1:]
+    else:
+        # By default match the entire path up to a /
+        # but if / doesn't appear in the pattern we will mark is as
+        # a name pattern and just produce a pattern that matches against
+        # the filename
+        parts.append(b"^(?:.*/)?")
+
+    name_pattern = True
+    if pat[-1:] == b"/":
+        # If the last character is / match this directory or any subdirectory
+        pat = pat[:-1]
+        suffix = b"(?:/|$)"
+    else:
+        suffix = b"$"
+    while i < len(pat):
+        c = pat[i:i+1]
+        if c == b"\\":
+            if i < len(pat) - 1:
+                i += 1
+                c = pat[i:i+1]
+                parts.append(re.escape(c))
+            else:
+                raise ValueError
+        elif seq is not None:
+            # TODO: this doesn't really handle invalid sequences in the right way
+            if c == b"]":
+                seq = None
+                if parts[-1] == b"[":
+                    parts = parts[:-1]
+                elif parts[-1] == b"^" and parts[-2] == b"[":
+                    raise ValueError
+                else:
+                    parts.append(c)
+            elif c == b"-":
+                parts.append(c)
+            elif c == b"[":
+                raise ValueError
+            else:
+                parts.append(re.escape(c))
+        elif c == b"[":
+            parts.append(b"[")
+            if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"):
+                parts.append(b"^")
+                i += 1
+            seq = i
+        elif c == b"*":
+            if i < len(pat) - 1 and pat[i+1:i+2] == b"*":
+                if i > 0 and pat[i-1:i] != b"/":
+                    raise ValueError
+                parts.append(b".*")
+                i += 1
+                if i < len(pat) - 1 and pat[i+1:i+2] != b"/":
+                    raise ValueError
+            else:
+                parts.append(any_char + b"*")
+        elif c == b"?":
+            parts.append(any_char)
+        elif c == b"/" and not seq:
+            name_pattern = False
+            parts.append(c)
+        else:
+            parts.append(re.escape(c))
+        i += 1
+
+    if name_pattern:
+        parts[0] = b"^"
+
+    if seq is not None:
+        raise ValueError
+    parts.append(suffix)
+    try:
+        return name_pattern, re.compile(b"".join(parts))
+    except Exception:
+        raise ValueError
+
+
+# Regexp matching rules that have to be converted to patterns
+pattern_re = re.compile(br".*[\*\[\?]")
+
+
+def parse_line(line: bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]]:
+    line = line.rstrip()
+    if not line or line[0:1] == b"#":
+        return None
+
+    invert = line[0:1] == b"!"
+    if invert:
+        line = line[1:]
+
+    dir_only = line[-1:] == b"/"
+
+    if dir_only:
+        line = line[:-1]
+
+    # Could make a special case for **/foo, but we don't have any patterns like that
+    if not invert and not pattern_re.match(line):
+        literal = True
+        pattern: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]] = tuple(line.rsplit(b"/", 1))
+    else:
+        pattern = fnmatch_translate(line)
+        literal = False
+
+    return invert, dir_only, literal, pattern
+
+
+class PathFilter:
+    def __init__(self, root: bytes, extras: Optional[List[bytes]] = None, cache: Optional[MutableMapping[bytes, bool]] = None) -> None:
+        if root:
+            ignore_path: Optional[bytes] = os.path.join(root, b".gitignore")
+        else:
+            ignore_path = None
+        if not ignore_path and not extras:
+            self.trivial = True
+            return
+        self.trivial = False
+
+        self.literals_file: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
+        self.literals_dir: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
+        self.patterns_file: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []
+        self.patterns_dir: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []
+
+        if cache is None:
+            cache = {}
+        self.cache: MutableMapping[bytes, bool] = cache
+
+        if extras is None:
+            extras = []
+
+        if ignore_path and os.path.exists(ignore_path):
+            args: Tuple[Optional[bytes], List[bytes]] = (ignore_path, extras)
+        else:
+            args = None, extras
+        self._read_ignore(*args)
+
+    def _read_ignore(self, ignore_path: Optional[bytes], extras: List[bytes]) -> None:
+        if ignore_path is not None:
+            with open(ignore_path, "rb") as f:
+                for line in f:
+                    self._read_line(line)
+        for line in extras:
+            self._read_line(line)
+
+    def _read_line(self, line: bytes) -> None:
+        parsed = parse_line(line)
+        if not parsed:
+            return
+        invert, dir_only, literal, rule = parsed
+
+        if invert:
+            # For exclude rules, we attach the rules to all preceeding patterns, so
+            # that we can match patterns out of order and check if they were later
+            # overriden by an exclude rule
+            assert not literal
+            rule = cast(Tuple[bool, Pattern[bytes]], rule)
+            if not dir_only:
+                rules_iter: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]] = itertools.chain(
+                    itertools.chain(*(item.items() for item in self.literals_dir.values())),
+                    itertools.chain(*(item.items() for item in self.literals_file.values())),
+                    self.patterns_dir,
+                    self.patterns_file)
+            else:
+                rules_iter = itertools.chain(
+                    itertools.chain(*(item.items() for item in self.literals_dir.values())),
+                    self.patterns_dir)
+
+            for rules in rules_iter:
+                rules[1].append(rule)
+        else:
+            if literal:
+                rule = cast(Tuple[bytes, ...], rule)
+                if len(rule) == 1:
+                    dir_name, pattern = None, rule[0]  # type: Tuple[Optional[bytes], bytes]
+                else:
+                    dir_name, pattern = rule
+                self.literals_dir[dir_name][pattern] = []
+                if not dir_only:
+                    self.literals_file[dir_name][pattern] = []
+            else:
+                rule = cast(Tuple[bool, Pattern[bytes]], rule)
+                self.patterns_dir.append((rule, []))
+                if not dir_only:
+                    self.patterns_file.append((rule, []))
+
+    def filter(self,
+               iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
+               ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
+        empty: Dict[Any, Any] = {}
+        for dirpath, dirnames, filenames in iterator:
+            orig_dirpath = dirpath
+            path_sep = os.path.sep.encode()
+            if path_sep != b"/":
+                dirpath = dirpath.replace(path_sep, b"/")
+
+            keep_dirs: List[Tuple[bytes, T]] = []
+            keep_files: List[Tuple[bytes, T]] = []
+
+            for iter_items, literals, patterns, target, suffix in [
+                    (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"),
+                    (filenames, self.literals_file, self.patterns_file, keep_files, b"")]:
+                for item in iter_items:
+                    name = item[0]
+                    if dirpath:
+                        path = b"%s/%s" % (dirpath, name) + suffix
+                    else:
+                        path = name + suffix
+                    if path in self.cache:
+                        if not self.cache[path]:
+                            target.append(item)
+                        continue
+                    for rule_dir in [None, dirpath if dirpath != b"." else b""]:
+                        if name in literals.get(rule_dir, empty):
+                            exclude = literals[rule_dir][name]
+                            if not any(rule.match(name if name_only else path)
+                                       for name_only, rule in exclude):
+                                # Skip this item
+                                self.cache[path] = True
+                                break
+                    else:
+                        for (component_only, pattern), exclude in patterns:
+                            if component_only:
+                                match = pattern.match(name)
+                            else:
+                                match = pattern.match(path)
+                            if match:
+                                if not any(rule.match(name if name_only else path)
+                                           for name_only, rule in exclude):
+                                    # Skip this item
+                                    self.cache[path] = True
+                                    break
+                        else:
+                            self.cache[path] = False
+                            target.append(item)
+
+            dirnames[:] = keep_dirs
+            assert not any(b".git" == name for name, _ in dirnames)
+            yield orig_dirpath, dirnames, keep_files
+
+    def __call__(self,
+                 iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
+                 ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
+        if self.trivial:
+            return iterator
+
+        return self.filter(iterator)
+
+
+def has_ignore(dirpath: bytes) -> bool:
+    return os.path.exists(os.path.join(dirpath, b".gitignore"))
diff --git a/testing/web-platform/tests/tools/gitignore/tests/__init__.py b/testing/web-platform/tests/tools/gitignore/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testing/web-platform/tests/tools/gitignore/tests/__init__.py
diff --git a/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py b/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py
new file mode 100644
index 0000000000..61ad9fdc4e
--- /dev/null
+++ b/testing/web-platform/tests/tools/gitignore/tests/test_gitignore.py
@@ -0,0 +1,107 @@
+# mypy: allow-untyped-defs
+from typing import Iterable, Sequence, Tuple
+
+import pytest
+
+from ..gitignore import fnmatch_translate, PathFilter
+
+
+match_data: Sequence[Tuple[bytes, bool, Iterable[bytes]]] = [
+    (b"foo", True, [b"a/foo", b"foo"]),
+    (b"*.a", True, [b"foo.a", b"a/foo.a", b"a/b/foo.a", b"a.a/foo.a"]),
+    (b"*.py[co]", True, [b"a.pyc", b"a.pyo", b"a/b/c.pyc"]),
+    (b"\\#*", True, [b"#a", b"a/#b"]),
+    (b"*#", True, [b"a#", b"a/b#", b"#a#"]),
+    (b"/*.c", True, [b"a.c", b".c"]),
+    (b"**/b", False, [b"a/b", b"a/c/b"]),
+    (b"*b", True, [b"ab"]),
+    (b"*b", True, [b"a/b"]),
+    (b"**/b", False, [b"a/b"]),
+    (b"a/", True, [b"a"]),
+    (b"a[/]b", True, []),
+    (b"**/b", False, [b"a/c/b"]),
+    (b"a?c", True, [b"abc"]),
+    (b"a[^b]c", True, [b"acc"]),
+    (b"a[b-c]c", True, [b"abc", b"acc"]),
+    (b"a[]c", True, [b"ac"]),
+]
+
+mismatch_data: Sequence[Tuple[bytes, bool, Iterable[bytes]]] = [
+    (b"foo", True, [b"foob", b"afoo"]),
+    (b"*.a", True, [b"a", b"foo:a", b"a.a/foo"]),
+    (b"*.py[co]", True, [b"a.pyd", b"pyo", b"a.py"]),
+    (b"a", True, [b"ab"]),
+    (b"a?c", True, [b"ac", b"abbc"]),
+    (b"a[^b]c", True, [b"abc"]),
+    (b"a[b-c]c", True, [b"adc"]),
+]
+
+invalid_data = [
+    b"[a",
+    b"***/foo",
+    b"a\\",
+    b"**b",
+    b"b**/",
+    b"[[]",
+    b"a[^]c",
+]
+
+filter_data = [
+    ([b"foo", b"bar/", b"/a", b"*.py"],
+     [(b"", [b"foo", b"bar", b"baz"], [b"a"]),
+      (b"baz", [b"a"], [b"foo", b"bar"])],
+     [([b"baz"], []),
+      ([b"a"], [b"bar"])]),
+    ([b"#foo", b"", b"a*", b"!a.py"],
+     [(b"", [b"foo"], [b"a", b"a.foo", b"a.py"])],
+     [([b"foo"], [b"a.py"])]),
+    ([b"a.foo", b"!a.py"],
+     [(b"", [b"foo"], [b"a", b"a.foo", b"a.py"])],
+     [([b"foo"], [b"a", b"a.py"])]),
+]
+
+
+def expand_data(compact_data: Sequence[Tuple[bytes, bool, Iterable[bytes]]]) -> Iterable[Tuple[bytes, bool, bytes]]:
+    for pattern, name_only, inputs in compact_data:
+        for input in inputs:
+            yield pattern, name_only, input
+
+
+@pytest.mark.parametrize("pattern, name_only, input", expand_data(match_data))
+def tests_match(pattern, name_only, input):
+    name_only_result, regexp = fnmatch_translate(pattern)
+    assert name_only_result == name_only
+    if name_only:
+        input = input.rsplit(b"/", 1)[-1]
+    assert regexp.match(input) is not None
+
+
+@pytest.mark.parametrize("pattern, name_only, input", expand_data(mismatch_data))
+def tests_no_match(pattern, name_only, input):
+    name_only_result, regexp = fnmatch_translate(pattern)
+    assert name_only_result == name_only
+    if name_only:
+        input = input.rsplit(b"/", 1)[-1]
+    assert regexp.match(input) is None
+
+
+@pytest.mark.parametrize("pattern", invalid_data)
+def tests_invalid(pattern):
+    with pytest.raises(ValueError):
+        fnmatch_translate(pattern)
+
+
+@pytest.mark.parametrize("rules, input, expected", filter_data)
+def test_path_filter(rules, input, expected):
+    f = PathFilter(None, rules)
+    # Add some fake stat data
+    for i, item in enumerate(input):
+        repl = [input[i][0]]
+        for j in [1, 2]:
+            repl.append([(name, None) for name in input[i][j]])
+        input[i] = tuple(repl)
+
+    for i, output in enumerate(f(input)):
+        assert output[0] == input[i][0]
+        for j in [1, 2]:
+            assert [item[0] for item in output[j]] == expected[i][j-1]