From c3ded2824ef2c8a2ed59b2a6e339aba8810c1e06 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 May 2023 08:30:43 +0200 Subject: Merging upstream version 3.3.1. Signed-off-by: Daniel Baumann --- pre_commit/commands/autoupdate.py | 154 +++++++++++++++++++-------------- pre_commit/lang_base.py | 10 +-- pre_commit/main.py | 9 +- pre_commit/resources/ruby-build.tar.gz | Bin 76466 -> 75808 bytes pre_commit/xargs.py | 8 ++ 5 files changed, 105 insertions(+), 76 deletions(-) (limited to 'pre_commit') diff --git a/pre_commit/commands/autoupdate.py b/pre_commit/commands/autoupdate.py index 7ed6e77..e7725fd 100644 --- a/pre_commit/commands/autoupdate.py +++ b/pre_commit/commands/autoupdate.py @@ -1,5 +1,6 @@ from __future__ import annotations +import concurrent.futures import os.path import re import tempfile @@ -10,13 +11,13 @@ from typing import Sequence import pre_commit.constants as C from pre_commit import git from pre_commit import output +from pre_commit import xargs from pre_commit.clientlib import InvalidManifestError from pre_commit.clientlib import load_config from pre_commit.clientlib import load_manifest from pre_commit.clientlib import LOCAL from pre_commit.clientlib import META from pre_commit.commands.migrate_config import migrate_config -from pre_commit.store import Store from pre_commit.util import CalledProcessError from pre_commit.util import cmd_output from pre_commit.util import cmd_output_b @@ -27,49 +28,58 @@ from pre_commit.yaml import yaml_load class RevInfo(NamedTuple): repo: str rev: str - frozen: str | None + frozen: str | None = None + hook_ids: frozenset[str] = frozenset() @classmethod def from_config(cls, config: dict[str, Any]) -> RevInfo: - return cls(config['repo'], config['rev'], None) + return cls(config['repo'], config['rev']) def update(self, tags_only: bool, freeze: bool) -> RevInfo: - git_cmd = ('git', *git.NO_FS_MONITOR) + with tempfile.TemporaryDirectory() as tmp: + _git = ('git', *git.NO_FS_MONITOR, '-C', tmp) - if tags_only: - tag_cmd = ( - *git_cmd, 'describe', - 'FETCH_HEAD', '--tags', '--abbrev=0', - ) - else: - tag_cmd = ( - *git_cmd, 'describe', - 'FETCH_HEAD', '--tags', '--exact', - ) + if tags_only: + tag_opt = '--abbrev=0' + else: + tag_opt = '--exact' + tag_cmd = (*_git, 'describe', 'FETCH_HEAD', '--tags', tag_opt) - with tempfile.TemporaryDirectory() as tmp: git.init_repo(tmp, self.repo) + cmd_output_b(*_git, 'config', 'extensions.partialClone', 'true') cmd_output_b( - *git_cmd, 'fetch', 'origin', 'HEAD', '--tags', - cwd=tmp, + *_git, 'fetch', 'origin', 'HEAD', + '--quiet', '--filter=blob:none', '--tags', ) try: - rev = cmd_output(*tag_cmd, cwd=tmp)[1].strip() + rev = cmd_output(*tag_cmd)[1].strip() except CalledProcessError: - cmd = (*git_cmd, 'rev-parse', 'FETCH_HEAD') - rev = cmd_output(*cmd, cwd=tmp)[1].strip() + rev = cmd_output(*_git, 'rev-parse', 'FETCH_HEAD')[1].strip() else: if tags_only: rev = git.get_best_candidate_tag(rev, tmp) frozen = None if freeze: - exact_rev_cmd = (*git_cmd, 'rev-parse', rev) - exact = cmd_output(*exact_rev_cmd, cwd=tmp)[1].strip() + exact = cmd_output(*_git, 'rev-parse', rev)[1].strip() if exact != rev: rev, frozen = exact, rev - return self._replace(rev=rev, frozen=frozen) + + try: + # workaround for windows -- see #2865 + cmd_output_b(*_git, 'show', f'{rev}:{C.MANIFEST_FILE}') + cmd_output(*_git, 'checkout', rev, '--', C.MANIFEST_FILE) + except CalledProcessError: + pass # this will be caught by manifest validating code + try: + manifest = load_manifest(os.path.join(tmp, C.MANIFEST_FILE)) + except InvalidManifestError as e: + raise RepositoryCannotBeUpdatedError(f'[{self.repo}] {e}') + else: + hook_ids = frozenset(hook['id'] for hook in manifest) + + return self._replace(rev=rev, frozen=frozen, hook_ids=hook_ids) class RepositoryCannotBeUpdatedError(RuntimeError): @@ -79,24 +89,30 @@ class RepositoryCannotBeUpdatedError(RuntimeError): def _check_hooks_still_exist_at_rev( repo_config: dict[str, Any], info: RevInfo, - store: Store, ) -> None: - try: - path = store.clone(repo_config['repo'], info.rev) - manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) - except InvalidManifestError as e: - raise RepositoryCannotBeUpdatedError(str(e)) - # See if any of our hooks were deleted with the new commits hooks = {hook['id'] for hook in repo_config['hooks']} - hooks_missing = hooks - {hook['id'] for hook in manifest} + hooks_missing = hooks - info.hook_ids if hooks_missing: raise RepositoryCannotBeUpdatedError( - f'Cannot update because the update target is missing these ' - f'hooks:\n{", ".join(sorted(hooks_missing))}', + f'[{info.repo}] Cannot update because the update target is ' + f'missing these hooks: {", ".join(sorted(hooks_missing))}', ) +def _update_one( + i: int, + repo: dict[str, Any], + *, + tags_only: bool, + freeze: bool, +) -> tuple[int, RevInfo, RevInfo]: + old = RevInfo.from_config(repo) + new = old.update(tags_only=tags_only, freeze=freeze) + _check_hooks_still_exist_at_rev(repo, new) + return i, old, new + + REV_LINE_RE = re.compile(r'^(\s+)rev:(\s*)([\'"]?)([^\s#]+)(.*)(\r?\n)$') @@ -145,49 +161,53 @@ def _write_new_config(path: str, rev_infos: list[RevInfo | None]) -> None: def autoupdate( config_file: str, - store: Store, tags_only: bool, freeze: bool, repos: Sequence[str] = (), + jobs: int = 1, ) -> int: """Auto-update the pre-commit config to the latest versions of repos.""" migrate_config(config_file, quiet=True) - retv = 0 - rev_infos: list[RevInfo | None] = [] changed = False + retv = 0 - config = load_config(config_file) - for repo_config in config['repos']: - if repo_config['repo'] in {LOCAL, META}: - continue - - info = RevInfo.from_config(repo_config) - if repos and info.repo not in repos: - rev_infos.append(None) - continue - - output.write(f'Updating {info.repo} ... ') - new_info = info.update(tags_only=tags_only, freeze=freeze) - try: - _check_hooks_still_exist_at_rev(repo_config, new_info, store) - except RepositoryCannotBeUpdatedError as error: - output.write_line(error.args[0]) - rev_infos.append(None) - retv = 1 - continue - - if new_info.rev != info.rev: - changed = True - if new_info.frozen: - updated_to = f'{new_info.frozen} (frozen)' + config_repos = [ + repo for repo in load_config(config_file)['repos'] + if repo['repo'] not in {LOCAL, META} + ] + + rev_infos: list[RevInfo | None] = [None] * len(config_repos) + jobs = jobs or xargs.cpu_count() # 0 => number of cpus + jobs = min(jobs, len(repos) or len(config_repos)) # max 1-per-thread + jobs = max(jobs, 1) # at least one thread + with concurrent.futures.ThreadPoolExecutor(jobs) as exe: + futures = [ + exe.submit( + _update_one, + i, repo, tags_only=tags_only, freeze=freeze, + ) + for i, repo in enumerate(config_repos) + if not repos or repo['repo'] in repos + ] + for future in concurrent.futures.as_completed(futures): + try: + i, old, new = future.result() + except RepositoryCannotBeUpdatedError as e: + output.write_line(str(e)) + retv = 1 else: - updated_to = new_info.rev - msg = f'updating {info.rev} -> {updated_to}.' - output.write_line(msg) - rev_infos.append(new_info) - else: - output.write_line('already up to date.') - rev_infos.append(None) + if new.rev != old.rev: + changed = True + if new.frozen: + new_s = f'{new.frozen} (frozen)' + else: + new_s = new.rev + msg = f'updating {old.rev} -> {new_s}' + rev_infos[i] = new + else: + msg = 'already up to date!' + + output.write_line(f'[{old.repo}] {msg}') if changed: _write_new_config(config_file, rev_infos) diff --git a/pre_commit/lang_base.py b/pre_commit/lang_base.py index 9480c55..4a993ea 100644 --- a/pre_commit/lang_base.py +++ b/pre_commit/lang_base.py @@ -1,7 +1,6 @@ from __future__ import annotations import contextlib -import multiprocessing import os import random import re @@ -15,9 +14,9 @@ from typing import Sequence import pre_commit.constants as C from pre_commit import parse_shebang +from pre_commit import xargs from pre_commit.prefix import Prefix from pre_commit.util import cmd_output_b -from pre_commit.xargs import xargs FIXED_RANDOM_SEED = 1542676187 @@ -140,10 +139,7 @@ def target_concurrency() -> int: if 'TRAVIS' in os.environ: return 2 else: - try: - return multiprocessing.cpu_count() - except NotImplementedError: - return 1 + return xargs.cpu_count() def _shuffled(seq: Sequence[str]) -> list[str]: @@ -171,7 +167,7 @@ def run_xargs( # ordering. file_args = _shuffled(file_args) jobs = target_concurrency() - return xargs(cmd, file_args, target_concurrency=jobs, color=color) + return xargs.xargs(cmd, file_args, target_concurrency=jobs, color=color) def hook_cmd(entry: str, args: Sequence[str]) -> tuple[str, ...]: diff --git a/pre_commit/main.py b/pre_commit/main.py index 9615c5e..9dfce2c 100644 --- a/pre_commit/main.py +++ b/pre_commit/main.py @@ -226,9 +226,13 @@ def main(argv: Sequence[str] | None = None) -> int: help='Store "frozen" hashes in `rev` instead of tag names', ) autoupdate_parser.add_argument( - '--repo', dest='repos', action='append', metavar='REPO', + '--repo', dest='repos', action='append', metavar='REPO', default=[], help='Only update this repository -- may be specified multiple times.', ) + autoupdate_parser.add_argument( + '-j', '--jobs', type=int, default=1, + help='Number of threads to use. (default %(default)s).', + ) _add_cmd('clean', help='Clean out pre-commit files.') @@ -368,10 +372,11 @@ def main(argv: Sequence[str] | None = None) -> int: if args.command == 'autoupdate': return autoupdate( - args.config, store, + args.config, tags_only=not args.bleeding_edge, freeze=args.freeze, repos=args.repos, + jobs=args.jobs, ) elif args.command == 'clean': return clean(store) diff --git a/pre_commit/resources/ruby-build.tar.gz b/pre_commit/resources/ruby-build.tar.gz index b6eacf5..19d467f 100644 Binary files a/pre_commit/resources/ruby-build.tar.gz and b/pre_commit/resources/ruby-build.tar.gz differ diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index e3af90e..31be6f3 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -3,6 +3,7 @@ from __future__ import annotations import concurrent.futures import contextlib import math +import multiprocessing import os import subprocess import sys @@ -22,6 +23,13 @@ TArg = TypeVar('TArg') TRet = TypeVar('TRet') +def cpu_count() -> int: + try: + return multiprocessing.cpu_count() + except NotImplementedError: + return 1 + + def _environ_size(_env: MutableMapping[str, str] | None = None) -> int: environ = _env if _env is not None else getattr(os, 'environb', os.environ) size = 8 * len(environ) # number of pointers in `envp` -- cgit v1.2.3