diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2020-03-24 21:59:15 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2020-03-24 21:59:15 +0000 |
commit | 63fad53303381388673073de580a32088a4ef0fe (patch) | |
tree | a2c5c329ee5e79a220fac7e079283235fecc0cda /pre_commit | |
parent | Initial commit. (diff) | |
download | pre-commit-63fad53303381388673073de580a32088a4ef0fe.tar.xz pre-commit-63fad53303381388673073de580a32088a4ef0fe.zip |
Adding upstream version 2.2.0.upstream/2.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'pre_commit')
68 files changed, 5287 insertions, 0 deletions
diff --git a/pre_commit/__init__.py b/pre_commit/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pre_commit/__init__.py diff --git a/pre_commit/__main__.py b/pre_commit/__main__.py new file mode 100644 index 0000000..5414068 --- /dev/null +++ b/pre_commit/__main__.py @@ -0,0 +1,5 @@ +from pre_commit.main import main + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/clientlib.py b/pre_commit/clientlib.py new file mode 100644 index 0000000..56ec0dd --- /dev/null +++ b/pre_commit/clientlib.py @@ -0,0 +1,317 @@ +import argparse +import functools +import logging +import shlex +import sys +from typing import Any +from typing import Dict +from typing import Optional +from typing import Sequence + +import cfgv +from identify.identify import ALL_TAGS + +import pre_commit.constants as C +from pre_commit.error_handler import FatalError +from pre_commit.languages.all import all_languages +from pre_commit.util import parse_version +from pre_commit.util import yaml_load + +logger = logging.getLogger('pre_commit') + +check_string_regex = cfgv.check_and(cfgv.check_string, cfgv.check_regex) + + +def check_type_tag(tag: str) -> None: + if tag not in ALL_TAGS: + raise cfgv.ValidationError( + f'Type tag {tag!r} is not recognized. ' + f'Try upgrading identify and pre-commit?', + ) + + +def check_min_version(version: str) -> None: + if parse_version(version) > parse_version(C.VERSION): + raise cfgv.ValidationError( + f'pre-commit version {version} is required but version ' + f'{C.VERSION} is installed. ' + f'Perhaps run `pip install --upgrade pre-commit`.', + ) + + +def _make_argparser(filenames_help: str) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', help=filenames_help) + parser.add_argument('-V', '--version', action='version', version=C.VERSION) + return parser + + +MANIFEST_HOOK_DICT = cfgv.Map( + 'Hook', 'id', + + cfgv.Required('id', cfgv.check_string), + cfgv.Required('name', cfgv.check_string), + cfgv.Required('entry', cfgv.check_string), + cfgv.Required('language', cfgv.check_one_of(all_languages)), + cfgv.Optional('alias', cfgv.check_string, ''), + + cfgv.Optional('files', check_string_regex, ''), + cfgv.Optional('exclude', check_string_regex, '^$'), + cfgv.Optional('types', cfgv.check_array(check_type_tag), ['file']), + cfgv.Optional('exclude_types', cfgv.check_array(check_type_tag), []), + + cfgv.Optional( + 'additional_dependencies', cfgv.check_array(cfgv.check_string), [], + ), + cfgv.Optional('args', cfgv.check_array(cfgv.check_string), []), + cfgv.Optional('always_run', cfgv.check_bool, False), + cfgv.Optional('pass_filenames', cfgv.check_bool, True), + cfgv.Optional('description', cfgv.check_string, ''), + cfgv.Optional('language_version', cfgv.check_string, C.DEFAULT), + cfgv.Optional('log_file', cfgv.check_string, ''), + cfgv.Optional('minimum_pre_commit_version', cfgv.check_string, '0'), + cfgv.Optional('require_serial', cfgv.check_bool, False), + cfgv.Optional('stages', cfgv.check_array(cfgv.check_one_of(C.STAGES)), []), + cfgv.Optional('verbose', cfgv.check_bool, False), +) +MANIFEST_SCHEMA = cfgv.Array(MANIFEST_HOOK_DICT) + + +class InvalidManifestError(FatalError): + pass + + +load_manifest = functools.partial( + cfgv.load_from_filename, + schema=MANIFEST_SCHEMA, + load_strategy=yaml_load, + exc_tp=InvalidManifestError, +) + + +def validate_manifest_main(argv: Optional[Sequence[str]] = None) -> int: + parser = _make_argparser('Manifest filenames.') + args = parser.parse_args(argv) + ret = 0 + for filename in args.filenames: + try: + load_manifest(filename) + except InvalidManifestError as e: + print(e) + ret = 1 + return ret + + +LOCAL = 'local' +META = 'meta' + + +class MigrateShaToRev: + key = 'rev' + + @staticmethod + def _cond(key: str) -> cfgv.Conditional: + return cfgv.Conditional( + key, cfgv.check_string, + condition_key='repo', + condition_value=cfgv.NotIn(LOCAL, META), + ensure_absent=True, + ) + + def check(self, dct: Dict[str, Any]) -> None: + if dct.get('repo') in {LOCAL, META}: + self._cond('rev').check(dct) + self._cond('sha').check(dct) + elif 'sha' in dct and 'rev' in dct: + raise cfgv.ValidationError('Cannot specify both sha and rev') + elif 'sha' in dct: + self._cond('sha').check(dct) + else: + self._cond('rev').check(dct) + + def apply_default(self, dct: Dict[str, Any]) -> None: + if 'sha' in dct: + dct['rev'] = dct.pop('sha') + + remove_default = cfgv.Required.remove_default + + +def _entry(modname: str) -> str: + """the hook `entry` is passed through `shlex.split()` by the command + runner, so to prevent issues with spaces and backslashes (on Windows) + it must be quoted here. + """ + return f'{shlex.quote(sys.executable)} -m pre_commit.meta_hooks.{modname}' + + +def warn_unknown_keys_root( + extra: Sequence[str], + orig_keys: Sequence[str], + dct: Dict[str, str], +) -> None: + logger.warning(f'Unexpected key(s) present at root: {", ".join(extra)}') + + +def warn_unknown_keys_repo( + extra: Sequence[str], + orig_keys: Sequence[str], + dct: Dict[str, str], +) -> None: + logger.warning( + f'Unexpected key(s) present on {dct["repo"]}: {", ".join(extra)}', + ) + + +_meta = ( + ( + 'check-hooks-apply', ( + ('name', 'Check hooks apply to the repository'), + ('files', C.CONFIG_FILE), + ('entry', _entry('check_hooks_apply')), + ), + ), + ( + 'check-useless-excludes', ( + ('name', 'Check for useless excludes'), + ('files', C.CONFIG_FILE), + ('entry', _entry('check_useless_excludes')), + ), + ), + ( + 'identity', ( + ('name', 'identity'), + ('verbose', True), + ('entry', _entry('identity')), + ), + ), +) + +META_HOOK_DICT = cfgv.Map( + 'Hook', 'id', + cfgv.Required('id', cfgv.check_string), + cfgv.Required('id', cfgv.check_one_of(tuple(k for k, _ in _meta))), + # language must be system + cfgv.Optional('language', cfgv.check_one_of({'system'}), 'system'), + *( + # default to the hook definition for the meta hooks + cfgv.ConditionalOptional(key, cfgv.check_any, value, 'id', hook_id) + for hook_id, values in _meta + for key, value in values + ), + *( + # default to the "manifest" parsing + cfgv.OptionalNoDefault(item.key, item.check_fn) + # these will always be defaulted above + if item.key in {'name', 'language', 'entry'} else + item + for item in MANIFEST_HOOK_DICT.items + ), +) +CONFIG_HOOK_DICT = cfgv.Map( + 'Hook', 'id', + + cfgv.Required('id', cfgv.check_string), + + # All keys in manifest hook dict are valid in a config hook dict, but + # are optional. + # No defaults are provided here as the config is merged on top of the + # manifest. + *( + cfgv.OptionalNoDefault(item.key, item.check_fn) + for item in MANIFEST_HOOK_DICT.items + if item.key != 'id' + ), +) +CONFIG_REPO_DICT = cfgv.Map( + 'Repository', 'repo', + + cfgv.Required('repo', cfgv.check_string), + + cfgv.ConditionalRecurse( + 'hooks', cfgv.Array(CONFIG_HOOK_DICT), + 'repo', cfgv.NotIn(LOCAL, META), + ), + cfgv.ConditionalRecurse( + 'hooks', cfgv.Array(MANIFEST_HOOK_DICT), + 'repo', LOCAL, + ), + cfgv.ConditionalRecurse( + 'hooks', cfgv.Array(META_HOOK_DICT), + 'repo', META, + ), + + MigrateShaToRev(), + cfgv.WarnAdditionalKeys(('repo', 'rev', 'hooks'), warn_unknown_keys_repo), +) +DEFAULT_LANGUAGE_VERSION = cfgv.Map( + 'DefaultLanguageVersion', None, + cfgv.NoAdditionalKeys(all_languages), + *(cfgv.Optional(x, cfgv.check_string, C.DEFAULT) for x in all_languages), +) +CONFIG_SCHEMA = cfgv.Map( + 'Config', None, + + cfgv.RequiredRecurse('repos', cfgv.Array(CONFIG_REPO_DICT)), + cfgv.OptionalRecurse( + 'default_language_version', DEFAULT_LANGUAGE_VERSION, {}, + ), + cfgv.Optional( + 'default_stages', + cfgv.check_array(cfgv.check_one_of(C.STAGES)), + C.STAGES, + ), + cfgv.Optional('files', check_string_regex, ''), + cfgv.Optional('exclude', check_string_regex, '^$'), + cfgv.Optional('fail_fast', cfgv.check_bool, False), + cfgv.Optional( + 'minimum_pre_commit_version', + cfgv.check_and(cfgv.check_string, check_min_version), + '0', + ), + cfgv.WarnAdditionalKeys( + ( + 'repos', + 'default_language_version', + 'default_stages', + 'files', + 'exclude', + 'fail_fast', + 'minimum_pre_commit_version', + ), + warn_unknown_keys_root, + ), +) + + +class InvalidConfigError(FatalError): + pass + + +def ordered_load_normalize_legacy_config(contents: str) -> Dict[str, Any]: + data = yaml_load(contents) + if isinstance(data, list): + # TODO: Once happy, issue a deprecation warning and instructions + return {'repos': data} + else: + return data + + +load_config = functools.partial( + cfgv.load_from_filename, + schema=CONFIG_SCHEMA, + load_strategy=ordered_load_normalize_legacy_config, + exc_tp=InvalidConfigError, +) + + +def validate_config_main(argv: Optional[Sequence[str]] = None) -> int: + parser = _make_argparser('Config filenames.') + args = parser.parse_args(argv) + ret = 0 + for filename in args.filenames: + try: + load_config(filename) + except InvalidConfigError as e: + print(e) + ret = 1 + return ret diff --git a/pre_commit/color.py b/pre_commit/color.py new file mode 100644 index 0000000..5fa7042 --- /dev/null +++ b/pre_commit/color.py @@ -0,0 +1,97 @@ +import os +import sys + +if sys.platform == 'win32': # pragma: no cover (windows) + def _enable() -> None: + from ctypes import POINTER + from ctypes import windll + from ctypes import WinError + from ctypes import WINFUNCTYPE + from ctypes.wintypes import BOOL + from ctypes.wintypes import DWORD + from ctypes.wintypes import HANDLE + + STD_OUTPUT_HANDLE = -11 + ENABLE_VIRTUAL_TERMINAL_PROCESSING = 4 + + def bool_errcheck(result, func, args): + if not result: + raise WinError() + return args + + GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)( + ('GetStdHandle', windll.kernel32), ((1, 'nStdHandle'),), + ) + + GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))( + ('GetConsoleMode', windll.kernel32), + ((1, 'hConsoleHandle'), (2, 'lpMode')), + ) + GetConsoleMode.errcheck = bool_errcheck + + SetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, DWORD)( + ('SetConsoleMode', windll.kernel32), + ((1, 'hConsoleHandle'), (1, 'dwMode')), + ) + SetConsoleMode.errcheck = bool_errcheck + + # As of Windows 10, the Windows console supports (some) ANSI escape + # sequences, but it needs to be enabled using `SetConsoleMode` first. + # + # More info on the escape sequences supported: + # https://msdn.microsoft.com/en-us/library/windows/desktop/mt638032(v=vs.85).aspx + stdout = GetStdHandle(STD_OUTPUT_HANDLE) + flags = GetConsoleMode(stdout) + SetConsoleMode(stdout, flags | ENABLE_VIRTUAL_TERMINAL_PROCESSING) + + try: + _enable() + except OSError: + terminal_supports_color = False + else: + terminal_supports_color = True +else: # pragma: win32 no cover + terminal_supports_color = True + +RED = '\033[41m' +GREEN = '\033[42m' +YELLOW = '\033[43;30m' +TURQUOISE = '\033[46;30m' +SUBTLE = '\033[2m' +NORMAL = '\033[m' + + +def format_color(text: str, color: str, use_color_setting: bool) -> str: + """Format text with color. + + Args: + text - Text to be formatted with color if `use_color` + color - The color start string + use_color_setting - Whether or not to color + """ + if use_color_setting: + return f'{color}{text}{NORMAL}' + else: + return text + + +COLOR_CHOICES = ('auto', 'always', 'never') + + +def use_color(setting: str) -> bool: + """Choose whether to use color based on the command argument. + + Args: + setting - Either `auto`, `always`, or `never` + """ + if setting not in COLOR_CHOICES: + raise ValueError(setting) + + return ( + setting == 'always' or ( + setting == 'auto' and + sys.stdout.isatty() and + terminal_supports_color and + os.getenv('TERM') != 'dumb' + ) + ) diff --git a/pre_commit/commands/__init__.py b/pre_commit/commands/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pre_commit/commands/__init__.py diff --git a/pre_commit/commands/autoupdate.py b/pre_commit/commands/autoupdate.py new file mode 100644 index 0000000..5a9a988 --- /dev/null +++ b/pre_commit/commands/autoupdate.py @@ -0,0 +1,182 @@ +import os.path +import re +from typing import Any +from typing import Dict +from typing import List +from typing import NamedTuple +from typing import Optional +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit import git +from pre_commit import output +from pre_commit.clientlib import InvalidManifestError +from pre_commit.clientlib import load_config +from pre_commit.clientlib import load_manifest +from pre_commit.clientlib import LOCAL +from pre_commit.clientlib import META +from pre_commit.commands.migrate_config import migrate_config +from pre_commit.store import Store +from pre_commit.util import CalledProcessError +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b +from pre_commit.util import tmpdir +from pre_commit.util import yaml_dump +from pre_commit.util import yaml_load + + +class RevInfo(NamedTuple): + repo: str + rev: str + frozen: Optional[str] + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> 'RevInfo': + return cls(config['repo'], config['rev'], None) + + def update(self, tags_only: bool, freeze: bool) -> 'RevInfo': + if tags_only: + tag_cmd = ('git', 'describe', 'FETCH_HEAD', '--tags', '--abbrev=0') + else: + tag_cmd = ('git', 'describe', 'FETCH_HEAD', '--tags', '--exact') + + with tmpdir() as tmp: + git.init_repo(tmp, self.repo) + cmd_output_b('git', 'fetch', 'origin', 'HEAD', '--tags', cwd=tmp) + + try: + rev = cmd_output(*tag_cmd, cwd=tmp)[1].strip() + except CalledProcessError: + cmd = ('git', 'rev-parse', 'FETCH_HEAD') + rev = cmd_output(*cmd, cwd=tmp)[1].strip() + + frozen = None + if freeze: + exact = cmd_output('git', 'rev-parse', rev, cwd=tmp)[1].strip() + if exact != rev: + rev, frozen = exact, rev + return self._replace(rev=rev, frozen=frozen) + + +class RepositoryCannotBeUpdatedError(RuntimeError): + pass + + +def _check_hooks_still_exist_at_rev( + repo_config: Dict[str, Any], + info: RevInfo, + store: Store, +) -> None: + try: + path = store.clone(repo_config['repo'], info.rev) + manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) + except InvalidManifestError as e: + raise RepositoryCannotBeUpdatedError(str(e)) + + # See if any of our hooks were deleted with the new commits + hooks = {hook['id'] for hook in repo_config['hooks']} + hooks_missing = hooks - {hook['id'] for hook in manifest} + if hooks_missing: + raise RepositoryCannotBeUpdatedError( + f'Cannot update because the tip of HEAD is missing these hooks:\n' + f'{", ".join(sorted(hooks_missing))}', + ) + + +REV_LINE_RE = re.compile(r'^(\s+)rev:(\s*)([^\s#]+)(.*)(\r?\n)$', re.DOTALL) + + +def _original_lines( + path: str, + rev_infos: List[Optional[RevInfo]], + retry: bool = False, +) -> Tuple[List[str], List[int]]: + """detect `rev:` lines or reformat the file""" + with open(path) as f: + original = f.read() + + lines = original.splitlines(True) + idxs = [i for i, line in enumerate(lines) if REV_LINE_RE.match(line)] + if len(idxs) == len(rev_infos): + return lines, idxs + elif retry: + raise AssertionError('could not find rev lines') + else: + with open(path, 'w') as f: + f.write(yaml_dump(yaml_load(original))) + return _original_lines(path, rev_infos, retry=True) + + +def _write_new_config(path: str, rev_infos: List[Optional[RevInfo]]) -> None: + lines, idxs = _original_lines(path, rev_infos) + + for idx, rev_info in zip(idxs, rev_infos): + if rev_info is None: + continue + match = REV_LINE_RE.match(lines[idx]) + assert match is not None + new_rev_s = yaml_dump({'rev': rev_info.rev}) + new_rev = new_rev_s.split(':', 1)[1].strip() + if rev_info.frozen is not None: + comment = f' # frozen: {rev_info.frozen}' + elif match[4].strip().startswith('# frozen:'): + comment = '' + else: + comment = match[4] + lines[idx] = f'{match[1]}rev:{match[2]}{new_rev}{comment}{match[5]}' + + with open(path, 'w') as f: + f.write(''.join(lines)) + + +def autoupdate( + config_file: str, + store: Store, + tags_only: bool, + freeze: bool, + repos: Sequence[str] = (), +) -> int: + """Auto-update the pre-commit config to the latest versions of repos.""" + migrate_config(config_file, quiet=True) + retv = 0 + rev_infos: List[Optional[RevInfo]] = [] + changed = False + + config = load_config(config_file) + for repo_config in config['repos']: + if repo_config['repo'] in {LOCAL, META}: + continue + + info = RevInfo.from_config(repo_config) + if repos and info.repo not in repos: + rev_infos.append(None) + continue + + output.write(f'Updating {info.repo} ... ') + new_info = info.update(tags_only=tags_only, freeze=freeze) + try: + _check_hooks_still_exist_at_rev(repo_config, new_info, store) + except RepositoryCannotBeUpdatedError as error: + output.write_line(error.args[0]) + rev_infos.append(None) + retv = 1 + continue + + if new_info.rev != info.rev: + changed = True + if new_info.frozen: + updated_to = f'{new_info.frozen} (frozen)' + else: + updated_to = new_info.rev + msg = f'updating {info.rev} -> {updated_to}.' + output.write_line(msg) + rev_infos.append(new_info) + else: + output.write_line('already up to date.') + rev_infos.append(None) + + if changed: + _write_new_config(config_file, rev_infos) + + return retv diff --git a/pre_commit/commands/clean.py b/pre_commit/commands/clean.py new file mode 100644 index 0000000..2be6c16 --- /dev/null +++ b/pre_commit/commands/clean.py @@ -0,0 +1,14 @@ +import os.path + +from pre_commit import output +from pre_commit.store import Store +from pre_commit.util import rmtree + + +def clean(store: Store) -> int: + legacy_path = os.path.expanduser('~/.pre-commit') + for directory in (store.directory, legacy_path): + if os.path.exists(directory): + rmtree(directory) + output.write_line(f'Cleaned {directory}.') + return 0 diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py new file mode 100644 index 0000000..7f6d311 --- /dev/null +++ b/pre_commit/commands/gc.py @@ -0,0 +1,90 @@ +import os.path +from typing import Any +from typing import Dict +from typing import Set +from typing import Tuple + +import pre_commit.constants as C +from pre_commit import output +from pre_commit.clientlib import InvalidConfigError +from pre_commit.clientlib import InvalidManifestError +from pre_commit.clientlib import load_config +from pre_commit.clientlib import load_manifest +from pre_commit.clientlib import LOCAL +from pre_commit.clientlib import META +from pre_commit.store import Store + + +def _mark_used_repos( + store: Store, + all_repos: Dict[Tuple[str, str], str], + unused_repos: Set[Tuple[str, str]], + repo: Dict[str, Any], +) -> None: + if repo['repo'] == META: + return + elif repo['repo'] == LOCAL: + for hook in repo['hooks']: + deps = hook.get('additional_dependencies') + unused_repos.discard(( + store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION, + )) + else: + key = (repo['repo'], repo['rev']) + path = all_repos.get(key) + # can't inspect manifest if it isn't cloned + if path is None: + return + + try: + manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) + except InvalidManifestError: + return + else: + unused_repos.discard(key) + by_id = {hook['id']: hook for hook in manifest} + + for hook in repo['hooks']: + if hook['id'] not in by_id: + continue + + deps = hook.get( + 'additional_dependencies', + by_id[hook['id']]['additional_dependencies'], + ) + unused_repos.discard(( + store.db_repo_name(repo['repo'], deps), repo['rev'], + )) + + +def _gc_repos(store: Store) -> int: + configs = store.select_all_configs() + repos = store.select_all_repos() + + # delete config paths which do not exist + dead_configs = [p for p in configs if not os.path.exists(p)] + live_configs = [p for p in configs if os.path.exists(p)] + + all_repos = {(repo, ref): path for repo, ref, path in repos} + unused_repos = set(all_repos) + for config_path in live_configs: + try: + config = load_config(config_path) + except InvalidConfigError: + dead_configs.append(config_path) + continue + else: + for repo in config['repos']: + _mark_used_repos(store, all_repos, unused_repos, repo) + + store.delete_configs(dead_configs) + for db_repo_name, ref in unused_repos: + store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)]) + return len(unused_repos) + + +def gc(store: Store) -> int: + with store.exclusive_lock(): + repos_removed = _gc_repos(store) + output.write_line(f'{repos_removed} repo(s) removed.') + return 0 diff --git a/pre_commit/commands/hook_impl.py b/pre_commit/commands/hook_impl.py new file mode 100644 index 0000000..5ff4555 --- /dev/null +++ b/pre_commit/commands/hook_impl.py @@ -0,0 +1,187 @@ +import argparse +import os.path +import subprocess +import sys +from typing import Optional +from typing import Sequence +from typing import Tuple + +from pre_commit.commands.run import run +from pre_commit.envcontext import envcontext +from pre_commit.parse_shebang import normalize_cmd +from pre_commit.store import Store + +Z40 = '0' * 40 + + +def _run_legacy( + hook_type: str, + hook_dir: str, + args: Sequence[str], +) -> Tuple[int, bytes]: + if os.environ.get('PRE_COMMIT_RUNNING_LEGACY'): + raise SystemExit( + f"bug: pre-commit's script is installed in migration mode\n" + f'run `pre-commit install -f --hook-type {hook_type}` to fix ' + f'this\n\n' + f'Please report this bug at ' + f'https://github.com/pre-commit/pre-commit/issues', + ) + + if hook_type == 'pre-push': + stdin = sys.stdin.buffer.read() + else: + stdin = b'' + + # not running in legacy mode + legacy_hook = os.path.join(hook_dir, f'{hook_type}.legacy') + if not os.access(legacy_hook, os.X_OK): + return 0, stdin + + with envcontext((('PRE_COMMIT_RUNNING_LEGACY', '1'),)): + cmd = normalize_cmd((legacy_hook, *args)) + return subprocess.run(cmd, input=stdin).returncode, stdin + + +def _validate_config( + retv: int, + config: str, + skip_on_missing_config: bool, +) -> None: + if not os.path.isfile(config): + if skip_on_missing_config or os.getenv('PRE_COMMIT_ALLOW_NO_CONFIG'): + print(f'`{config}` config file not found. Skipping `pre-commit`.') + raise SystemExit(retv) + else: + print( + f'No {config} file was found\n' + f'- To temporarily silence this, run ' + f'`PRE_COMMIT_ALLOW_NO_CONFIG=1 git ...`\n' + f'- To permanently silence this, install pre-commit with the ' + f'--allow-missing-config option\n' + f'- To uninstall pre-commit run `pre-commit uninstall`', + ) + raise SystemExit(1) + + +def _ns( + hook_type: str, + color: bool, + *, + all_files: bool = False, + from_ref: Optional[str] = None, + to_ref: Optional[str] = None, + remote_name: Optional[str] = None, + remote_url: Optional[str] = None, + commit_msg_filename: Optional[str] = None, + checkout_type: Optional[str] = None, +) -> argparse.Namespace: + return argparse.Namespace( + color=color, + hook_stage=hook_type.replace('pre-', ''), + from_ref=from_ref, + to_ref=to_ref, + remote_name=remote_name, + remote_url=remote_url, + commit_msg_filename=commit_msg_filename, + all_files=all_files, + checkout_type=checkout_type, + files=(), + hook=None, + verbose=False, + show_diff_on_failure=False, + ) + + +def _rev_exists(rev: str) -> bool: + return not subprocess.call(('git', 'rev-list', '--quiet', rev)) + + +def _pre_push_ns( + color: bool, + args: Sequence[str], + stdin: bytes, +) -> Optional[argparse.Namespace]: + remote_name = args[0] + remote_url = args[1] + + for line in stdin.decode().splitlines(): + _, local_sha, _, remote_sha = line.split() + if local_sha == Z40: + continue + elif remote_sha != Z40 and _rev_exists(remote_sha): + return _ns( + 'pre-push', color, + from_ref=remote_sha, to_ref=local_sha, + remote_name=remote_name, remote_url=remote_url, + ) + else: + # ancestors not found in remote + ancestors = subprocess.check_output(( + 'git', 'rev-list', local_sha, '--topo-order', '--reverse', + '--not', f'--remotes={remote_name}', + )).decode().strip() + if not ancestors: + continue + else: + first_ancestor = ancestors.splitlines()[0] + cmd = ('git', 'rev-list', '--max-parents=0', local_sha) + roots = set(subprocess.check_output(cmd).decode().splitlines()) + if first_ancestor in roots: + # pushing the whole tree including root commit + return _ns( + 'pre-push', color, + all_files=True, + remote_name=remote_name, remote_url=remote_url, + ) + else: + rev_cmd = ('git', 'rev-parse', f'{first_ancestor}^') + source = subprocess.check_output(rev_cmd).decode().strip() + return _ns( + 'pre-push', color, + from_ref=source, to_ref=local_sha, + remote_name=remote_name, remote_url=remote_url, + ) + + # nothing to push + return None + + +def _run_ns( + hook_type: str, + color: bool, + args: Sequence[str], + stdin: bytes, +) -> Optional[argparse.Namespace]: + if hook_type == 'pre-push': + return _pre_push_ns(color, args, stdin) + elif hook_type in {'prepare-commit-msg', 'commit-msg'}: + return _ns(hook_type, color, commit_msg_filename=args[0]) + elif hook_type in {'pre-merge-commit', 'pre-commit'}: + return _ns(hook_type, color) + elif hook_type == 'post-checkout': + return _ns( + hook_type, color, + from_ref=args[0], to_ref=args[1], checkout_type=args[2], + ) + else: + raise AssertionError(f'unexpected hook type: {hook_type}') + + +def hook_impl( + store: Store, + *, + config: str, + color: bool, + hook_type: str, + hook_dir: str, + skip_on_missing_config: bool, + args: Sequence[str], +) -> int: + retv, stdin = _run_legacy(hook_type, hook_dir, args) + _validate_config(retv, config, skip_on_missing_config) + ns = _run_ns(hook_type, color, args, stdin) + if ns is None: + return retv + else: + return retv | run(config, store, ns) diff --git a/pre_commit/commands/init_templatedir.py b/pre_commit/commands/init_templatedir.py new file mode 100644 index 0000000..f676fb1 --- /dev/null +++ b/pre_commit/commands/init_templatedir.py @@ -0,0 +1,33 @@ +import logging +import os.path +from typing import Sequence + +from pre_commit.commands.install_uninstall import install +from pre_commit.store import Store +from pre_commit.util import CalledProcessError +from pre_commit.util import cmd_output + +logger = logging.getLogger('pre_commit') + + +def init_templatedir( + config_file: str, + store: Store, + directory: str, + hook_types: Sequence[str], +) -> int: + install( + config_file, store, hook_types=hook_types, + overwrite=True, skip_on_missing_config=True, git_dir=directory, + ) + try: + _, out, _ = cmd_output('git', 'config', 'init.templateDir') + except CalledProcessError: + configured_path = None + else: + configured_path = os.path.realpath(os.path.expanduser(out.strip())) + dest = os.path.realpath(directory) + if configured_path != dest: + logger.warning('`init.templateDir` not set to the target directory') + logger.warning(f'maybe `git config --global init.templateDir {dest}`?') + return 0 diff --git a/pre_commit/commands/install_uninstall.py b/pre_commit/commands/install_uninstall.py new file mode 100644 index 0000000..c8b7633 --- /dev/null +++ b/pre_commit/commands/install_uninstall.py @@ -0,0 +1,175 @@ +import itertools +import logging +import os.path +import shutil +import sys +from typing import Optional +from typing import Sequence +from typing import Tuple + +from pre_commit import git +from pre_commit import output +from pre_commit.clientlib import load_config +from pre_commit.repository import all_hooks +from pre_commit.repository import install_hook_envs +from pre_commit.store import Store +from pre_commit.util import make_executable +from pre_commit.util import resource_text + + +logger = logging.getLogger(__name__) + +# This is used to identify the hook file we install +PRIOR_HASHES = ( + '4d9958c90bc262f47553e2c073f14cfe', + 'd8ee923c46731b42cd95cc869add4062', + '49fd668cb42069aa1b6048464be5d395', + '79f09a650522a87b0da915d0d983b2de', + 'e358c9dae00eac5d06b38dfdb1e33a8c', +) +CURRENT_HASH = '138fd403232d2ddd5efb44317e38bf03' +TEMPLATE_START = '# start templated\n' +TEMPLATE_END = '# end templated\n' +# Homebrew/homebrew-core#35825: be more timid about appropriate `PATH` +# #1312 os.defpath is too restrictive on BSD +POSIX_SEARCH_PATH = ('/usr/local/bin', '/usr/bin', '/bin') +SYS_EXE = os.path.basename(os.path.realpath(sys.executable)) + + +def _hook_paths( + hook_type: str, + git_dir: Optional[str] = None, +) -> Tuple[str, str]: + git_dir = git_dir if git_dir is not None else git.get_git_dir() + pth = os.path.join(git_dir, 'hooks', hook_type) + return pth, f'{pth}.legacy' + + +def is_our_script(filename: str) -> bool: + if not os.path.exists(filename): # pragma: win32 no cover (symlink) + return False + with open(filename) as f: + contents = f.read() + return any(h in contents for h in (CURRENT_HASH,) + PRIOR_HASHES) + + +def shebang() -> str: + if sys.platform == 'win32': + py = SYS_EXE + else: + exe_choices = [ + f'python{sys.version_info[0]}.{sys.version_info[1]}', + f'python{sys.version_info[0]}', + ] + # avoid searching for bare `python` as it's likely to be python 2 + if SYS_EXE != 'python': + exe_choices.append(SYS_EXE) + for path, exe in itertools.product(POSIX_SEARCH_PATH, exe_choices): + if os.access(os.path.join(path, exe), os.X_OK): + py = exe + break + else: + py = SYS_EXE + return f'#!/usr/bin/env {py}' + + +def _install_hook_script( + config_file: str, + hook_type: str, + overwrite: bool = False, + skip_on_missing_config: bool = False, + git_dir: Optional[str] = None, +) -> None: + hook_path, legacy_path = _hook_paths(hook_type, git_dir=git_dir) + + os.makedirs(os.path.dirname(hook_path), exist_ok=True) + + # If we have an existing hook, move it to pre-commit.legacy + if os.path.lexists(hook_path) and not is_our_script(hook_path): + shutil.move(hook_path, legacy_path) + + # If we specify overwrite, we simply delete the legacy file + if overwrite and os.path.exists(legacy_path): + os.remove(legacy_path) + elif os.path.exists(legacy_path): + output.write_line( + f'Running in migration mode with existing hooks at {legacy_path}\n' + f'Use -f to use only pre-commit.', + ) + + args = ['hook-impl', f'--config={config_file}', f'--hook-type={hook_type}'] + if skip_on_missing_config: + args.append('--skip-on-missing-config') + params = {'INSTALL_PYTHON': sys.executable, 'ARGS': args} + + with open(hook_path, 'w') as hook_file: + contents = resource_text('hook-tmpl') + before, rest = contents.split(TEMPLATE_START) + to_template, after = rest.split(TEMPLATE_END) + + before = before.replace('#!/usr/bin/env python3', shebang()) + + hook_file.write(before + TEMPLATE_START) + for line in to_template.splitlines(): + var = line.split()[0] + hook_file.write(f'{var} = {params[var]!r}\n') + hook_file.write(TEMPLATE_END + after) + make_executable(hook_path) + + output.write_line(f'pre-commit installed at {hook_path}') + + +def install( + config_file: str, + store: Store, + hook_types: Sequence[str], + overwrite: bool = False, + hooks: bool = False, + skip_on_missing_config: bool = False, + git_dir: Optional[str] = None, +) -> int: + if git_dir is None and git.has_core_hookpaths_set(): + logger.error( + 'Cowardly refusing to install hooks with `core.hooksPath` set.\n' + 'hint: `git config --unset-all core.hooksPath`', + ) + return 1 + + for hook_type in hook_types: + _install_hook_script( + config_file, hook_type, + overwrite=overwrite, + skip_on_missing_config=skip_on_missing_config, + git_dir=git_dir, + ) + + if hooks: + install_hooks(config_file, store) + + return 0 + + +def install_hooks(config_file: str, store: Store) -> int: + install_hook_envs(all_hooks(load_config(config_file), store), store) + return 0 + + +def _uninstall_hook_script(hook_type: str) -> None: + hook_path, legacy_path = _hook_paths(hook_type) + + # If our file doesn't exist or it isn't ours, gtfo. + if not os.path.exists(hook_path) or not is_our_script(hook_path): + return + + os.remove(hook_path) + output.write_line(f'{hook_type} uninstalled') + + if os.path.exists(legacy_path): + os.rename(legacy_path, hook_path) + output.write_line(f'Restored previous hooks to {hook_path}') + + +def uninstall(hook_types: Sequence[str]) -> int: + for hook_type in hook_types: + _uninstall_hook_script(hook_type) + return 0 diff --git a/pre_commit/commands/migrate_config.py b/pre_commit/commands/migrate_config.py new file mode 100644 index 0000000..d83b8e9 --- /dev/null +++ b/pre_commit/commands/migrate_config.py @@ -0,0 +1,59 @@ +import re + +import yaml + +from pre_commit.util import yaml_load + + +def _indent(s: str) -> str: + lines = s.splitlines(True) + return ''.join(' ' * 4 + line if line.strip() else line for line in lines) + + +def _is_header_line(line: str) -> bool: + return line.startswith(('#', '---')) or not line.strip() + + +def _migrate_map(contents: str) -> str: + # Find the first non-header line + lines = contents.splitlines(True) + i = 0 + # Only loop on non empty configuration file + while i < len(lines) and _is_header_line(lines[i]): + i += 1 + + header = ''.join(lines[:i]) + rest = ''.join(lines[i:]) + + if isinstance(yaml_load(contents), list): + # If they are using the "default" flow style of yaml, this operation + # will yield a valid configuration + try: + trial_contents = f'{header}repos:\n{rest}' + yaml_load(trial_contents) + contents = trial_contents + except yaml.YAMLError: + contents = f'{header}repos:\n{_indent(rest)}' + + return contents + + +def _migrate_sha_to_rev(contents: str) -> str: + return re.sub(r'(\n\s+)sha:', r'\1rev:', contents) + + +def migrate_config(config_file: str, quiet: bool = False) -> int: + with open(config_file) as f: + orig_contents = contents = f.read() + + contents = _migrate_map(contents) + contents = _migrate_sha_to_rev(contents) + + if contents != orig_contents: + with open(config_file, 'w') as f: + f.write(contents) + + print('Configuration has been migrated.') + elif not quiet: + print('Configuration is already migrated.') + return 0 diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py new file mode 100644 index 0000000..2f74578 --- /dev/null +++ b/pre_commit/commands/run.py @@ -0,0 +1,360 @@ +import argparse +import contextlib +import functools +import logging +import os +import re +import subprocess +import time +from typing import Any +from typing import Collection +from typing import Dict +from typing import List +from typing import Sequence +from typing import Set +from typing import Tuple + +from identify.identify import tags_from_path + +from pre_commit import color +from pre_commit import git +from pre_commit import output +from pre_commit.clientlib import load_config +from pre_commit.hook import Hook +from pre_commit.languages.all import languages +from pre_commit.repository import all_hooks +from pre_commit.repository import install_hook_envs +from pre_commit.staged_files_only import staged_files_only +from pre_commit.store import Store +from pre_commit.util import cmd_output_b +from pre_commit.util import EnvironT + + +logger = logging.getLogger('pre_commit') + + +def _start_msg(*, start: str, cols: int, end_len: int) -> str: + dots = '.' * (cols - len(start) - end_len - 1) + return f'{start}{dots}' + + +def _full_msg( + *, + start: str, + cols: int, + end_msg: str, + end_color: str, + use_color: bool, + postfix: str = '', +) -> str: + dots = '.' * (cols - len(start) - len(postfix) - len(end_msg) - 1) + end = color.format_color(end_msg, end_color, use_color) + return f'{start}{dots}{postfix}{end}\n' + + +def filter_by_include_exclude( + names: Collection[str], + include: str, + exclude: str, +) -> List[str]: + include_re, exclude_re = re.compile(include), re.compile(exclude) + return [ + filename for filename in names + if include_re.search(filename) + if not exclude_re.search(filename) + ] + + +class Classifier: + def __init__(self, filenames: Sequence[str]) -> None: + # on windows we normalize all filenames to use forward slashes + # this makes it easier to filter using the `files:` regex + # this also makes improperly quoted shell-based hooks work better + # see #1173 + if os.altsep == '/' and os.sep == '\\': + filenames = [f.replace(os.sep, os.altsep) for f in filenames] + self.filenames = [f for f in filenames if os.path.lexists(f)] + + @functools.lru_cache(maxsize=None) + def _types_for_file(self, filename: str) -> Set[str]: + return tags_from_path(filename) + + def by_types( + self, + names: Sequence[str], + types: Collection[str], + exclude_types: Collection[str], + ) -> List[str]: + types, exclude_types = frozenset(types), frozenset(exclude_types) + ret = [] + for filename in names: + tags = self._types_for_file(filename) + if tags >= types and not tags & exclude_types: + ret.append(filename) + return ret + + def filenames_for_hook(self, hook: Hook) -> Tuple[str, ...]: + names = self.filenames + names = filter_by_include_exclude(names, hook.files, hook.exclude) + names = self.by_types(names, hook.types, hook.exclude_types) + return tuple(names) + + +def _get_skips(environ: EnvironT) -> Set[str]: + skips = environ.get('SKIP', '') + return {skip.strip() for skip in skips.split(',') if skip.strip()} + + +SKIPPED = 'Skipped' +NO_FILES = '(no files to check)' + + +def _subtle_line(s: str, use_color: bool) -> None: + output.write_line(color.format_color(s, color.SUBTLE, use_color)) + + +def _run_single_hook( + classifier: Classifier, + hook: Hook, + skips: Set[str], + cols: int, + verbose: bool, + use_color: bool, +) -> bool: + filenames = classifier.filenames_for_hook(hook) + + if hook.id in skips or hook.alias in skips: + output.write( + _full_msg( + start=hook.name, + end_msg=SKIPPED, + end_color=color.YELLOW, + use_color=use_color, + cols=cols, + ), + ) + duration = None + retcode = 0 + files_modified = False + out = b'' + elif not filenames and not hook.always_run: + output.write( + _full_msg( + start=hook.name, + postfix=NO_FILES, + end_msg=SKIPPED, + end_color=color.TURQUOISE, + use_color=use_color, + cols=cols, + ), + ) + duration = None + retcode = 0 + files_modified = False + out = b'' + else: + # print hook and dots first in case the hook takes a while to run + output.write(_start_msg(start=hook.name, end_len=6, cols=cols)) + + diff_cmd = ('git', 'diff', '--no-ext-diff') + diff_before = cmd_output_b(*diff_cmd, retcode=None) + if not hook.pass_filenames: + filenames = () + time_before = time.time() + language = languages[hook.language] + retcode, out = language.run_hook(hook, filenames, use_color) + duration = round(time.time() - time_before, 2) or 0 + diff_after = cmd_output_b(*diff_cmd, retcode=None) + + # if the hook makes changes, fail the commit + files_modified = diff_before != diff_after + + if retcode or files_modified: + print_color = color.RED + status = 'Failed' + else: + print_color = color.GREEN + status = 'Passed' + + output.write_line(color.format_color(status, print_color, use_color)) + + if verbose or hook.verbose or retcode or files_modified: + _subtle_line(f'- hook id: {hook.id}', use_color) + + if (verbose or hook.verbose) and duration is not None: + _subtle_line(f'- duration: {duration}s', use_color) + + if retcode: + _subtle_line(f'- exit code: {retcode}', use_color) + + # Print a message if failing due to file modifications + if files_modified: + _subtle_line('- files were modified by this hook', use_color) + + if out.strip(): + output.write_line() + output.write_line_b(out.strip(), logfile_name=hook.log_file) + output.write_line() + + return files_modified or bool(retcode) + + +def _compute_cols(hooks: Sequence[Hook]) -> int: + """Compute the number of columns to display hook messages. The widest + that will be displayed is in the no files skipped case: + + Hook name...(no files to check) Skipped + """ + if hooks: + name_len = max(len(hook.name) for hook in hooks) + else: + name_len = 0 + + cols = name_len + 3 + len(NO_FILES) + 1 + len(SKIPPED) + return max(cols, 80) + + +def _all_filenames(args: argparse.Namespace) -> Collection[str]: + if args.hook_stage == 'post-checkout': # no files for post-checkout + return () + elif args.hook_stage in {'prepare-commit-msg', 'commit-msg'}: + return (args.commit_msg_filename,) + elif args.from_ref and args.to_ref: + return git.get_changed_files(args.from_ref, args.to_ref) + elif args.files: + return args.files + elif args.all_files: + return git.get_all_files() + elif git.is_in_merge_conflict(): + return git.get_conflicted_files() + else: + return git.get_staged_files() + + +def _run_hooks( + config: Dict[str, Any], + hooks: Sequence[Hook], + args: argparse.Namespace, + environ: EnvironT, +) -> int: + """Actually run the hooks.""" + skips = _get_skips(environ) + cols = _compute_cols(hooks) + filenames = filter_by_include_exclude( + _all_filenames(args), config['files'], config['exclude'], + ) + classifier = Classifier(filenames) + retval = 0 + for hook in hooks: + retval |= _run_single_hook( + classifier, hook, skips, cols, + verbose=args.verbose, use_color=args.color, + ) + if retval and config['fail_fast']: + break + if retval and args.show_diff_on_failure and git.has_diff(): + if args.all_files: + output.write_line( + 'pre-commit hook(s) made changes.\n' + 'If you are seeing this message in CI, ' + 'reproduce locally with: `pre-commit run --all-files`.\n' + 'To run `pre-commit` as part of git workflow, use ' + '`pre-commit install`.', + ) + output.write_line('All changes made by hooks:') + # args.color is a boolean. + # See user_color function in color.py + git_color_opt = 'always' if args.color else 'never' + subprocess.call(( + 'git', '--no-pager', 'diff', '--no-ext-diff', + f'--color={git_color_opt}', + )) + + return retval + + +def _has_unmerged_paths() -> bool: + _, stdout, _ = cmd_output_b('git', 'ls-files', '--unmerged') + return bool(stdout.strip()) + + +def _has_unstaged_config(config_file: str) -> bool: + retcode, _, _ = cmd_output_b( + 'git', 'diff', '--no-ext-diff', '--exit-code', config_file, + retcode=None, + ) + # be explicit, other git errors don't mean it has an unstaged config. + return retcode == 1 + + +def run( + config_file: str, + store: Store, + args: argparse.Namespace, + environ: EnvironT = os.environ, +) -> int: + stash = not args.all_files and not args.files + + # Check if we have unresolved merge conflict files and fail fast. + if _has_unmerged_paths(): + logger.error('Unmerged files. Resolve before committing.') + return 1 + if bool(args.from_ref) != bool(args.to_ref): + logger.error('Specify both --from-ref and --to-ref.') + return 1 + if stash and _has_unstaged_config(config_file): + logger.error( + f'Your pre-commit configuration is unstaged.\n' + f'`git add {config_file}` to fix this.', + ) + return 1 + if ( + args.hook_stage in {'prepare-commit-msg', 'commit-msg'} and + not args.commit_msg_filename + ): + logger.error( + f'`--commit-msg-filename` is required for ' + f'`--hook-stage {args.hook_stage}`', + ) + return 1 + + # Expose from-ref / to-ref as environment variables for hooks to consume + if args.from_ref and args.to_ref: + # legacy names + environ['PRE_COMMIT_ORIGIN'] = args.from_ref + environ['PRE_COMMIT_SOURCE'] = args.to_ref + # new names + environ['PRE_COMMIT_FROM_REF'] = args.from_ref + environ['PRE_COMMIT_TO_REF'] = args.to_ref + + if args.remote_name and args.remote_url: + environ['PRE_COMMIT_REMOTE_NAME'] = args.remote_name + environ['PRE_COMMIT_REMOTE_URL'] = args.remote_url + + if args.checkout_type: + environ['PRE_COMMIT_CHECKOUT_TYPE'] = args.checkout_type + + with contextlib.ExitStack() as exit_stack: + if stash: + exit_stack.enter_context(staged_files_only(store.directory)) + + config = load_config(config_file) + hooks = [ + hook + for hook in all_hooks(config, store) + if not args.hook or hook.id == args.hook or hook.alias == args.hook + if args.hook_stage in hook.stages + ] + + if args.hook and not hooks: + output.write_line( + f'No hook with id `{args.hook}` in stage `{args.hook_stage}`', + ) + return 1 + + install_hook_envs(hooks, store) + + return _run_hooks(config, hooks, args, environ) + + # https://github.com/python/mypy/issues/7726 + raise AssertionError('unreachable') diff --git a/pre_commit/commands/sample_config.py b/pre_commit/commands/sample_config.py new file mode 100644 index 0000000..d435faa --- /dev/null +++ b/pre_commit/commands/sample_config.py @@ -0,0 +1,21 @@ +# TODO: maybe `git ls-remote git://github.com/pre-commit/pre-commit-hooks` to +# determine the latest revision? This adds ~200ms from my tests (and is +# significantly faster than https:// or http://). For now, periodically +# manually updating the revision is fine. +SAMPLE_CONFIG = '''\ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +''' + + +def sample_config() -> int: + print(SAMPLE_CONFIG, end='') + return 0 diff --git a/pre_commit/commands/try_repo.py b/pre_commit/commands/try_repo.py new file mode 100644 index 0000000..4aee209 --- /dev/null +++ b/pre_commit/commands/try_repo.py @@ -0,0 +1,77 @@ +import argparse +import logging +import os.path +from typing import Optional +from typing import Tuple + +import pre_commit.constants as C +from pre_commit import git +from pre_commit import output +from pre_commit.clientlib import load_manifest +from pre_commit.commands.run import run +from pre_commit.store import Store +from pre_commit.util import cmd_output_b +from pre_commit.util import tmpdir +from pre_commit.util import yaml_dump +from pre_commit.xargs import xargs + +logger = logging.getLogger(__name__) + + +def _repo_ref(tmpdir: str, repo: str, ref: Optional[str]) -> Tuple[str, str]: + # if `ref` is explicitly passed, use it + if ref is not None: + return repo, ref + + ref = git.head_rev(repo) + # if it exists on disk, we'll try and clone it with the local changes + if os.path.exists(repo) and git.has_diff('HEAD', repo=repo): + logger.warning('Creating temporary repo with uncommitted changes...') + + shadow = os.path.join(tmpdir, 'shadow-repo') + cmd_output_b('git', 'clone', repo, shadow) + cmd_output_b('git', 'checkout', ref, '-b', '_pc_tmp', cwd=shadow) + + idx = git.git_path('index', repo=shadow) + objs = git.git_path('objects', repo=shadow) + env = dict(os.environ, GIT_INDEX_FILE=idx, GIT_OBJECT_DIRECTORY=objs) + + staged_files = git.get_staged_files(cwd=repo) + if staged_files: + xargs(('git', 'add', '--'), staged_files, cwd=repo, env=env) + + cmd_output_b('git', 'add', '-u', cwd=repo, env=env) + git.commit(repo=shadow) + + return shadow, git.head_rev(shadow) + else: + return repo, ref + + +def try_repo(args: argparse.Namespace) -> int: + with tmpdir() as tempdir: + repo, ref = _repo_ref(tempdir, args.repo, args.ref) + + store = Store(tempdir) + if args.hook: + hooks = [{'id': args.hook}] + else: + repo_path = store.clone(repo, ref) + manifest = load_manifest(os.path.join(repo_path, C.MANIFEST_FILE)) + manifest = sorted(manifest, key=lambda hook: hook['id']) + hooks = [{'id': hook['id']} for hook in manifest] + + config = {'repos': [{'repo': repo, 'rev': ref, 'hooks': hooks}]} + config_s = yaml_dump(config) + + config_filename = os.path.join(tempdir, C.CONFIG_FILE) + with open(config_filename, 'w') as cfg: + cfg.write(config_s) + + output.write_line('=' * 79) + output.write_line('Using config:') + output.write_line('=' * 79) + output.write(config_s) + output.write_line('=' * 79) + + return run(config_filename, store, args) diff --git a/pre_commit/constants.py b/pre_commit/constants.py new file mode 100644 index 0000000..e2b8e3a --- /dev/null +++ b/pre_commit/constants.py @@ -0,0 +1,24 @@ +import sys + +if sys.version_info < (3, 8): # pragma: no cover (<PY38) + import importlib_metadata +else: # pragma: no cover (PY38+) + import importlib.metadata as importlib_metadata + +CONFIG_FILE = '.pre-commit-config.yaml' +MANIFEST_FILE = '.pre-commit-hooks.yaml' + +# Bump when installation changes in a backwards / forwards incompatible way +INSTALLED_STATE_VERSION = '1' +# Bump when modifying `empty_template` +LOCAL_REPO_VERSION = '1' + +VERSION = importlib_metadata.version('pre_commit') + +# `manual` is not invoked by any installed git hook. See #719 +STAGES = ( + 'commit', 'merge-commit', 'prepare-commit-msg', 'commit-msg', 'manual', + 'post-checkout', 'push', +) + +DEFAULT = 'default' diff --git a/pre_commit/envcontext.py b/pre_commit/envcontext.py new file mode 100644 index 0000000..16d3d15 --- /dev/null +++ b/pre_commit/envcontext.py @@ -0,0 +1,67 @@ +import contextlib +import enum +import os +from typing import Generator +from typing import NamedTuple +from typing import Optional +from typing import Tuple +from typing import Union + +from pre_commit.util import EnvironT + + +class _Unset(enum.Enum): + UNSET = 1 + + +UNSET = _Unset.UNSET + + +class Var(NamedTuple): + name: str + default: str = '' + + +SubstitutionT = Tuple[Union[str, Var], ...] +ValueT = Union[str, _Unset, SubstitutionT] +PatchesT = Tuple[Tuple[str, ValueT], ...] + + +def format_env(parts: SubstitutionT, env: EnvironT) -> str: + return ''.join( + env.get(part.name, part.default) if isinstance(part, Var) else part + for part in parts + ) + + +@contextlib.contextmanager +def envcontext( + patch: PatchesT, + _env: Optional[EnvironT] = None, +) -> Generator[None, None, None]: + """In this context, `os.environ` is modified according to `patch`. + + `patch` is an iterable of 2-tuples (key, value): + `key`: string + `value`: + - string: `environ[key] == value` inside the context. + - UNSET: `key not in environ` inside the context. + - template: A template is a tuple of strings and Var which will be + replaced with the previous environment + """ + env = os.environ if _env is None else _env + before = env.copy() + + for k, v in patch: + if v is UNSET: + env.pop(k, None) + elif isinstance(v, tuple): + env[k] = format_env(v, before) + else: + env[k] = v + + try: + yield + finally: + env.clear() + env.update(before) diff --git a/pre_commit/error_handler.py b/pre_commit/error_handler.py new file mode 100644 index 0000000..b2321ae --- /dev/null +++ b/pre_commit/error_handler.py @@ -0,0 +1,64 @@ +import contextlib +import functools +import os.path +import sys +import traceback +from typing import Generator + +import pre_commit.constants as C +from pre_commit import output +from pre_commit.store import Store +from pre_commit.util import force_bytes + + +class FatalError(RuntimeError): + pass + + +def _log_and_exit(msg: str, exc: BaseException, formatted: str) -> None: + error_msg = f'{msg}: {type(exc).__name__}: '.encode() + force_bytes(exc) + output.write_line_b(error_msg) + log_path = os.path.join(Store().directory, 'pre-commit.log') + output.write_line(f'Check the log at {log_path}') + + with open(log_path, 'wb') as log: + _log_line = functools.partial(output.write_line, stream=log) + _log_line_b = functools.partial(output.write_line_b, stream=log) + + _log_line('### version information') + _log_line() + _log_line('```') + _log_line(f'pre-commit version: {C.VERSION}') + _log_line('sys.version:') + for line in sys.version.splitlines(): + _log_line(f' {line}') + _log_line(f'sys.executable: {sys.executable}') + _log_line(f'os.name: {os.name}') + _log_line(f'sys.platform: {sys.platform}') + _log_line('```') + _log_line() + + _log_line('### error information') + _log_line() + _log_line('```') + _log_line_b(error_msg) + _log_line('```') + _log_line() + _log_line('```') + _log_line(formatted) + _log_line('```') + raise SystemExit(1) + + +@contextlib.contextmanager +def error_handler() -> Generator[None, None, None]: + try: + yield + except (Exception, KeyboardInterrupt) as e: + if isinstance(e, FatalError): + msg = 'An error has occurred' + elif isinstance(e, KeyboardInterrupt): + msg = 'Interrupted (^C)' + else: + msg = 'An unexpected error has occurred' + _log_and_exit(msg, e, traceback.format_exc()) diff --git a/pre_commit/file_lock.py b/pre_commit/file_lock.py new file mode 100644 index 0000000..ff0dc5e --- /dev/null +++ b/pre_commit/file_lock.py @@ -0,0 +1,76 @@ +import contextlib +import errno +import os +from typing import Callable +from typing import Generator + + +if os.name == 'nt': # pragma: no cover (windows) + import msvcrt + + # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/locking + + # on windows we lock "regions" of files, we don't care about the actual + # byte region so we'll just pick *some* number here. + _region = 0xffff + + @contextlib.contextmanager + def _locked( + fileno: int, + blocked_cb: Callable[[], None], + ) -> Generator[None, None, None]: + try: + # TODO: https://github.com/python/typeshed/pull/3607 + msvcrt.locking(fileno, msvcrt.LK_NBLCK, _region) # type: ignore + except OSError: + blocked_cb() + while True: + try: + # TODO: https://github.com/python/typeshed/pull/3607 + msvcrt.locking(fileno, msvcrt.LK_LOCK, _region) # type: ignore # noqa: E501 + except OSError as e: + # Locking violation. Returned when the _LK_LOCK or _LK_RLCK + # flag is specified and the file cannot be locked after 10 + # attempts. + if e.errno != errno.EDEADLOCK: + raise + else: + break + + try: + yield + finally: + # From cursory testing, it seems to get unlocked when the file is + # closed so this may not be necessary. + # The documentation however states: + # "Regions should be locked only briefly and should be unlocked + # before closing a file or exiting the program." + # TODO: https://github.com/python/typeshed/pull/3607 + msvcrt.locking(fileno, msvcrt.LK_UNLCK, _region) # type: ignore +else: # pragma: win32 no cover + import fcntl + + @contextlib.contextmanager + def _locked( + fileno: int, + blocked_cb: Callable[[], None], + ) -> Generator[None, None, None]: + try: + fcntl.flock(fileno, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError: # pragma: no cover (tests are single-threaded) + blocked_cb() + fcntl.flock(fileno, fcntl.LOCK_EX) + try: + yield + finally: + fcntl.flock(fileno, fcntl.LOCK_UN) + + +@contextlib.contextmanager +def lock( + path: str, + blocked_cb: Callable[[], None], +) -> Generator[None, None, None]: + with open(path, 'a+') as f: + with _locked(f.fileno(), blocked_cb): + yield diff --git a/pre_commit/git.py b/pre_commit/git.py new file mode 100644 index 0000000..7e757f2 --- /dev/null +++ b/pre_commit/git.py @@ -0,0 +1,196 @@ +import logging +import os.path +import sys +from typing import Dict +from typing import List +from typing import Optional +from typing import Set + +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b +from pre_commit.util import EnvironT + + +logger = logging.getLogger(__name__) + + +def zsplit(s: str) -> List[str]: + s = s.strip('\0') + if s: + return s.split('\0') + else: + return [] + + +def no_git_env(_env: Optional[EnvironT] = None) -> Dict[str, str]: + # Too many bugs dealing with environment variables and GIT: + # https://github.com/pre-commit/pre-commit/issues/300 + # In git 2.6.3 (maybe others), git exports GIT_WORK_TREE while running + # pre-commit hooks + # In git 1.9.1 (maybe others), git exports GIT_DIR and GIT_INDEX_FILE + # while running pre-commit hooks in submodules. + # GIT_DIR: Causes git clone to clone wrong thing + # GIT_INDEX_FILE: Causes 'error invalid object ...' during commit + _env = _env if _env is not None else os.environ + return { + k: v for k, v in _env.items() + if not k.startswith('GIT_') or + k in { + 'GIT_EXEC_PATH', 'GIT_SSH', 'GIT_SSH_COMMAND', 'GIT_SSL_CAINFO', + 'GIT_SSL_NO_VERIFY', + } + } + + +def get_root() -> str: + return cmd_output('git', 'rev-parse', '--show-toplevel')[1].strip() + + +def get_git_dir(git_root: str = '.') -> str: + opts = ('--git-common-dir', '--git-dir') + _, out, _ = cmd_output('git', 'rev-parse', *opts, cwd=git_root) + for line, opt in zip(out.splitlines(), opts): + if line != opt: # pragma: no branch (git < 2.5) + return os.path.normpath(os.path.join(git_root, line)) + else: + raise AssertionError('unreachable: no git dir') + + +def get_remote_url(git_root: str) -> str: + _, out, _ = cmd_output('git', 'config', 'remote.origin.url', cwd=git_root) + return out.strip() + + +def is_in_merge_conflict() -> bool: + git_dir = get_git_dir('.') + return ( + os.path.exists(os.path.join(git_dir, 'MERGE_MSG')) and + os.path.exists(os.path.join(git_dir, 'MERGE_HEAD')) + ) + + +def parse_merge_msg_for_conflicts(merge_msg: bytes) -> List[str]: + # Conflicted files start with tabs + return [ + line.lstrip(b'#').strip().decode() + for line in merge_msg.splitlines() + # '#\t' for git 2.4.1 + if line.startswith((b'\t', b'#\t')) + ] + + +def get_conflicted_files() -> Set[str]: + logger.info('Checking merge-conflict files only.') + # Need to get the conflicted files from the MERGE_MSG because they could + # have resolved the conflict by choosing one side or the other + with open(os.path.join(get_git_dir('.'), 'MERGE_MSG'), 'rb') as f: + merge_msg = f.read() + merge_conflict_filenames = parse_merge_msg_for_conflicts(merge_msg) + + # This will get the rest of the changes made after the merge. + # If they resolved the merge conflict by choosing a mesh of both sides + # this will also include the conflicted files + tree_hash = cmd_output('git', 'write-tree')[1].strip() + merge_diff_filenames = zsplit( + cmd_output( + 'git', 'diff', '--name-only', '--no-ext-diff', '-z', + '-m', tree_hash, 'HEAD', 'MERGE_HEAD', + )[1], + ) + return set(merge_conflict_filenames) | set(merge_diff_filenames) + + +def get_staged_files(cwd: Optional[str] = None) -> List[str]: + return zsplit( + cmd_output( + 'git', 'diff', '--staged', '--name-only', '--no-ext-diff', '-z', + # Everything except for D + '--diff-filter=ACMRTUXB', + cwd=cwd, + )[1], + ) + + +def intent_to_add_files() -> List[str]: + _, stdout, _ = cmd_output('git', 'status', '--porcelain', '-z') + parts = list(reversed(zsplit(stdout))) + intent_to_add = [] + while parts: + line = parts.pop() + status, filename = line[:3], line[3:] + if status[0] in {'C', 'R'}: # renames / moves have an additional arg + parts.pop() + if status[1] == 'A': + intent_to_add.append(filename) + return intent_to_add + + +def get_all_files() -> List[str]: + return zsplit(cmd_output('git', 'ls-files', '-z')[1]) + + +def get_changed_files(old: str, new: str) -> List[str]: + return zsplit( + cmd_output( + 'git', 'diff', '--name-only', '--no-ext-diff', '-z', + f'{old}...{new}', + )[1], + ) + + +def head_rev(remote: str) -> str: + _, out, _ = cmd_output('git', 'ls-remote', '--exit-code', remote, 'HEAD') + return out.split()[0] + + +def has_diff(*args: str, repo: str = '.') -> bool: + cmd = ('git', 'diff', '--quiet', '--no-ext-diff', *args) + return cmd_output_b(*cmd, cwd=repo, retcode=None)[0] == 1 + + +def has_core_hookpaths_set() -> bool: + _, out, _ = cmd_output_b('git', 'config', 'core.hooksPath', retcode=None) + return bool(out.strip()) + + +def init_repo(path: str, remote: str) -> None: + if os.path.isdir(remote): + remote = os.path.abspath(remote) + + env = no_git_env() + cmd_output_b('git', 'init', path, env=env) + cmd_output_b('git', 'remote', 'add', 'origin', remote, cwd=path, env=env) + + +def commit(repo: str = '.') -> None: + env = no_git_env() + name, email = 'pre-commit', 'asottile+pre-commit@umich.edu' + env['GIT_AUTHOR_NAME'] = env['GIT_COMMITTER_NAME'] = name + env['GIT_AUTHOR_EMAIL'] = env['GIT_COMMITTER_EMAIL'] = email + cmd = ('git', 'commit', '--no-edit', '--no-gpg-sign', '-n', '-minit') + cmd_output_b(*cmd, cwd=repo, env=env) + + +def git_path(name: str, repo: str = '.') -> str: + _, out, _ = cmd_output('git', 'rev-parse', '--git-path', name, cwd=repo) + return os.path.join(repo, out.strip()) + + +def check_for_cygwin_mismatch() -> None: + """See https://github.com/pre-commit/pre-commit/issues/354""" + if sys.platform in ('cygwin', 'win32'): # pragma: no cover (windows) + is_cygwin_python = sys.platform == 'cygwin' + toplevel = cmd_output('git', 'rev-parse', '--show-toplevel')[1] + is_cygwin_git = toplevel.startswith('/') + + if is_cygwin_python ^ is_cygwin_git: + exe_type = {True: '(cygwin)', False: '(windows)'} + logger.warn( + f'pre-commit has detected a mix of cygwin python / git\n' + f'This combination is not supported, it is likely you will ' + f'receive an error later in the program.\n' + f'Make sure to use cygwin git+python while using cygwin\n' + f'These can be installed through the cygwin installer.\n' + f' - python {exe_type[is_cygwin_python]}\n' + f' - git {exe_type[is_cygwin_git]}\n', + ) diff --git a/pre_commit/hook.py b/pre_commit/hook.py new file mode 100644 index 0000000..b65ac42 --- /dev/null +++ b/pre_commit/hook.py @@ -0,0 +1,63 @@ +import logging +import shlex +from typing import Any +from typing import Dict +from typing import NamedTuple +from typing import Sequence +from typing import Tuple + +from pre_commit.prefix import Prefix + +logger = logging.getLogger('pre_commit') + + +class Hook(NamedTuple): + src: str + prefix: Prefix + id: str + name: str + entry: str + language: str + alias: str + files: str + exclude: str + types: Sequence[str] + exclude_types: Sequence[str] + additional_dependencies: Sequence[str] + args: Sequence[str] + always_run: bool + pass_filenames: bool + description: str + language_version: str + log_file: str + minimum_pre_commit_version: str + require_serial: bool + stages: Sequence[str] + verbose: bool + + @property + def cmd(self) -> Tuple[str, ...]: + return (*shlex.split(self.entry), *self.args) + + @property + def install_key(self) -> Tuple[Prefix, str, str, Tuple[str, ...]]: + return ( + self.prefix, + self.language, + self.language_version, + tuple(self.additional_dependencies), + ) + + @classmethod + def create(cls, src: str, prefix: Prefix, dct: Dict[str, Any]) -> 'Hook': + # TODO: have cfgv do this (?) + extra_keys = set(dct) - _KEYS + if extra_keys: + logger.warning( + f'Unexpected key(s) present on {src} => {dct["id"]}: ' + f'{", ".join(sorted(extra_keys))}', + ) + return cls(src=src, prefix=prefix, **{k: dct[k] for k in _KEYS}) + + +_KEYS = frozenset(set(Hook._fields) - {'src', 'prefix'}) diff --git a/pre_commit/languages/__init__.py b/pre_commit/languages/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pre_commit/languages/__init__.py diff --git a/pre_commit/languages/all.py b/pre_commit/languages/all.py new file mode 100644 index 0000000..8f4ffa8 --- /dev/null +++ b/pre_commit/languages/all.py @@ -0,0 +1,60 @@ +from typing import Callable +from typing import NamedTuple +from typing import Optional +from typing import Sequence +from typing import Tuple + +from pre_commit.hook import Hook +from pre_commit.languages import conda +from pre_commit.languages import docker +from pre_commit.languages import docker_image +from pre_commit.languages import fail +from pre_commit.languages import golang +from pre_commit.languages import node +from pre_commit.languages import perl +from pre_commit.languages import pygrep +from pre_commit.languages import python +from pre_commit.languages import python_venv +from pre_commit.languages import ruby +from pre_commit.languages import rust +from pre_commit.languages import script +from pre_commit.languages import swift +from pre_commit.languages import system +from pre_commit.prefix import Prefix + + +class Language(NamedTuple): + name: str + # Use `None` for no installation / environment + ENVIRONMENT_DIR: Optional[str] + # return a value to replace `'default` for `language_version` + get_default_version: Callable[[], str] + # return whether the environment is healthy (or should be rebuilt) + healthy: Callable[[Prefix, str], bool] + # install a repository for the given language and language_version + install_environment: Callable[[Prefix, str, Sequence[str]], None] + # execute a hook and return the exit code and output + run_hook: 'Callable[[Hook, Sequence[str], bool], Tuple[int, bytes]]' + + +# TODO: back to modules + Protocol: https://github.com/python/mypy/issues/5018 +languages = { + # BEGIN GENERATED (testing/gen-languages-all) + 'conda': Language(name='conda', ENVIRONMENT_DIR=conda.ENVIRONMENT_DIR, get_default_version=conda.get_default_version, healthy=conda.healthy, install_environment=conda.install_environment, run_hook=conda.run_hook), # noqa: E501 + 'docker': Language(name='docker', ENVIRONMENT_DIR=docker.ENVIRONMENT_DIR, get_default_version=docker.get_default_version, healthy=docker.healthy, install_environment=docker.install_environment, run_hook=docker.run_hook), # noqa: E501 + 'docker_image': Language(name='docker_image', ENVIRONMENT_DIR=docker_image.ENVIRONMENT_DIR, get_default_version=docker_image.get_default_version, healthy=docker_image.healthy, install_environment=docker_image.install_environment, run_hook=docker_image.run_hook), # noqa: E501 + 'fail': Language(name='fail', ENVIRONMENT_DIR=fail.ENVIRONMENT_DIR, get_default_version=fail.get_default_version, healthy=fail.healthy, install_environment=fail.install_environment, run_hook=fail.run_hook), # noqa: E501 + 'golang': Language(name='golang', ENVIRONMENT_DIR=golang.ENVIRONMENT_DIR, get_default_version=golang.get_default_version, healthy=golang.healthy, install_environment=golang.install_environment, run_hook=golang.run_hook), # noqa: E501 + 'node': Language(name='node', ENVIRONMENT_DIR=node.ENVIRONMENT_DIR, get_default_version=node.get_default_version, healthy=node.healthy, install_environment=node.install_environment, run_hook=node.run_hook), # noqa: E501 + 'perl': Language(name='perl', ENVIRONMENT_DIR=perl.ENVIRONMENT_DIR, get_default_version=perl.get_default_version, healthy=perl.healthy, install_environment=perl.install_environment, run_hook=perl.run_hook), # noqa: E501 + 'pygrep': Language(name='pygrep', ENVIRONMENT_DIR=pygrep.ENVIRONMENT_DIR, get_default_version=pygrep.get_default_version, healthy=pygrep.healthy, install_environment=pygrep.install_environment, run_hook=pygrep.run_hook), # noqa: E501 + 'python': Language(name='python', ENVIRONMENT_DIR=python.ENVIRONMENT_DIR, get_default_version=python.get_default_version, healthy=python.healthy, install_environment=python.install_environment, run_hook=python.run_hook), # noqa: E501 + 'python_venv': Language(name='python_venv', ENVIRONMENT_DIR=python_venv.ENVIRONMENT_DIR, get_default_version=python_venv.get_default_version, healthy=python_venv.healthy, install_environment=python_venv.install_environment, run_hook=python_venv.run_hook), # noqa: E501 + 'ruby': Language(name='ruby', ENVIRONMENT_DIR=ruby.ENVIRONMENT_DIR, get_default_version=ruby.get_default_version, healthy=ruby.healthy, install_environment=ruby.install_environment, run_hook=ruby.run_hook), # noqa: E501 + 'rust': Language(name='rust', ENVIRONMENT_DIR=rust.ENVIRONMENT_DIR, get_default_version=rust.get_default_version, healthy=rust.healthy, install_environment=rust.install_environment, run_hook=rust.run_hook), # noqa: E501 + 'script': Language(name='script', ENVIRONMENT_DIR=script.ENVIRONMENT_DIR, get_default_version=script.get_default_version, healthy=script.healthy, install_environment=script.install_environment, run_hook=script.run_hook), # noqa: E501 + 'swift': Language(name='swift', ENVIRONMENT_DIR=swift.ENVIRONMENT_DIR, get_default_version=swift.get_default_version, healthy=swift.healthy, install_environment=swift.install_environment, run_hook=swift.run_hook), # noqa: E501 + 'system': Language(name='system', ENVIRONMENT_DIR=system.ENVIRONMENT_DIR, get_default_version=system.get_default_version, healthy=system.healthy, install_environment=system.install_environment, run_hook=system.run_hook), # noqa: E501 + # END GENERATED +} +all_languages = sorted(languages) diff --git a/pre_commit/languages/conda.py b/pre_commit/languages/conda.py new file mode 100644 index 0000000..071757a --- /dev/null +++ b/pre_commit/languages/conda.py @@ -0,0 +1,84 @@ +import contextlib +import os +from typing import Generator +from typing import Sequence +from typing import Tuple + +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import SubstitutionT +from pre_commit.envcontext import UNSET +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'conda' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def get_env_patch(env: str) -> PatchesT: + # On non-windows systems executable live in $CONDA_PREFIX/bin, on Windows + # they can be in $CONDA_PREFIX/bin, $CONDA_PREFIX/Library/bin, + # $CONDA_PREFIX/Scripts and $CONDA_PREFIX. Whereas the latter only + # seems to be used for python.exe. + path: SubstitutionT = (os.path.join(env, 'bin'), os.pathsep, Var('PATH')) + if os.name == 'nt': # pragma: no cover (platform specific) + path = (env, os.pathsep, *path) + path = (os.path.join(env, 'Scripts'), os.pathsep, *path) + path = (os.path.join(env, 'Library', 'bin'), os.pathsep, *path) + + return ( + ('PYTHONHOME', UNSET), + ('VIRTUAL_ENV', UNSET), + ('CONDA_PREFIX', env), + ('PATH', path), + ) + + +@contextlib.contextmanager +def in_env( + prefix: Prefix, + language_version: str, +) -> Generator[None, None, None]: + directory = helpers.environment_dir(ENVIRONMENT_DIR, language_version) + envdir = prefix.path(directory) + with envcontext(get_env_patch(envdir)): + yield + + +def install_environment( + prefix: Prefix, + version: str, + additional_dependencies: Sequence[str], +) -> None: + helpers.assert_version_default('conda', version) + directory = helpers.environment_dir(ENVIRONMENT_DIR, version) + + env_dir = prefix.path(directory) + with clean_path_on_failure(env_dir): + cmd_output_b( + 'conda', 'env', 'create', '-p', env_dir, '--file', + 'environment.yml', cwd=prefix.prefix_dir, + ) + if additional_dependencies: + cmd_output_b( + 'conda', 'install', '-p', env_dir, *additional_dependencies, + cwd=prefix.prefix_dir, + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + # TODO: Some rare commands need to be run using `conda run` but mostly we + # can run them withot which is much quicker and produces a better + # output. + # cmd = ('conda', 'run', '-p', env_dir) + hook.cmd + with in_env(hook.prefix, hook.language_version): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/docker.py b/pre_commit/languages/docker.py new file mode 100644 index 0000000..f449584 --- /dev/null +++ b/pre_commit/languages/docker.py @@ -0,0 +1,114 @@ +import hashlib +import os +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import CalledProcessError +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'docker' +PRE_COMMIT_LABEL = 'PRE_COMMIT' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def md5(s: str) -> str: # pragma: win32 no cover + return hashlib.md5(s.encode()).hexdigest() + + +def docker_tag(prefix: Prefix) -> str: # pragma: win32 no cover + md5sum = md5(os.path.basename(prefix.prefix_dir)).lower() + return f'pre-commit-{md5sum}' + + +def docker_is_running() -> bool: # pragma: win32 no cover + try: + cmd_output_b('docker', 'ps') + except CalledProcessError: + return False + else: + return True + + +def assert_docker_available() -> None: # pragma: win32 no cover + assert docker_is_running(), ( + 'Docker is either not running or not configured in this environment' + ) + + +def build_docker_image( + prefix: Prefix, + *, + pull: bool, +) -> None: # pragma: win32 no cover + cmd: Tuple[str, ...] = ( + 'docker', 'build', + '--tag', docker_tag(prefix), + '--label', PRE_COMMIT_LABEL, + ) + if pull: + cmd += ('--pull',) + # This must come last for old versions of docker. See #477 + cmd += ('.',) + helpers.run_setup_cmd(prefix, cmd) + + +def install_environment( + prefix: Prefix, version: str, additional_dependencies: Sequence[str], +) -> None: # pragma: win32 no cover + helpers.assert_version_default('docker', version) + helpers.assert_no_additional_deps('docker', additional_dependencies) + assert_docker_available() + + directory = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + + # Docker doesn't really have relevant disk environment, but pre-commit + # still needs to cleanup its state files on failure + with clean_path_on_failure(directory): + build_docker_image(prefix, pull=True) + os.mkdir(directory) + + +def get_docker_user() -> str: # pragma: win32 no cover + try: + return f'{os.getuid()}:{os.getgid()}' + except AttributeError: + return '1000:1000' + + +def docker_cmd() -> Tuple[str, ...]: # pragma: win32 no cover + return ( + 'docker', 'run', + '--rm', + '-u', get_docker_user(), + # https://docs.docker.com/engine/reference/commandline/run/#mount-volumes-from-container-volumes-from + # The `Z` option tells Docker to label the content with a private + # unshared label. Only the current container can use a private volume. + '-v', f'{os.getcwd()}:/src:rw,Z', + '--workdir', '/src', + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: # pragma: win32 no cover + assert_docker_available() + # Rebuild the docker image in case it has gone missing, as many people do + # automated cleanup of docker images. + build_docker_image(hook.prefix, pull=False) + + hook_cmd = hook.cmd + entry_exe, cmd_rest = hook.cmd[0], hook_cmd[1:] + + entry_tag = ('--entrypoint', entry_exe, docker_tag(hook.prefix)) + cmd = docker_cmd() + entry_tag + cmd_rest + return helpers.run_xargs(hook, cmd, file_args, color=color) diff --git a/pre_commit/languages/docker_image.py b/pre_commit/languages/docker_image.py new file mode 100644 index 0000000..0c51df6 --- /dev/null +++ b/pre_commit/languages/docker_image.py @@ -0,0 +1,22 @@ +from typing import Sequence +from typing import Tuple + +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.languages.docker import assert_docker_available +from pre_commit.languages.docker import docker_cmd + +ENVIRONMENT_DIR = None +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +install_environment = helpers.no_install + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: # pragma: win32 no cover + assert_docker_available() + cmd = docker_cmd() + hook.cmd + return helpers.run_xargs(hook, cmd, file_args, color=color) diff --git a/pre_commit/languages/fail.py b/pre_commit/languages/fail.py new file mode 100644 index 0000000..d2b02d2 --- /dev/null +++ b/pre_commit/languages/fail.py @@ -0,0 +1,20 @@ +from typing import Sequence +from typing import Tuple + +from pre_commit.hook import Hook +from pre_commit.languages import helpers + +ENVIRONMENT_DIR = None +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +install_environment = helpers.no_install + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + out = f'{hook.entry}\n\n'.encode() + out += b'\n'.join(f.encode() for f in file_args) + b'\n' + return 1, out diff --git a/pre_commit/languages/golang.py b/pre_commit/languages/golang.py new file mode 100644 index 0000000..91ade1e --- /dev/null +++ b/pre_commit/languages/golang.py @@ -0,0 +1,97 @@ +import contextlib +import os.path +import sys +from typing import Generator +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit import git +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b +from pre_commit.util import rmtree + +ENVIRONMENT_DIR = 'golangenv' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def get_env_patch(venv: str) -> PatchesT: + return ( + ('PATH', (os.path.join(venv, 'bin'), os.pathsep, Var('PATH'))), + ) + + +@contextlib.contextmanager +def in_env(prefix: Prefix) -> Generator[None, None, None]: + envdir = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + with envcontext(get_env_patch(envdir)): + yield + + +def guess_go_dir(remote_url: str) -> str: + if remote_url.endswith('.git'): + remote_url = remote_url[:-1 * len('.git')] + looks_like_url = ( + not remote_url.startswith('file://') and + ('//' in remote_url or '@' in remote_url) + ) + remote_url = remote_url.replace(':', '/') + if looks_like_url: + _, _, remote_url = remote_url.rpartition('//') + _, _, remote_url = remote_url.rpartition('@') + return remote_url + else: + return 'unknown_src_dir' + + +def install_environment( + prefix: Prefix, + version: str, + additional_dependencies: Sequence[str], +) -> None: + helpers.assert_version_default('golang', version) + directory = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + + with clean_path_on_failure(directory): + remote = git.get_remote_url(prefix.prefix_dir) + repo_src_dir = os.path.join(directory, 'src', guess_go_dir(remote)) + + # Clone into the goenv we'll create + helpers.run_setup_cmd(prefix, ('git', 'clone', '.', repo_src_dir)) + + if sys.platform == 'cygwin': # pragma: no cover + _, gopath, _ = cmd_output('cygpath', '-w', directory) + gopath = gopath.strip() + else: + gopath = directory + env = dict(os.environ, GOPATH=gopath) + env.pop('GOBIN', None) + cmd_output_b('go', 'get', './...', cwd=repo_src_dir, env=env) + for dependency in additional_dependencies: + cmd_output_b('go', 'get', dependency, cwd=repo_src_dir, env=env) + # Same some disk space, we don't need these after installation + rmtree(prefix.path(directory, 'src')) + pkgdir = prefix.path(directory, 'pkg') + if os.path.exists(pkgdir): # pragma: no cover (go<1.10) + rmtree(pkgdir) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + with in_env(hook.prefix): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/helpers.py b/pre_commit/languages/helpers.py new file mode 100644 index 0000000..b5c95e5 --- /dev/null +++ b/pre_commit/languages/helpers.py @@ -0,0 +1,109 @@ +import multiprocessing +import os +import random +from typing import Any +from typing import List +from typing import Optional +from typing import overload +from typing import Sequence +from typing import Tuple +from typing import TYPE_CHECKING + +import pre_commit.constants as C +from pre_commit.hook import Hook +from pre_commit.prefix import Prefix +from pre_commit.util import cmd_output_b +from pre_commit.xargs import xargs + +if TYPE_CHECKING: + from typing import NoReturn + +FIXED_RANDOM_SEED = 1542676186 + + +def run_setup_cmd(prefix: Prefix, cmd: Tuple[str, ...]) -> None: + cmd_output_b(*cmd, cwd=prefix.prefix_dir) + + +@overload +def environment_dir(d: None, language_version: str) -> None: ... +@overload +def environment_dir(d: str, language_version: str) -> str: ... + + +def environment_dir(d: Optional[str], language_version: str) -> Optional[str]: + if d is None: + return None + else: + return f'{d}-{language_version}' + + +def assert_version_default(binary: str, version: str) -> None: + if version != C.DEFAULT: + raise AssertionError( + f'For now, pre-commit requires system-installed {binary}', + ) + + +def assert_no_additional_deps( + lang: str, + additional_deps: Sequence[str], +) -> None: + if additional_deps: + raise AssertionError( + f'For now, pre-commit does not support ' + f'additional_dependencies for {lang}', + ) + + +def basic_get_default_version() -> str: + return C.DEFAULT + + +def basic_healthy(prefix: Prefix, language_version: str) -> bool: + return True + + +def no_install( + prefix: Prefix, + version: str, + additional_dependencies: Sequence[str], +) -> 'NoReturn': + raise AssertionError('This type is not installable') + + +def target_concurrency(hook: Hook) -> int: + if hook.require_serial or 'PRE_COMMIT_NO_CONCURRENCY' in os.environ: + return 1 + else: + # Travis appears to have a bunch of CPUs, but we can't use them all. + if 'TRAVIS' in os.environ: + return 2 + else: + try: + return multiprocessing.cpu_count() + except NotImplementedError: + return 1 + + +def _shuffled(seq: Sequence[str]) -> List[str]: + """Deterministically shuffle""" + fixed_random = random.Random() + fixed_random.seed(FIXED_RANDOM_SEED, version=1) + + seq = list(seq) + random.shuffle(seq, random=fixed_random.random) + return seq + + +def run_xargs( + hook: Hook, + cmd: Tuple[str, ...], + file_args: Sequence[str], + **kwargs: Any, +) -> Tuple[int, bytes]: + # Shuffle the files so that they more evenly fill out the xargs partitions, + # but do it deterministically in case a hook cares about ordering. + file_args = _shuffled(file_args) + kwargs['target_concurrency'] = target_concurrency(hook) + return xargs(cmd, file_args, **kwargs) diff --git a/pre_commit/languages/node.py b/pre_commit/languages/node.py new file mode 100644 index 0000000..79ff807 --- /dev/null +++ b/pre_commit/languages/node.py @@ -0,0 +1,93 @@ +import contextlib +import os +import sys +from typing import Generator +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.languages.python import bin_dir +from pre_commit.prefix import Prefix +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'node_env' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def _envdir(prefix: Prefix, version: str) -> str: + directory = helpers.environment_dir(ENVIRONMENT_DIR, version) + return prefix.path(directory) + + +def get_env_patch(venv: str) -> PatchesT: + if sys.platform == 'cygwin': # pragma: no cover + _, win_venv, _ = cmd_output('cygpath', '-w', venv) + install_prefix = fr'{win_venv.strip()}\bin' + lib_dir = 'lib' + elif sys.platform == 'win32': # pragma: no cover + install_prefix = bin_dir(venv) + lib_dir = 'Scripts' + else: # pragma: win32 no cover + install_prefix = venv + lib_dir = 'lib' + return ( + ('NODE_VIRTUAL_ENV', venv), + ('NPM_CONFIG_PREFIX', install_prefix), + ('npm_config_prefix', install_prefix), + ('NODE_PATH', os.path.join(venv, lib_dir, 'node_modules')), + ('PATH', (bin_dir(venv), os.pathsep, Var('PATH'))), + ) + + +@contextlib.contextmanager +def in_env( + prefix: Prefix, + language_version: str, +) -> Generator[None, None, None]: + with envcontext(get_env_patch(_envdir(prefix, language_version))): + yield + + +def install_environment( + prefix: Prefix, version: str, additional_dependencies: Sequence[str], +) -> None: + additional_dependencies = tuple(additional_dependencies) + assert prefix.exists('package.json') + envdir = _envdir(prefix, version) + + # https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx?f=255&MSPPError=-2147217396#maxpath + if sys.platform == 'win32': # pragma: no cover + envdir = f'\\\\?\\{os.path.normpath(envdir)}' + with clean_path_on_failure(envdir): + cmd = [ + sys.executable, '-mnodeenv', '--prebuilt', '--clean-src', envdir, + ] + if version != C.DEFAULT: + cmd.extend(['-n', version]) + cmd_output_b(*cmd) + + with in_env(prefix, version): + # https://npm.community/t/npm-install-g-git-vs-git-clone-cd-npm-install-g/5449 + # install as if we installed from git + helpers.run_setup_cmd(prefix, ('npm', 'install')) + helpers.run_setup_cmd( + prefix, + ('npm', 'install', '-g', '.', *additional_dependencies), + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + with in_env(hook.prefix, hook.language_version): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/perl.py b/pre_commit/languages/perl.py new file mode 100644 index 0000000..bbf5504 --- /dev/null +++ b/pre_commit/languages/perl.py @@ -0,0 +1,67 @@ +import contextlib +import os +import shlex +from typing import Generator +from typing import Sequence +from typing import Tuple + +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import clean_path_on_failure + +ENVIRONMENT_DIR = 'perl_env' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def _envdir(prefix: Prefix, version: str) -> str: + directory = helpers.environment_dir(ENVIRONMENT_DIR, version) + return prefix.path(directory) + + +def get_env_patch(venv: str) -> PatchesT: + return ( + ('PATH', (os.path.join(venv, 'bin'), os.pathsep, Var('PATH'))), + ('PERL5LIB', os.path.join(venv, 'lib', 'perl5')), + ('PERL_MB_OPT', f'--install_base {shlex.quote(venv)}'), + ( + 'PERL_MM_OPT', ( + f'INSTALL_BASE={shlex.quote(venv)} ' + f'INSTALLSITEMAN1DIR=none INSTALLSITEMAN3DIR=none' + ), + ), + ) + + +@contextlib.contextmanager +def in_env( + prefix: Prefix, + language_version: str, +) -> Generator[None, None, None]: + with envcontext(get_env_patch(_envdir(prefix, language_version))): + yield + + +def install_environment( + prefix: Prefix, version: str, additional_dependencies: Sequence[str], +) -> None: + helpers.assert_version_default('perl', version) + + with clean_path_on_failure(_envdir(prefix, version)): + with in_env(prefix, version): + helpers.run_setup_cmd( + prefix, ('cpan', '-T', '.', *additional_dependencies), + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + with in_env(hook.prefix, hook.language_version): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py new file mode 100644 index 0000000..40adba0 --- /dev/null +++ b/pre_commit/languages/pygrep.py @@ -0,0 +1,87 @@ +import argparse +import re +import sys +from typing import Optional +from typing import Pattern +from typing import Sequence +from typing import Tuple + +from pre_commit import output +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.xargs import xargs + +ENVIRONMENT_DIR = None +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +install_environment = helpers.no_install + + +def _process_filename_by_line(pattern: Pattern[bytes], filename: str) -> int: + retv = 0 + with open(filename, 'rb') as f: + for line_no, line in enumerate(f, start=1): + if pattern.search(line): + retv = 1 + output.write(f'{filename}:{line_no}:') + output.write_line_b(line.rstrip(b'\r\n')) + return retv + + +def _process_filename_at_once(pattern: Pattern[bytes], filename: str) -> int: + retv = 0 + with open(filename, 'rb') as f: + contents = f.read() + match = pattern.search(contents) + if match: + retv = 1 + line_no = contents[:match.start()].count(b'\n') + output.write(f'{filename}:{line_no + 1}:') + + matched_lines = match[0].split(b'\n') + matched_lines[0] = contents.split(b'\n')[line_no] + + output.write_line_b(b'\n'.join(matched_lines)) + return retv + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + exe = (sys.executable, '-m', __name__) + tuple(hook.args) + (hook.entry,) + return xargs(exe, file_args, color=color) + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser( + description=( + 'grep-like finder using python regexes. Unlike grep, this tool ' + 'returns nonzero when it finds a match and zero otherwise. The ' + 'idea here being that matches are "problems".' + ), + ) + parser.add_argument('-i', '--ignore-case', action='store_true') + parser.add_argument('--multiline', action='store_true') + parser.add_argument('pattern', help='python regex pattern.') + parser.add_argument('filenames', nargs='*') + args = parser.parse_args(argv) + + flags = re.IGNORECASE if args.ignore_case else 0 + if args.multiline: + flags |= re.MULTILINE | re.DOTALL + + pattern = re.compile(args.pattern.encode(), flags) + + retv = 0 + for filename in args.filenames: + if args.multiline: + retv |= _process_filename_at_once(pattern, filename) + else: + retv |= _process_filename_by_line(pattern, filename) + return retv + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/languages/python.py b/pre_commit/languages/python.py new file mode 100644 index 0000000..5073a8b --- /dev/null +++ b/pre_commit/languages/python.py @@ -0,0 +1,210 @@ +import contextlib +import functools +import os +import sys +from typing import Callable +from typing import ContextManager +from typing import Generator +from typing import Optional +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import UNSET +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.parse_shebang import find_executable +from pre_commit.prefix import Prefix +from pre_commit.util import CalledProcessError +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'py_env' + + +def bin_dir(venv: str) -> str: + """On windows there's a different directory for the virtualenv""" + bin_part = 'Scripts' if os.name == 'nt' else 'bin' + return os.path.join(venv, bin_part) + + +def get_env_patch(venv: str) -> PatchesT: + return ( + ('PYTHONHOME', UNSET), + ('VIRTUAL_ENV', venv), + ('PATH', (bin_dir(venv), os.pathsep, Var('PATH'))), + ) + + +def _find_by_py_launcher( + version: str, +) -> Optional[str]: # pragma: no cover (windows only) + if version.startswith('python'): + num = version[len('python'):] + try: + cmd = ('py', f'-{num}', '-c', 'import sys; print(sys.executable)') + return cmd_output(*cmd)[1].strip() + except CalledProcessError: + pass + return None + + +def _find_by_sys_executable() -> Optional[str]: + def _norm(path: str) -> Optional[str]: + _, exe = os.path.split(path.lower()) + exe, _, _ = exe.partition('.exe') + if exe not in {'python', 'pythonw'} and find_executable(exe): + return exe + return None + + # On linux, I see these common sys.executables: + # + # system `python`: /usr/bin/python -> python2.7 + # system `python2`: /usr/bin/python2 -> python2.7 + # virtualenv v: v/bin/python (will not return from this loop) + # virtualenv v -ppython2: v/bin/python -> python2 + # virtualenv v -ppython2.7: v/bin/python -> python2.7 + # virtualenv v -ppypy: v/bin/python -> v/bin/pypy + for path in (sys.executable, os.path.realpath(sys.executable)): + exe = _norm(path) + if exe: + return exe + return None + + +@functools.lru_cache(maxsize=1) +def get_default_version() -> str: # pragma: no cover (platform dependent) + # First attempt from `sys.executable` (or the realpath) + exe = _find_by_sys_executable() + if exe: + return exe + + # Next try the `pythonX.X` executable + exe = f'python{sys.version_info[0]}.{sys.version_info[1]}' + if find_executable(exe): + return exe + + if _find_by_py_launcher(exe): + return exe + + # Give a best-effort try for windows + default_folder_name = exe.replace('.', '') + if os.path.exists(fr'C:\{default_folder_name}\python.exe'): + return exe + + # We tried! + return C.DEFAULT + + +def _sys_executable_matches(version: str) -> bool: + if version == 'python': + return True + elif not version.startswith('python'): + return False + + try: + info = tuple(int(p) for p in version[len('python'):].split('.')) + except ValueError: + return False + + return sys.version_info[:len(info)] == info + + +def norm_version(version: str) -> str: + # first see if our current executable is appropriate + if _sys_executable_matches(version): + return sys.executable + + if os.name == 'nt': # pragma: no cover (windows) + version_exec = _find_by_py_launcher(version) + if version_exec: + return version_exec + + # Try looking up by name + version_exec = find_executable(version) + if version_exec and version_exec != version: + return version_exec + + # If it is in the form pythonx.x search in the default + # place on windows + if version.startswith('python'): + default_folder_name = version.replace('.', '') + return fr'C:\{default_folder_name}\python.exe' + + # Otherwise assume it is a path + return os.path.expanduser(version) + + +def py_interface( + _dir: str, + _make_venv: Callable[[str, str], None], +) -> Tuple[ + Callable[[Prefix, str], ContextManager[None]], + Callable[[Prefix, str], bool], + Callable[[Hook, Sequence[str], bool], Tuple[int, bytes]], + Callable[[Prefix, str, Sequence[str]], None], +]: + @contextlib.contextmanager + def in_env( + prefix: Prefix, + language_version: str, + ) -> Generator[None, None, None]: + envdir = prefix.path(helpers.environment_dir(_dir, language_version)) + with envcontext(get_env_patch(envdir)): + yield + + def healthy(prefix: Prefix, language_version: str) -> bool: + envdir = helpers.environment_dir(_dir, language_version) + exe_name = 'python.exe' if sys.platform == 'win32' else 'python' + py_exe = prefix.path(bin_dir(envdir), exe_name) + with in_env(prefix, language_version): + retcode, _, _ = cmd_output_b( + py_exe, '-c', 'import ctypes, datetime, io, os, ssl, weakref', + cwd='/', + retcode=None, + ) + return retcode == 0 + + def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, + ) -> Tuple[int, bytes]: + with in_env(hook.prefix, hook.language_version): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) + + def install_environment( + prefix: Prefix, + version: str, + additional_dependencies: Sequence[str], + ) -> None: + additional_dependencies = tuple(additional_dependencies) + directory = helpers.environment_dir(_dir, version) + + env_dir = prefix.path(directory) + with clean_path_on_failure(env_dir): + if version != C.DEFAULT: + python = norm_version(version) + else: + python = os.path.realpath(sys.executable) + _make_venv(env_dir, python) + with in_env(prefix, version): + helpers.run_setup_cmd( + prefix, ('pip', 'install', '.') + additional_dependencies, + ) + + return in_env, healthy, run_hook, install_environment + + +def make_venv(envdir: str, python: str) -> None: + env = dict(os.environ, VIRTUALENV_NO_DOWNLOAD='1') + cmd = (sys.executable, '-mvirtualenv', envdir, '-p', python) + cmd_output_b(*cmd, env=env, cwd='/') + + +_interface = py_interface(ENVIRONMENT_DIR, make_venv) +in_env, healthy, run_hook, install_environment = _interface diff --git a/pre_commit/languages/python_venv.py b/pre_commit/languages/python_venv.py new file mode 100644 index 0000000..5404c8b --- /dev/null +++ b/pre_commit/languages/python_venv.py @@ -0,0 +1,46 @@ +import os.path + +from pre_commit.languages import python +from pre_commit.util import CalledProcessError +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'py_venv' +get_default_version = python.get_default_version + + +def orig_py_exe(exe: str) -> str: # pragma: no cover (platform specific) + """A -mvenv virtualenv made from a -mvirtualenv virtualenv installs + packages to the incorrect location. Attempt to find the _original_ exe + and invoke `-mvenv` from there. + + See: + - https://github.com/pre-commit/pre-commit/issues/755 + - https://github.com/pypa/virtualenv/issues/1095 + - https://bugs.python.org/issue30811 + """ + try: + prefix_script = 'import sys; print(sys.real_prefix)' + _, prefix, _ = cmd_output(exe, '-c', prefix_script) + prefix = prefix.strip() + except CalledProcessError: + # not created from -mvirtualenv + return exe + + if os.name == 'nt': + expected = os.path.join(prefix, 'python.exe') + else: + expected = os.path.join(prefix, 'bin', os.path.basename(exe)) + + if os.path.exists(expected): + return expected + else: + return exe + + +def make_venv(envdir: str, python: str) -> None: + cmd_output_b(orig_py_exe(python), '-mvenv', envdir, cwd='/') + + +_interface = python.py_interface(ENVIRONMENT_DIR, make_venv) +in_env, healthy, run_hook, install_environment = _interface diff --git a/pre_commit/languages/ruby.py b/pre_commit/languages/ruby.py new file mode 100644 index 0000000..61241f8 --- /dev/null +++ b/pre_commit/languages/ruby.py @@ -0,0 +1,126 @@ +import contextlib +import os.path +import shutil +import tarfile +from typing import Generator +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import CalledProcessError +from pre_commit.util import clean_path_on_failure +from pre_commit.util import resource_bytesio + +ENVIRONMENT_DIR = 'rbenv' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def get_env_patch( + venv: str, + language_version: str, +) -> PatchesT: # pragma: win32 no cover + patches: PatchesT = ( + ('GEM_HOME', os.path.join(venv, 'gems')), + ('RBENV_ROOT', venv), + ('BUNDLE_IGNORE_CONFIG', '1'), + ( + 'PATH', ( + os.path.join(venv, 'gems', 'bin'), os.pathsep, + os.path.join(venv, 'shims'), os.pathsep, + os.path.join(venv, 'bin'), os.pathsep, Var('PATH'), + ), + ), + ) + if language_version != C.DEFAULT: + patches += (('RBENV_VERSION', language_version),) + return patches + + +@contextlib.contextmanager # pragma: win32 no cover +def in_env( + prefix: Prefix, + language_version: str, +) -> Generator[None, None, None]: + envdir = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, language_version), + ) + with envcontext(get_env_patch(envdir, language_version)): + yield + + +def _extract_resource(filename: str, dest: str) -> None: + with resource_bytesio(filename) as bio: + with tarfile.open(fileobj=bio) as tf: + tf.extractall(dest) + + +def _install_rbenv( + prefix: Prefix, + version: str = C.DEFAULT, +) -> None: # pragma: win32 no cover + directory = helpers.environment_dir(ENVIRONMENT_DIR, version) + + _extract_resource('rbenv.tar.gz', prefix.path('.')) + shutil.move(prefix.path('rbenv'), prefix.path(directory)) + + # Only install ruby-build if the version is specified + if version != C.DEFAULT: + plugins_dir = prefix.path(directory, 'plugins') + _extract_resource('ruby-download.tar.gz', plugins_dir) + _extract_resource('ruby-build.tar.gz', plugins_dir) + + +def _install_ruby( + prefix: Prefix, + version: str, +) -> None: # pragma: win32 no cover + try: + helpers.run_setup_cmd(prefix, ('rbenv', 'download', version)) + except CalledProcessError: # pragma: no cover (usually find with download) + # Failed to download from mirror for some reason, build it instead + helpers.run_setup_cmd(prefix, ('rbenv', 'install', version)) + + +def install_environment( + prefix: Prefix, version: str, additional_dependencies: Sequence[str], +) -> None: # pragma: win32 no cover + additional_dependencies = tuple(additional_dependencies) + directory = helpers.environment_dir(ENVIRONMENT_DIR, version) + with clean_path_on_failure(prefix.path(directory)): + # TODO: this currently will fail if there's no version specified and + # there's no system ruby installed. Is this ok? + _install_rbenv(prefix, version=version) + with in_env(prefix, version): + # Need to call this before installing so rbenv's directories are + # set up + helpers.run_setup_cmd(prefix, ('rbenv', 'init', '-')) + if version != C.DEFAULT: + _install_ruby(prefix, version) + # Need to call this after installing to set up the shims + helpers.run_setup_cmd(prefix, ('rbenv', 'rehash')) + helpers.run_setup_cmd( + prefix, ('gem', 'build', *prefix.star('.gemspec')), + ) + helpers.run_setup_cmd( + prefix, + ( + 'gem', 'install', '--no-document', + *prefix.star('.gem'), *additional_dependencies, + ), + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: # pragma: win32 no cover + with in_env(hook.prefix, hook.language_version): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/rust.py b/pre_commit/languages/rust.py new file mode 100644 index 0000000..7ea3f54 --- /dev/null +++ b/pre_commit/languages/rust.py @@ -0,0 +1,106 @@ +import contextlib +import os.path +from typing import Generator +from typing import Sequence +from typing import Set +from typing import Tuple + +import toml + +import pre_commit.constants as C +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'rustenv' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy + + +def get_env_patch(target_dir: str) -> PatchesT: + return ( + ('PATH', (os.path.join(target_dir, 'bin'), os.pathsep, Var('PATH'))), + ) + + +@contextlib.contextmanager +def in_env(prefix: Prefix) -> Generator[None, None, None]: + target_dir = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + with envcontext(get_env_patch(target_dir)): + yield + + +def _add_dependencies( + cargo_toml_path: str, + additional_dependencies: Set[str], +) -> None: + with open(cargo_toml_path, 'r+') as f: + cargo_toml = toml.load(f) + cargo_toml.setdefault('dependencies', {}) + for dep in additional_dependencies: + name, _, spec = dep.partition(':') + cargo_toml['dependencies'][name] = spec or '*' + f.seek(0) + toml.dump(cargo_toml, f) + f.truncate() + + +def install_environment( + prefix: Prefix, + version: str, + additional_dependencies: Sequence[str], +) -> None: + helpers.assert_version_default('rust', version) + directory = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + + # There are two cases where we might want to specify more dependencies: + # as dependencies for the library being built, and as binary packages + # to be `cargo install`'d. + # + # Unlike e.g. Python, if we just `cargo install` a library, it won't be + # used for compilation. And if we add a crate providing a binary to the + # `Cargo.toml`, the binary won't be built. + # + # Because of this, we allow specifying "cli" dependencies by prefixing + # with 'cli:'. + cli_deps = { + dep for dep in additional_dependencies if dep.startswith('cli:') + } + lib_deps = set(additional_dependencies) - cli_deps + + if len(lib_deps) > 0: + _add_dependencies(prefix.path('Cargo.toml'), lib_deps) + + with clean_path_on_failure(directory): + packages_to_install: Set[Tuple[str, ...]] = {('--path', '.')} + for cli_dep in cli_deps: + cli_dep = cli_dep[len('cli:'):] + package, _, version = cli_dep.partition(':') + if version != '': + packages_to_install.add((package, '--version', version)) + else: + packages_to_install.add((package,)) + + for args in packages_to_install: + cmd_output_b( + 'cargo', 'install', '--bins', '--root', directory, *args, + cwd=prefix.prefix_dir, + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + with in_env(hook.prefix): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/script.py b/pre_commit/languages/script.py new file mode 100644 index 0000000..a5e1365 --- /dev/null +++ b/pre_commit/languages/script.py @@ -0,0 +1,19 @@ +from typing import Sequence +from typing import Tuple + +from pre_commit.hook import Hook +from pre_commit.languages import helpers + +ENVIRONMENT_DIR = None +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +install_environment = helpers.no_install + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + cmd = (hook.prefix.path(hook.cmd[0]), *hook.cmd[1:]) + return helpers.run_xargs(hook, cmd, file_args, color=color) diff --git a/pre_commit/languages/swift.py b/pre_commit/languages/swift.py new file mode 100644 index 0000000..66aadc8 --- /dev/null +++ b/pre_commit/languages/swift.py @@ -0,0 +1,64 @@ +import contextlib +import os +from typing import Generator +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit.envcontext import envcontext +from pre_commit.envcontext import PatchesT +from pre_commit.envcontext import Var +from pre_commit.hook import Hook +from pre_commit.languages import helpers +from pre_commit.prefix import Prefix +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output_b + +ENVIRONMENT_DIR = 'swift_env' +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +BUILD_DIR = '.build' +BUILD_CONFIG = 'release' + + +def get_env_patch(venv: str) -> PatchesT: # pragma: win32 no cover + bin_path = os.path.join(venv, BUILD_DIR, BUILD_CONFIG) + return (('PATH', (bin_path, os.pathsep, Var('PATH'))),) + + +@contextlib.contextmanager # pragma: win32 no cover +def in_env(prefix: Prefix) -> Generator[None, None, None]: + envdir = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + with envcontext(get_env_patch(envdir)): + yield + + +def install_environment( + prefix: Prefix, version: str, additional_dependencies: Sequence[str], +) -> None: # pragma: win32 no cover + helpers.assert_version_default('swift', version) + helpers.assert_no_additional_deps('swift', additional_dependencies) + directory = prefix.path( + helpers.environment_dir(ENVIRONMENT_DIR, C.DEFAULT), + ) + + # Build the swift package + with clean_path_on_failure(directory): + os.mkdir(directory) + cmd_output_b( + 'swift', 'build', + '-C', prefix.prefix_dir, + '-c', BUILD_CONFIG, + '--build-path', os.path.join(directory, BUILD_DIR), + ) + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: # pragma: win32 no cover + with in_env(hook.prefix): + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/languages/system.py b/pre_commit/languages/system.py new file mode 100644 index 0000000..139f45d --- /dev/null +++ b/pre_commit/languages/system.py @@ -0,0 +1,19 @@ +from typing import Sequence +from typing import Tuple + +from pre_commit.hook import Hook +from pre_commit.languages import helpers + + +ENVIRONMENT_DIR = None +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +install_environment = helpers.no_install + + +def run_hook( + hook: Hook, + file_args: Sequence[str], + color: bool, +) -> Tuple[int, bytes]: + return helpers.run_xargs(hook, hook.cmd, file_args, color=color) diff --git a/pre_commit/logging_handler.py b/pre_commit/logging_handler.py new file mode 100644 index 0000000..ba05295 --- /dev/null +++ b/pre_commit/logging_handler.py @@ -0,0 +1,40 @@ +import contextlib +import logging +from typing import Generator + +from pre_commit import color +from pre_commit import output + +logger = logging.getLogger('pre_commit') + +LOG_LEVEL_COLORS = { + 'DEBUG': '', + 'INFO': '', + 'WARNING': color.YELLOW, + 'ERROR': color.RED, +} + + +class LoggingHandler(logging.Handler): + def __init__(self, use_color: bool) -> None: + super().__init__() + self.use_color = use_color + + def emit(self, record: logging.LogRecord) -> None: + level_msg = color.format_color( + f'[{record.levelname}]', + LOG_LEVEL_COLORS[record.levelname], + self.use_color, + ) + output.write_line(f'{level_msg} {record.getMessage()}') + + +@contextlib.contextmanager +def logging_handler(use_color: bool) -> Generator[None, None, None]: + handler = LoggingHandler(use_color) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + try: + yield + finally: + logger.removeHandler(handler) diff --git a/pre_commit/main.py b/pre_commit/main.py new file mode 100644 index 0000000..790b347 --- /dev/null +++ b/pre_commit/main.py @@ -0,0 +1,410 @@ +import argparse +import logging +import os +import sys +from typing import Any +from typing import Optional +from typing import Sequence +from typing import Union + +import pre_commit.constants as C +from pre_commit import color +from pre_commit import git +from pre_commit.commands.autoupdate import autoupdate +from pre_commit.commands.clean import clean +from pre_commit.commands.gc import gc +from pre_commit.commands.hook_impl import hook_impl +from pre_commit.commands.init_templatedir import init_templatedir +from pre_commit.commands.install_uninstall import install +from pre_commit.commands.install_uninstall import install_hooks +from pre_commit.commands.install_uninstall import uninstall +from pre_commit.commands.migrate_config import migrate_config +from pre_commit.commands.run import run +from pre_commit.commands.sample_config import sample_config +from pre_commit.commands.try_repo import try_repo +from pre_commit.error_handler import error_handler +from pre_commit.error_handler import FatalError +from pre_commit.logging_handler import logging_handler +from pre_commit.store import Store +from pre_commit.util import CalledProcessError + + +logger = logging.getLogger('pre_commit') + +# https://github.com/pre-commit/pre-commit/issues/217 +# On OSX, making a virtualenv using pyvenv at . causes `virtualenv` and `pip` +# to install packages to the wrong place. We don't want anything to deal with +# pyvenv +os.environ.pop('__PYVENV_LAUNCHER__', None) + + +COMMANDS_NO_GIT = {'clean', 'gc', 'init-templatedir', 'sample-config'} + + +def _add_color_option(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + '--color', default=os.environ.get('PRE_COMMIT_COLOR', 'auto'), + type=color.use_color, + metavar='{' + ','.join(color.COLOR_CHOICES) + '}', + help='Whether to use color in output. Defaults to `%(default)s`.', + ) + + +def _add_config_option(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + '-c', '--config', default=C.CONFIG_FILE, + help='Path to alternate config file', + ) + + +class AppendReplaceDefault(argparse.Action): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.appended = False + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Union[str, Sequence[str], None], + option_string: Optional[str] = None, + ) -> None: + if not self.appended: + setattr(namespace, self.dest, []) + self.appended = True + getattr(namespace, self.dest).append(values) + + +def _add_hook_type_option(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + '-t', '--hook-type', choices=( + 'pre-commit', 'pre-merge-commit', 'pre-push', + 'prepare-commit-msg', 'commit-msg', 'post-checkout', + ), + action=AppendReplaceDefault, + default=['pre-commit'], + dest='hook_types', + ) + + +def _add_run_options(parser: argparse.ArgumentParser) -> None: + parser.add_argument('hook', nargs='?', help='A single hook-id to run') + parser.add_argument('--verbose', '-v', action='store_true', default=False) + mutex_group = parser.add_mutually_exclusive_group(required=False) + mutex_group.add_argument( + '--all-files', '-a', action='store_true', default=False, + help='Run on all the files in the repo.', + ) + mutex_group.add_argument( + '--files', nargs='*', default=[], + help='Specific filenames to run hooks on.', + ) + parser.add_argument( + '--show-diff-on-failure', action='store_true', + help='When hooks fail, run `git diff` directly afterward.', + ) + parser.add_argument( + '--hook-stage', choices=C.STAGES, default='commit', + help='The stage during which the hook is fired. One of %(choices)s', + ) + parser.add_argument( + '--from-ref', '--source', '-s', + help=( + '(for usage with `--from-ref`) -- this option represents the ' + 'original ref in a `from_ref...to_ref` diff expression. ' + 'For `pre-push` hooks, this represents the branch you are pushing ' + 'to. ' + 'For `post-checkout` hooks, this represents the branch that was ' + 'previously checked out.' + ), + ) + parser.add_argument( + '--to-ref', '--origin', '-o', + help=( + '(for usage with `--to-ref`) -- this option represents the ' + 'destination ref in a `from_ref...to_ref` diff expression. ' + 'For `pre-push` hooks, this represents the branch being pushed. ' + 'For `post-checkout` hooks, this represents the branch that is ' + 'now checked out.' + ), + ) + parser.add_argument( + '--commit-msg-filename', + help='Filename to check when running during `commit-msg`', + ) + parser.add_argument( + '--remote-name', help='Remote name used by `git push`.', + ) + parser.add_argument('--remote-url', help='Remote url used by `git push`.') + parser.add_argument( + '--checkout-type', + help=( + 'Indicates whether the checkout was a branch checkout ' + '(changing branches, flag=1) or a file checkout (retrieving a ' + 'file from the index, flag=0).' + ), + ) + + +def _adjust_args_and_chdir(args: argparse.Namespace) -> None: + # `--config` was specified relative to the non-root working directory + if os.path.exists(args.config): + args.config = os.path.abspath(args.config) + if args.command in {'run', 'try-repo'}: + args.files = [os.path.abspath(filename) for filename in args.files] + if args.command == 'try-repo' and os.path.exists(args.repo): + args.repo = os.path.abspath(args.repo) + + try: + toplevel = git.get_root() + except CalledProcessError: + raise FatalError( + 'git failed. Is it installed, and are you in a Git repository ' + 'directory?', + ) + else: + if toplevel == '': # pragma: no cover (old git) + raise FatalError( + 'git toplevel unexpectedly empty! make sure you are not ' + 'inside the `.git` directory of your repository.', + ) + else: + os.chdir(toplevel) + + args.config = os.path.relpath(args.config) + if args.command in {'run', 'try-repo'}: + args.files = [os.path.relpath(filename) for filename in args.files] + if args.command == 'try-repo' and os.path.exists(args.repo): + args.repo = os.path.relpath(args.repo) + + +def main(argv: Optional[Sequence[str]] = None) -> int: + argv = argv if argv is not None else sys.argv[1:] + parser = argparse.ArgumentParser(prog='pre-commit') + + # https://stackoverflow.com/a/8521644/812183 + parser.add_argument( + '-V', '--version', + action='version', + version=f'%(prog)s {C.VERSION}', + ) + + subparsers = parser.add_subparsers(dest='command') + + autoupdate_parser = subparsers.add_parser( + 'autoupdate', + help="Auto-update pre-commit config to the latest repos' versions.", + ) + _add_color_option(autoupdate_parser) + _add_config_option(autoupdate_parser) + autoupdate_parser.add_argument( + '--bleeding-edge', action='store_true', + help=( + 'Update to the bleeding edge of `master` instead of the latest ' + 'tagged version (the default behavior).' + ), + ) + autoupdate_parser.add_argument( + '--freeze', action='store_true', + help='Store "frozen" hashes in `rev` instead of tag names', + ) + autoupdate_parser.add_argument( + '--repo', dest='repos', action='append', metavar='REPO', + help='Only update this repository -- may be specified multiple times.', + ) + + clean_parser = subparsers.add_parser( + 'clean', help='Clean out pre-commit files.', + ) + _add_color_option(clean_parser) + _add_config_option(clean_parser) + + hook_impl_parser = subparsers.add_parser('hook-impl') + _add_color_option(hook_impl_parser) + _add_config_option(hook_impl_parser) + hook_impl_parser.add_argument('--hook-type') + hook_impl_parser.add_argument('--hook-dir') + hook_impl_parser.add_argument( + '--skip-on-missing-config', action='store_true', + ) + hook_impl_parser.add_argument(dest='rest', nargs=argparse.REMAINDER) + + gc_parser = subparsers.add_parser('gc', help='Clean unused cached repos.') + _add_color_option(gc_parser) + _add_config_option(gc_parser) + + init_templatedir_parser = subparsers.add_parser( + 'init-templatedir', + help=( + 'Install hook script in a directory intended for use with ' + '`git config init.templateDir`.' + ), + ) + _add_color_option(init_templatedir_parser) + _add_config_option(init_templatedir_parser) + init_templatedir_parser.add_argument( + 'directory', help='The directory in which to write the hook script.', + ) + _add_hook_type_option(init_templatedir_parser) + + install_parser = subparsers.add_parser( + 'install', help='Install the pre-commit script.', + ) + _add_color_option(install_parser) + _add_config_option(install_parser) + install_parser.add_argument( + '-f', '--overwrite', action='store_true', + help='Overwrite existing hooks / remove migration mode.', + ) + install_parser.add_argument( + '--install-hooks', action='store_true', + help=( + 'Whether to install hook environments for all environments ' + 'in the config file.' + ), + ) + _add_hook_type_option(install_parser) + install_parser.add_argument( + '--allow-missing-config', action='store_true', default=False, + help=( + 'Whether to allow a missing `pre-commit` configuration file ' + 'or exit with a failure code.' + ), + ) + + install_hooks_parser = subparsers.add_parser( + 'install-hooks', + help=( + 'Install hook environments for all environments in the config ' + 'file. You may find `pre-commit install --install-hooks` more ' + 'useful.' + ), + ) + _add_color_option(install_hooks_parser) + _add_config_option(install_hooks_parser) + + migrate_config_parser = subparsers.add_parser( + 'migrate-config', + help='Migrate list configuration to new map configuration.', + ) + _add_color_option(migrate_config_parser) + _add_config_option(migrate_config_parser) + + run_parser = subparsers.add_parser('run', help='Run hooks.') + _add_color_option(run_parser) + _add_config_option(run_parser) + _add_run_options(run_parser) + + sample_config_parser = subparsers.add_parser( + 'sample-config', help=f'Produce a sample {C.CONFIG_FILE} file', + ) + _add_color_option(sample_config_parser) + _add_config_option(sample_config_parser) + + try_repo_parser = subparsers.add_parser( + 'try-repo', + help='Try the hooks in a repository, useful for developing new hooks.', + ) + _add_color_option(try_repo_parser) + _add_config_option(try_repo_parser) + try_repo_parser.add_argument( + 'repo', help='Repository to source hooks from.', + ) + try_repo_parser.add_argument( + '--ref', '--rev', + help=( + 'Manually select a rev to run against, otherwise the `HEAD` ' + 'revision will be used.' + ), + ) + _add_run_options(try_repo_parser) + + uninstall_parser = subparsers.add_parser( + 'uninstall', help='Uninstall the pre-commit script.', + ) + _add_color_option(uninstall_parser) + _add_config_option(uninstall_parser) + _add_hook_type_option(uninstall_parser) + + help = subparsers.add_parser( + 'help', help='Show help for a specific command.', + ) + help.add_argument('help_cmd', nargs='?', help='Command to show help for.') + + # argparse doesn't really provide a way to use a `default` subparser + if len(argv) == 0: + argv = ['run'] + args = parser.parse_args(argv) + + if args.command == 'help' and args.help_cmd: + parser.parse_args([args.help_cmd, '--help']) + elif args.command == 'help': + parser.parse_args(['--help']) + + with error_handler(), logging_handler(args.color): + if args.command not in COMMANDS_NO_GIT: + _adjust_args_and_chdir(args) + + git.check_for_cygwin_mismatch() + + store = Store() + store.mark_config_used(args.config) + + if args.command == 'autoupdate': + return autoupdate( + args.config, store, + tags_only=not args.bleeding_edge, + freeze=args.freeze, + repos=args.repos, + ) + elif args.command == 'clean': + return clean(store) + elif args.command == 'gc': + return gc(store) + elif args.command == 'hook-impl': + return hook_impl( + store, + config=args.config, + color=args.color, + hook_type=args.hook_type, + hook_dir=args.hook_dir, + skip_on_missing_config=args.skip_on_missing_config, + args=args.rest[1:], + ) + elif args.command == 'install': + return install( + args.config, store, + hook_types=args.hook_types, + overwrite=args.overwrite, + hooks=args.install_hooks, + skip_on_missing_config=args.allow_missing_config, + ) + elif args.command == 'init-templatedir': + return init_templatedir( + args.config, store, args.directory, + hook_types=args.hook_types, + ) + elif args.command == 'install-hooks': + return install_hooks(args.config, store) + elif args.command == 'migrate-config': + return migrate_config(args.config) + elif args.command == 'run': + return run(args.config, store, args) + elif args.command == 'sample-config': + return sample_config() + elif args.command == 'try-repo': + return try_repo(args) + elif args.command == 'uninstall': + return uninstall(hook_types=args.hook_types) + else: + raise NotImplementedError( + f'Command {args.command} not implemented.', + ) + + raise AssertionError( + f'Command {args.command} failed to exit with a returncode', + ) + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/make_archives.py b/pre_commit/make_archives.py new file mode 100644 index 0000000..c31bcd7 --- /dev/null +++ b/pre_commit/make_archives.py @@ -0,0 +1,65 @@ +import argparse +import os.path +import tarfile +from typing import Optional +from typing import Sequence + +from pre_commit import output +from pre_commit.util import cmd_output_b +from pre_commit.util import rmtree +from pre_commit.util import tmpdir + + +# This is a script for generating the tarred resources for git repo +# dependencies. Currently it's just for "vendoring" ruby support packages. + + +REPOS = ( + ('rbenv', 'git://github.com/rbenv/rbenv', 'a3fa9b7'), + ('ruby-build', 'git://github.com/rbenv/ruby-build', '1a902f3'), + ( + 'ruby-download', + 'git://github.com/garnieretienne/rvm-download', + '09bd7c6', + ), +) + + +def make_archive(name: str, repo: str, ref: str, destdir: str) -> str: + """Makes an archive of a repository in the given destdir. + + :param text name: Name to give the archive. For instance foo. The file + that is created will be called foo.tar.gz. + :param text repo: Repository to clone. + :param text ref: Tag/SHA/branch to check out. + :param text destdir: Directory to place archives in. + """ + output_path = os.path.join(destdir, f'{name}.tar.gz') + with tmpdir() as tempdir: + # Clone the repository to the temporary directory + cmd_output_b('git', 'clone', repo, tempdir) + cmd_output_b('git', 'checkout', ref, cwd=tempdir) + + # We don't want the '.git' directory + # It adds a bunch of size to the archive and we don't use it at + # runtime + rmtree(os.path.join(tempdir, '.git')) + + with tarfile.open(output_path, 'w|gz') as tf: + tf.add(tempdir, name) + + return output_path + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('--dest', default='pre_commit/resources') + args = parser.parse_args(argv) + for archive_name, repo, ref in REPOS: + output.write_line(f'Making {archive_name}.tar.gz for {repo}@{ref}') + make_archive(archive_name, repo, ref, args.dest) + return 0 + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/meta_hooks/__init__.py b/pre_commit/meta_hooks/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pre_commit/meta_hooks/__init__.py diff --git a/pre_commit/meta_hooks/check_hooks_apply.py b/pre_commit/meta_hooks/check_hooks_apply.py new file mode 100644 index 0000000..d0244a9 --- /dev/null +++ b/pre_commit/meta_hooks/check_hooks_apply.py @@ -0,0 +1,39 @@ +import argparse +from typing import Optional +from typing import Sequence + +import pre_commit.constants as C +from pre_commit import git +from pre_commit.clientlib import load_config +from pre_commit.commands.run import Classifier +from pre_commit.repository import all_hooks +from pre_commit.store import Store + + +def check_all_hooks_match_files(config_file: str) -> int: + classifier = Classifier(git.get_all_files()) + retv = 0 + + for hook in all_hooks(load_config(config_file), Store()): + if hook.always_run or hook.language == 'fail': + continue + elif not classifier.filenames_for_hook(hook): + print(f'{hook.id} does not apply to this repository') + retv = 1 + + return retv + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', default=[C.CONFIG_FILE]) + args = parser.parse_args(argv) + + retv = 0 + for filename in args.filenames: + retv |= check_all_hooks_match_files(filename) + return retv + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/meta_hooks/check_useless_excludes.py b/pre_commit/meta_hooks/check_useless_excludes.py new file mode 100644 index 0000000..30b8d81 --- /dev/null +++ b/pre_commit/meta_hooks/check_useless_excludes.py @@ -0,0 +1,72 @@ +import argparse +import re +from typing import Optional +from typing import Sequence + +from cfgv import apply_defaults + +import pre_commit.constants as C +from pre_commit import git +from pre_commit.clientlib import load_config +from pre_commit.clientlib import MANIFEST_HOOK_DICT +from pre_commit.commands.run import Classifier + + +def exclude_matches_any( + filenames: Sequence[str], + include: str, + exclude: str, +) -> bool: + if exclude == '^$': + return True + include_re, exclude_re = re.compile(include), re.compile(exclude) + for filename in filenames: + if include_re.search(filename) and exclude_re.search(filename): + return True + return False + + +def check_useless_excludes(config_file: str) -> int: + config = load_config(config_file) + classifier = Classifier(git.get_all_files()) + retv = 0 + + exclude = config['exclude'] + if not exclude_matches_any(classifier.filenames, '', exclude): + print( + f'The global exclude pattern {exclude!r} does not match any files', + ) + retv = 1 + + for repo in config['repos']: + for hook in repo['hooks']: + # Not actually a manifest dict, but this more accurately reflects + # the defaults applied during runtime + hook = apply_defaults(hook, MANIFEST_HOOK_DICT) + names = classifier.filenames + types, exclude_types = hook['types'], hook['exclude_types'] + names = classifier.by_types(names, types, exclude_types) + include, exclude = hook['files'], hook['exclude'] + if not exclude_matches_any(names, include, exclude): + print( + f'The exclude pattern {exclude!r} for {hook["id"]} does ' + f'not match any files', + ) + retv = 1 + + return retv + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', default=[C.CONFIG_FILE]) + args = parser.parse_args(argv) + + retv = 0 + for filename in args.filenames: + retv |= check_useless_excludes(filename) + return retv + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/meta_hooks/identity.py b/pre_commit/meta_hooks/identity.py new file mode 100644 index 0000000..730d0ec --- /dev/null +++ b/pre_commit/meta_hooks/identity.py @@ -0,0 +1,16 @@ +import sys +from typing import Optional +from typing import Sequence + +from pre_commit import output + + +def main(argv: Optional[Sequence[str]] = None) -> int: + argv = argv if argv is not None else sys.argv[1:] + for arg in argv: + output.write_line(arg) + return 0 + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/output.py b/pre_commit/output.py new file mode 100644 index 0000000..24f9d84 --- /dev/null +++ b/pre_commit/output.py @@ -0,0 +1,32 @@ +import contextlib +import sys +from typing import Any +from typing import IO +from typing import Optional + + +def write(s: str, stream: IO[bytes] = sys.stdout.buffer) -> None: + stream.write(s.encode()) + stream.flush() + + +def write_line_b( + s: Optional[bytes] = None, + stream: IO[bytes] = sys.stdout.buffer, + logfile_name: Optional[str] = None, +) -> None: + with contextlib.ExitStack() as exit_stack: + output_streams = [stream] + if logfile_name: + stream = exit_stack.enter_context(open(logfile_name, 'ab')) + output_streams.append(stream) + + for output_stream in output_streams: + if s is not None: + output_stream.write(s) + output_stream.write(b'\n') + output_stream.flush() + + +def write_line(s: Optional[str] = None, **kwargs: Any) -> None: + write_line_b(s.encode() if s is not None else s, **kwargs) diff --git a/pre_commit/parse_shebang.py b/pre_commit/parse_shebang.py new file mode 100644 index 0000000..d344a1d --- /dev/null +++ b/pre_commit/parse_shebang.py @@ -0,0 +1,84 @@ +import os.path +from typing import Mapping +from typing import Optional +from typing import Tuple +from typing import TYPE_CHECKING + +from identify.identify import parse_shebang_from_file + +if TYPE_CHECKING: + from typing import NoReturn + + +class ExecutableNotFoundError(OSError): + def to_output(self) -> Tuple[int, bytes, None]: + return (1, self.args[0].encode(), None) + + +def parse_filename(filename: str) -> Tuple[str, ...]: + if not os.path.exists(filename): + return () + else: + return parse_shebang_from_file(filename) + + +def find_executable( + exe: str, _environ: Optional[Mapping[str, str]] = None, +) -> Optional[str]: + exe = os.path.normpath(exe) + if os.sep in exe: + return exe + + environ = _environ if _environ is not None else os.environ + + if 'PATHEXT' in environ: + exts = environ['PATHEXT'].split(os.pathsep) + possible_exe_names = tuple(f'{exe}{ext}' for ext in exts) + (exe,) + else: + possible_exe_names = (exe,) + + for path in environ.get('PATH', '').split(os.pathsep): + for possible_exe_name in possible_exe_names: + joined = os.path.join(path, possible_exe_name) + if os.path.isfile(joined) and os.access(joined, os.X_OK): + return joined + else: + return None + + +def normexe(orig: str) -> str: + def _error(msg: str) -> 'NoReturn': + raise ExecutableNotFoundError(f'Executable `{orig}` {msg}') + + if os.sep not in orig and (not os.altsep or os.altsep not in orig): + exe = find_executable(orig) + if exe is None: + _error('not found') + return exe + elif os.path.isdir(orig): + _error('is a directory') + elif not os.path.isfile(orig): + _error('not found') + elif not os.access(orig, os.X_OK): # pragma: win32 no cover + _error('is not executable') + else: + return orig + + +def normalize_cmd(cmd: Tuple[str, ...]) -> Tuple[str, ...]: + """Fixes for the following issues on windows + - https://bugs.python.org/issue8557 + - windows does not parse shebangs + + This function also makes deep-path shebangs work just fine + """ + # Use PATH to determine the executable + exe = normexe(cmd[0]) + + # Figure out the shebang from the resulting command + cmd = parse_filename(exe) + (exe,) + cmd[1:] + + # This could have given us back another bare executable + exe = normexe(cmd[0]) + + return (exe,) + cmd[1:] diff --git a/pre_commit/prefix.py b/pre_commit/prefix.py new file mode 100644 index 0000000..0e3ebbd --- /dev/null +++ b/pre_commit/prefix.py @@ -0,0 +1,17 @@ +import os.path +from typing import NamedTuple +from typing import Tuple + + +class Prefix(NamedTuple): + prefix_dir: str + + def path(self, *parts: str) -> str: + return os.path.normpath(os.path.join(self.prefix_dir, *parts)) + + def exists(self, *parts: str) -> bool: + return os.path.exists(self.path(*parts)) + + def star(self, end: str) -> Tuple[str, ...]: + paths = os.listdir(self.prefix_dir) + return tuple(path for path in paths if path.endswith(end)) diff --git a/pre_commit/repository.py b/pre_commit/repository.py new file mode 100644 index 0000000..77734ee --- /dev/null +++ b/pre_commit/repository.py @@ -0,0 +1,208 @@ +import json +import logging +import os +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Sequence +from typing import Set +from typing import Tuple + +import pre_commit.constants as C +from pre_commit.clientlib import load_manifest +from pre_commit.clientlib import LOCAL +from pre_commit.clientlib import META +from pre_commit.hook import Hook +from pre_commit.languages.all import languages +from pre_commit.languages.helpers import environment_dir +from pre_commit.prefix import Prefix +from pre_commit.store import Store +from pre_commit.util import parse_version +from pre_commit.util import rmtree + + +logger = logging.getLogger('pre_commit') + + +def _state(additional_deps: Sequence[str]) -> object: + return {'additional_dependencies': sorted(additional_deps)} + + +def _state_filename(prefix: Prefix, venv: str) -> str: + return prefix.path(venv, f'.install_state_v{C.INSTALLED_STATE_VERSION}') + + +def _read_state(prefix: Prefix, venv: str) -> Optional[object]: + filename = _state_filename(prefix, venv) + if not os.path.exists(filename): + return None + else: + with open(filename) as f: + return json.load(f) + + +def _write_state(prefix: Prefix, venv: str, state: object) -> None: + state_filename = _state_filename(prefix, venv) + staging = f'{state_filename}staging' + with open(staging, 'w') as state_file: + state_file.write(json.dumps(state)) + # Move the file into place atomically to indicate we've installed + os.rename(staging, state_filename) + + +def _hook_installed(hook: Hook) -> bool: + lang = languages[hook.language] + venv = environment_dir(lang.ENVIRONMENT_DIR, hook.language_version) + return ( + venv is None or ( + ( + _read_state(hook.prefix, venv) == + _state(hook.additional_dependencies) + ) and + lang.healthy(hook.prefix, hook.language_version) + ) + ) + + +def _hook_install(hook: Hook) -> None: + logger.info(f'Installing environment for {hook.src}.') + logger.info('Once installed this environment will be reused.') + logger.info('This may take a few minutes...') + + lang = languages[hook.language] + assert lang.ENVIRONMENT_DIR is not None + venv = environment_dir(lang.ENVIRONMENT_DIR, hook.language_version) + + # There's potentially incomplete cleanup from previous runs + # Clean it up! + if hook.prefix.exists(venv): + rmtree(hook.prefix.path(venv)) + + lang.install_environment( + hook.prefix, hook.language_version, hook.additional_dependencies, + ) + # Write our state to indicate we're installed + _write_state(hook.prefix, venv, _state(hook.additional_dependencies)) + + +def _hook( + *hook_dicts: Dict[str, Any], + root_config: Dict[str, Any], +) -> Dict[str, Any]: + ret, rest = dict(hook_dicts[0]), hook_dicts[1:] + for dct in rest: + ret.update(dct) + + version = ret['minimum_pre_commit_version'] + if parse_version(version) > parse_version(C.VERSION): + logger.error( + f'The hook `{ret["id"]}` requires pre-commit version {version} ' + f'but version {C.VERSION} is installed. ' + f'Perhaps run `pip install --upgrade pre-commit`.', + ) + exit(1) + + lang = ret['language'] + if ret['language_version'] == C.DEFAULT: + ret['language_version'] = root_config['default_language_version'][lang] + if ret['language_version'] == C.DEFAULT: + ret['language_version'] = languages[lang].get_default_version() + + if not ret['stages']: + ret['stages'] = root_config['default_stages'] + + return ret + + +def _non_cloned_repository_hooks( + repo_config: Dict[str, Any], + store: Store, + root_config: Dict[str, Any], +) -> Tuple[Hook, ...]: + def _prefix(language_name: str, deps: Sequence[str]) -> Prefix: + language = languages[language_name] + # pygrep / script / system / docker_image do not have + # environments so they work out of the current directory + if language.ENVIRONMENT_DIR is None: + return Prefix(os.getcwd()) + else: + return Prefix(store.make_local(deps)) + + return tuple( + Hook.create( + repo_config['repo'], + _prefix(hook['language'], hook['additional_dependencies']), + _hook(hook, root_config=root_config), + ) + for hook in repo_config['hooks'] + ) + + +def _cloned_repository_hooks( + repo_config: Dict[str, Any], + store: Store, + root_config: Dict[str, Any], +) -> Tuple[Hook, ...]: + repo, rev = repo_config['repo'], repo_config['rev'] + manifest_path = os.path.join(store.clone(repo, rev), C.MANIFEST_FILE) + by_id = {hook['id']: hook for hook in load_manifest(manifest_path)} + + for hook in repo_config['hooks']: + if hook['id'] not in by_id: + logger.error( + f'`{hook["id"]}` is not present in repository {repo}. ' + f'Typo? Perhaps it is introduced in a newer version? ' + f'Often `pre-commit autoupdate` fixes this.', + ) + exit(1) + + hook_dcts = [ + _hook(by_id[hook['id']], hook, root_config=root_config) + for hook in repo_config['hooks'] + ] + return tuple( + Hook.create( + repo_config['repo'], + Prefix(store.clone(repo, rev, hook['additional_dependencies'])), + hook, + ) + for hook in hook_dcts + ) + + +def _repository_hooks( + repo_config: Dict[str, Any], + store: Store, + root_config: Dict[str, Any], +) -> Tuple[Hook, ...]: + if repo_config['repo'] in {LOCAL, META}: + return _non_cloned_repository_hooks(repo_config, store, root_config) + else: + return _cloned_repository_hooks(repo_config, store, root_config) + + +def install_hook_envs(hooks: Sequence[Hook], store: Store) -> None: + def _need_installed() -> List[Hook]: + seen: Set[Tuple[Prefix, str, str, Tuple[str, ...]]] = set() + ret = [] + for hook in hooks: + if hook.install_key not in seen and not _hook_installed(hook): + ret.append(hook) + seen.add(hook.install_key) + return ret + + if not _need_installed(): + return + with store.exclusive_lock(): + # Another process may have already completed this work + for hook in _need_installed(): + _hook_install(hook) + + +def all_hooks(root_config: Dict[str, Any], store: Store) -> Tuple[Hook, ...]: + return tuple( + hook + for repo in root_config['repos'] + for hook in _repository_hooks(repo, store, root_config) + ) diff --git a/pre_commit/resources/__init__.py b/pre_commit/resources/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pre_commit/resources/__init__.py diff --git a/pre_commit/resources/empty_template_.npmignore b/pre_commit/resources/empty_template_.npmignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/pre_commit/resources/empty_template_.npmignore @@ -0,0 +1 @@ +* diff --git a/pre_commit/resources/empty_template_Cargo.toml b/pre_commit/resources/empty_template_Cargo.toml new file mode 100644 index 0000000..3dfeffa --- /dev/null +++ b/pre_commit/resources/empty_template_Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "__fake_crate" +version = "0.0.0" + +[[bin]] +name = "__fake_cmd" +path = "main.rs" diff --git a/pre_commit/resources/empty_template_Makefile.PL b/pre_commit/resources/empty_template_Makefile.PL new file mode 100644 index 0000000..ac75fe5 --- /dev/null +++ b/pre_commit/resources/empty_template_Makefile.PL @@ -0,0 +1,6 @@ +use ExtUtils::MakeMaker; + +WriteMakefile( + NAME => "PreCommitDummy", + VERSION => "0.0.1", +); diff --git a/pre_commit/resources/empty_template_environment.yml b/pre_commit/resources/empty_template_environment.yml new file mode 100644 index 0000000..0f29f0c --- /dev/null +++ b/pre_commit/resources/empty_template_environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - defaults +dependencies: + # This cannot be empty as otherwise no environment will be created. + # We're using openssl here as it is available on all system and will + # most likely be always installed anyways. + # See https://github.com/conda/conda/issues/9487 + - openssl diff --git a/pre_commit/resources/empty_template_main.go b/pre_commit/resources/empty_template_main.go new file mode 100644 index 0000000..38dd16d --- /dev/null +++ b/pre_commit/resources/empty_template_main.go @@ -0,0 +1,3 @@ +package main + +func main() {} diff --git a/pre_commit/resources/empty_template_main.rs b/pre_commit/resources/empty_template_main.rs new file mode 100644 index 0000000..f328e4d --- /dev/null +++ b/pre_commit/resources/empty_template_main.rs @@ -0,0 +1 @@ +fn main() {} diff --git a/pre_commit/resources/empty_template_package.json b/pre_commit/resources/empty_template_package.json new file mode 100644 index 0000000..ac7b725 --- /dev/null +++ b/pre_commit/resources/empty_template_package.json @@ -0,0 +1,4 @@ +{ + "name": "pre_commit_dummy_package", + "version": "0.0.0" +} diff --git a/pre_commit/resources/empty_template_pre_commit_dummy_package.gemspec b/pre_commit/resources/empty_template_pre_commit_dummy_package.gemspec new file mode 100644 index 0000000..8bfb40c --- /dev/null +++ b/pre_commit/resources/empty_template_pre_commit_dummy_package.gemspec @@ -0,0 +1,6 @@ +Gem::Specification.new do |s| + s.name = 'pre_commit_dummy_package' + s.version = '0.0.0' + s.summary = 'dummy gem for pre-commit hooks' + s.authors = ['Anthony Sottile'] +end diff --git a/pre_commit/resources/empty_template_setup.py b/pre_commit/resources/empty_template_setup.py new file mode 100644 index 0000000..6886064 --- /dev/null +++ b/pre_commit/resources/empty_template_setup.py @@ -0,0 +1,4 @@ +from setuptools import setup + + +setup(name='pre-commit-dummy-package', version='0.0.0') diff --git a/pre_commit/resources/hook-tmpl b/pre_commit/resources/hook-tmpl new file mode 100755 index 0000000..299144e --- /dev/null +++ b/pre_commit/resources/hook-tmpl @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# File generated by pre-commit: https://pre-commit.com +# ID: 138fd403232d2ddd5efb44317e38bf03 +import os +import sys + +# we try our best, but the shebang of this script is difficult to determine: +# - macos doesn't ship with python3 +# - windows executables are almost always `python.exe` +# therefore we continue to support python2 for this small script +if sys.version_info < (3, 3): + from distutils.spawn import find_executable as which +else: + from shutil import which + +# work around https://github.com/Homebrew/homebrew-core/issues/30445 +os.environ.pop('__PYVENV_LAUNCHER__', None) + +# start templated +INSTALL_PYTHON = '' +ARGS = ['hook-impl'] +# end templated +ARGS.extend(('--hook-dir', os.path.realpath(os.path.dirname(__file__)))) +ARGS.append('--') +ARGS.extend(sys.argv[1:]) + +DNE = '`pre-commit` not found. Did you forget to activate your virtualenv?' +if os.access(INSTALL_PYTHON, os.X_OK): + CMD = [INSTALL_PYTHON, '-mpre_commit'] +elif which('pre-commit'): + CMD = ['pre-commit'] +else: + raise SystemExit(DNE) + +CMD.extend(ARGS) +if sys.platform == 'win32': # https://bugs.python.org/issue19124 + import subprocess + + if sys.version_info < (3, 7): # https://bugs.python.org/issue25942 + raise SystemExit(subprocess.Popen(CMD).wait()) + else: + raise SystemExit(subprocess.call(CMD)) +else: + os.execvp(CMD[0], CMD) diff --git a/pre_commit/resources/rbenv.tar.gz b/pre_commit/resources/rbenv.tar.gz Binary files differnew file mode 100644 index 0000000..5307b19 --- /dev/null +++ b/pre_commit/resources/rbenv.tar.gz diff --git a/pre_commit/resources/ruby-build.tar.gz b/pre_commit/resources/ruby-build.tar.gz Binary files differnew file mode 100644 index 0000000..4a69a09 --- /dev/null +++ b/pre_commit/resources/ruby-build.tar.gz diff --git a/pre_commit/resources/ruby-download.tar.gz b/pre_commit/resources/ruby-download.tar.gz Binary files differnew file mode 100644 index 0000000..7ccfb6c --- /dev/null +++ b/pre_commit/resources/ruby-download.tar.gz diff --git a/pre_commit/staged_files_only.py b/pre_commit/staged_files_only.py new file mode 100644 index 0000000..09d323d --- /dev/null +++ b/pre_commit/staged_files_only.py @@ -0,0 +1,90 @@ +import contextlib +import logging +import os.path +import time +from typing import Generator + +from pre_commit import git +from pre_commit.util import CalledProcessError +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b +from pre_commit.xargs import xargs + + +logger = logging.getLogger('pre_commit') + + +def _git_apply(patch: str) -> None: + args = ('apply', '--whitespace=nowarn', patch) + try: + cmd_output_b('git', *args) + except CalledProcessError: + # Retry with autocrlf=false -- see #570 + cmd_output_b('git', '-c', 'core.autocrlf=false', *args) + + +@contextlib.contextmanager +def _intent_to_add_cleared() -> Generator[None, None, None]: + intent_to_add = git.intent_to_add_files() + if intent_to_add: + logger.warning('Unstaged intent-to-add files detected.') + + xargs(('git', 'rm', '--cached', '--'), intent_to_add) + try: + yield + finally: + xargs(('git', 'add', '--intent-to-add', '--'), intent_to_add) + else: + yield + + +@contextlib.contextmanager +def _unstaged_changes_cleared(patch_dir: str) -> Generator[None, None, None]: + tree = cmd_output('git', 'write-tree')[1].strip() + retcode, diff_stdout_binary, _ = cmd_output_b( + 'git', 'diff-index', '--ignore-submodules', '--binary', + '--exit-code', '--no-color', '--no-ext-diff', tree, '--', + retcode=None, + ) + if retcode and diff_stdout_binary.strip(): + patch_filename = f'patch{int(time.time())}' + patch_filename = os.path.join(patch_dir, patch_filename) + logger.warning('Unstaged files detected.') + logger.info(f'Stashing unstaged files to {patch_filename}.') + # Save the current unstaged changes as a patch + os.makedirs(patch_dir, exist_ok=True) + with open(patch_filename, 'wb') as patch_file: + patch_file.write(diff_stdout_binary) + + # Clear the working directory of unstaged changes + cmd_output_b('git', 'checkout', '--', '.') + try: + yield + finally: + # Try to apply the patch we saved + try: + _git_apply(patch_filename) + except CalledProcessError: + logger.warning( + 'Stashed changes conflicted with hook auto-fixes... ' + 'Rolling back fixes...', + ) + # We failed to apply the patch, presumably due to fixes made + # by hooks. + # Roll back the changes made by hooks. + cmd_output_b('git', 'checkout', '--', '.') + _git_apply(patch_filename) + logger.info(f'Restored changes from {patch_filename}.') + else: + # There weren't any staged files so we don't need to do anything + # special + yield + + +@contextlib.contextmanager +def staged_files_only(patch_dir: str) -> Generator[None, None, None]: + """Clear any unstaged changes from the git working directory inside this + context. + """ + with _intent_to_add_cleared(), _unstaged_changes_cleared(patch_dir): + yield diff --git a/pre_commit/store.py b/pre_commit/store.py new file mode 100644 index 0000000..760b37a --- /dev/null +++ b/pre_commit/store.py @@ -0,0 +1,250 @@ +import contextlib +import logging +import os.path +import sqlite3 +import tempfile +from typing import Callable +from typing import Generator +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple + +import pre_commit.constants as C +from pre_commit import file_lock +from pre_commit import git +from pre_commit.util import CalledProcessError +from pre_commit.util import clean_path_on_failure +from pre_commit.util import cmd_output_b +from pre_commit.util import resource_text +from pre_commit.util import rmtree + + +logger = logging.getLogger('pre_commit') + + +def _get_default_directory() -> str: + """Returns the default directory for the Store. This is intentionally + underscored to indicate that `Store.get_default_directory` is the intended + way to get this information. This is also done so + `Store.get_default_directory` can be mocked in tests and + `_get_default_directory` can be tested. + """ + return os.environ.get('PRE_COMMIT_HOME') or os.path.join( + os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'), + 'pre-commit', + ) + + +class Store: + get_default_directory = staticmethod(_get_default_directory) + + def __init__(self, directory: Optional[str] = None) -> None: + self.directory = directory or Store.get_default_directory() + self.db_path = os.path.join(self.directory, 'db.db') + + if not os.path.exists(self.directory): + os.makedirs(self.directory, exist_ok=True) + with open(os.path.join(self.directory, 'README'), 'w') as f: + f.write( + 'This directory is maintained by the pre-commit project.\n' + 'Learn more: https://github.com/pre-commit/pre-commit\n', + ) + + if os.path.exists(self.db_path): + return + with self.exclusive_lock(): + # Another process may have already completed this work + if os.path.exists(self.db_path): # pragma: no cover (race) + return + # To avoid a race where someone ^Cs between db creation and + # execution of the CREATE TABLE statement + fd, tmpfile = tempfile.mkstemp(dir=self.directory) + # We'll be managing this file ourselves + os.close(fd) + with self.connect(db_path=tmpfile) as db: + db.executescript( + 'CREATE TABLE repos (' + ' repo TEXT NOT NULL,' + ' ref TEXT NOT NULL,' + ' path TEXT NOT NULL,' + ' PRIMARY KEY (repo, ref)' + ');', + ) + self._create_config_table(db) + + # Atomic file move + os.rename(tmpfile, self.db_path) + + @contextlib.contextmanager + def exclusive_lock(self) -> Generator[None, None, None]: + def blocked_cb() -> None: # pragma: no cover (tests are in-process) + logger.info('Locking pre-commit directory') + + with file_lock.lock(os.path.join(self.directory, '.lock'), blocked_cb): + yield + + @contextlib.contextmanager + def connect( + self, + db_path: Optional[str] = None, + ) -> Generator[sqlite3.Connection, None, None]: + db_path = db_path or self.db_path + # sqlite doesn't close its fd with its contextmanager >.< + # contextlib.closing fixes this. + # See: https://stackoverflow.com/a/28032829/812183 + with contextlib.closing(sqlite3.connect(db_path)) as db: + # this creates a transaction + with db: + yield db + + @classmethod + def db_repo_name(cls, repo: str, deps: Sequence[str]) -> str: + if deps: + return f'{repo}:{",".join(sorted(deps))}' + else: + return repo + + def _new_repo( + self, + repo: str, + ref: str, + deps: Sequence[str], + make_strategy: Callable[[str], None], + ) -> str: + repo = self.db_repo_name(repo, deps) + + def _get_result() -> Optional[str]: + # Check if we already exist + with self.connect() as db: + result = db.execute( + 'SELECT path FROM repos WHERE repo = ? AND ref = ?', + (repo, ref), + ).fetchone() + return result[0] if result else None + + result = _get_result() + if result: + return result + with self.exclusive_lock(): + # Another process may have already completed this work + result = _get_result() + if result: # pragma: no cover (race) + return result + + logger.info(f'Initializing environment for {repo}.') + + directory = tempfile.mkdtemp(prefix='repo', dir=self.directory) + with clean_path_on_failure(directory): + make_strategy(directory) + + # Update our db with the created repo + with self.connect() as db: + db.execute( + 'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)', + [repo, ref, directory], + ) + return directory + + def _complete_clone(self, ref: str, git_cmd: Callable[..., None]) -> None: + """Perform a complete clone of a repository and its submodules """ + + git_cmd('fetch', 'origin', '--tags') + git_cmd('checkout', ref) + git_cmd('submodule', 'update', '--init', '--recursive') + + def _shallow_clone(self, ref: str, git_cmd: Callable[..., None]) -> None: + """Perform a shallow clone of a repository and its submodules """ + + git_config = 'protocol.version=2' + git_cmd('-c', git_config, 'fetch', 'origin', ref, '--depth=1') + git_cmd('checkout', 'FETCH_HEAD') + git_cmd( + '-c', git_config, 'submodule', 'update', '--init', '--recursive', + '--depth=1', + ) + + def clone(self, repo: str, ref: str, deps: Sequence[str] = ()) -> str: + """Clone the given url and checkout the specific ref.""" + + def clone_strategy(directory: str) -> None: + git.init_repo(directory, repo) + env = git.no_git_env() + + def _git_cmd(*args: str) -> None: + cmd_output_b('git', *args, cwd=directory, env=env) + + try: + self._shallow_clone(ref, _git_cmd) + except CalledProcessError: + self._complete_clone(ref, _git_cmd) + + return self._new_repo(repo, ref, deps, clone_strategy) + + LOCAL_RESOURCES = ( + 'Cargo.toml', 'main.go', 'main.rs', '.npmignore', 'package.json', + 'pre_commit_dummy_package.gemspec', 'setup.py', 'environment.yml', + 'Makefile.PL', + ) + + def make_local(self, deps: Sequence[str]) -> str: + def make_local_strategy(directory: str) -> None: + for resource in self.LOCAL_RESOURCES: + contents = resource_text(f'empty_template_{resource}') + with open(os.path.join(directory, resource), 'w') as f: + f.write(contents) + + env = git.no_git_env() + + # initialize the git repository so it looks more like cloned repos + def _git_cmd(*args: str) -> None: + cmd_output_b('git', *args, cwd=directory, env=env) + + git.init_repo(directory, '<<unknown>>') + _git_cmd('add', '.') + git.commit(repo=directory) + + return self._new_repo( + 'local', C.LOCAL_REPO_VERSION, deps, make_local_strategy, + ) + + def _create_config_table(self, db: sqlite3.Connection) -> None: + db.executescript( + 'CREATE TABLE IF NOT EXISTS configs (' + ' path TEXT NOT NULL,' + ' PRIMARY KEY (path)' + ');', + ) + + def mark_config_used(self, path: str) -> None: + path = os.path.realpath(path) + # don't insert config files that do not exist + if not os.path.exists(path): + return + with self.connect() as db: + # TODO: eventually remove this and only create in _create + self._create_config_table(db) + db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) + + def select_all_configs(self) -> List[str]: + with self.connect() as db: + self._create_config_table(db) + rows = db.execute('SELECT path FROM configs').fetchall() + return [path for path, in rows] + + def delete_configs(self, configs: List[str]) -> None: + with self.connect() as db: + rows = [(path,) for path in configs] + db.executemany('DELETE FROM configs WHERE path = ?', rows) + + def select_all_repos(self) -> List[Tuple[str, str, str]]: + with self.connect() as db: + return db.execute('SELECT repo, ref, path from repos').fetchall() + + def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None: + with self.connect() as db: + db.execute( + 'DELETE FROM repos WHERE repo = ? and ref = ?', + (db_repo_name, ref), + ) + rmtree(path) diff --git a/pre_commit/util.py b/pre_commit/util.py new file mode 100644 index 0000000..2db579a --- /dev/null +++ b/pre_commit/util.py @@ -0,0 +1,272 @@ +import contextlib +import errno +import functools +import os.path +import shutil +import stat +import subprocess +import sys +import tempfile +from types import TracebackType +from typing import Any +from typing import Callable +from typing import Dict +from typing import Generator +from typing import IO +from typing import Optional +from typing import Tuple +from typing import Type +from typing import Union + +import yaml + +from pre_commit import parse_shebang + +if sys.version_info >= (3, 7): # pragma: no cover (PY37+) + from importlib.resources import open_binary + from importlib.resources import read_text +else: # pragma: no cover (<PY37) + from importlib_resources import open_binary + from importlib_resources import read_text + +EnvironT = Union[Dict[str, str], 'os._Environ'] + +Loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader) +yaml_load = functools.partial(yaml.load, Loader=Loader) +Dumper = getattr(yaml, 'CSafeDumper', yaml.SafeDumper) + + +def yaml_dump(o: Any) -> str: + # when python/mypy#1484 is solved, this can be `functools.partial` + return yaml.dump( + o, Dumper=Dumper, default_flow_style=False, indent=4, sort_keys=False, + ) + + +def force_bytes(exc: Any) -> bytes: + with contextlib.suppress(TypeError): + return bytes(exc) + with contextlib.suppress(Exception): + return str(exc).encode() + return f'<unprintable {type(exc).__name__} object>'.encode() + + +@contextlib.contextmanager +def clean_path_on_failure(path: str) -> Generator[None, None, None]: + """Cleans up the directory on an exceptional failure.""" + try: + yield + except BaseException: + if os.path.exists(path): + rmtree(path) + raise + + +@contextlib.contextmanager +def tmpdir() -> Generator[str, None, None]: + """Contextmanager to create a temporary directory. It will be cleaned up + afterwards. + """ + tempdir = tempfile.mkdtemp() + try: + yield tempdir + finally: + rmtree(tempdir) + + +def resource_bytesio(filename: str) -> IO[bytes]: + return open_binary('pre_commit.resources', filename) + + +def resource_text(filename: str) -> str: + return read_text('pre_commit.resources', filename) + + +def make_executable(filename: str) -> None: + original_mode = os.stat(filename).st_mode + new_mode = original_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + os.chmod(filename, new_mode) + + +class CalledProcessError(RuntimeError): + def __init__( + self, + returncode: int, + cmd: Tuple[str, ...], + expected_returncode: int, + stdout: bytes, + stderr: Optional[bytes], + ) -> None: + super().__init__(returncode, cmd, expected_returncode, stdout, stderr) + self.returncode = returncode + self.cmd = cmd + self.expected_returncode = expected_returncode + self.stdout = stdout + self.stderr = stderr + + def __bytes__(self) -> bytes: + def _indent_or_none(part: Optional[bytes]) -> bytes: + if part: + return b'\n ' + part.replace(b'\n', b'\n ') + else: + return b' (none)' + + return b''.join(( + f'command: {self.cmd!r}\n'.encode(), + f'return code: {self.returncode}\n'.encode(), + f'expected return code: {self.expected_returncode}\n'.encode(), + b'stdout:', _indent_or_none(self.stdout), b'\n', + b'stderr:', _indent_or_none(self.stderr), + )) + + def __str__(self) -> str: + return self.__bytes__().decode() + + +def _setdefault_kwargs(kwargs: Dict[str, Any]) -> None: + for arg in ('stdin', 'stdout', 'stderr'): + kwargs.setdefault(arg, subprocess.PIPE) + + +def _oserror_to_output(e: OSError) -> Tuple[int, bytes, None]: + return 1, force_bytes(e).rstrip(b'\n') + b'\n', None + + +def cmd_output_b( + *cmd: str, + retcode: Optional[int] = 0, + **kwargs: Any, +) -> Tuple[int, bytes, Optional[bytes]]: + _setdefault_kwargs(kwargs) + + try: + cmd = parse_shebang.normalize_cmd(cmd) + except parse_shebang.ExecutableNotFoundError as e: + returncode, stdout_b, stderr_b = e.to_output() + else: + try: + proc = subprocess.Popen(cmd, **kwargs) + except OSError as e: + returncode, stdout_b, stderr_b = _oserror_to_output(e) + else: + stdout_b, stderr_b = proc.communicate() + returncode = proc.returncode + + if retcode is not None and retcode != returncode: + raise CalledProcessError(returncode, cmd, retcode, stdout_b, stderr_b) + + return returncode, stdout_b, stderr_b + + +def cmd_output(*cmd: str, **kwargs: Any) -> Tuple[int, str, Optional[str]]: + returncode, stdout_b, stderr_b = cmd_output_b(*cmd, **kwargs) + stdout = stdout_b.decode() if stdout_b is not None else None + stderr = stderr_b.decode() if stderr_b is not None else None + return returncode, stdout, stderr + + +if os.name != 'nt': # pragma: win32 no cover + from os import openpty + import termios + + class Pty: + def __init__(self) -> None: + self.r: Optional[int] = None + self.w: Optional[int] = None + + def __enter__(self) -> 'Pty': + self.r, self.w = openpty() + + # tty flags normally change \n to \r\n + attrs = termios.tcgetattr(self.r) + assert isinstance(attrs[1], int) + attrs[1] &= ~(termios.ONLCR | termios.OPOST) + termios.tcsetattr(self.r, termios.TCSANOW, attrs) + + return self + + def close_w(self) -> None: + if self.w is not None: + os.close(self.w) + self.w = None + + def close_r(self) -> None: + assert self.r is not None + os.close(self.r) + self.r = None + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: + self.close_w() + self.close_r() + + def cmd_output_p( + *cmd: str, + retcode: Optional[int] = 0, + **kwargs: Any, + ) -> Tuple[int, bytes, Optional[bytes]]: + assert retcode is None + assert kwargs['stderr'] == subprocess.STDOUT, kwargs['stderr'] + _setdefault_kwargs(kwargs) + + try: + cmd = parse_shebang.normalize_cmd(cmd) + except parse_shebang.ExecutableNotFoundError as e: + return e.to_output() + + with open(os.devnull) as devnull, Pty() as pty: + assert pty.r is not None + kwargs.update({'stdin': devnull, 'stdout': pty.w, 'stderr': pty.w}) + try: + proc = subprocess.Popen(cmd, **kwargs) + except OSError as e: + return _oserror_to_output(e) + + pty.close_w() + + buf = b'' + while True: + try: + bts = os.read(pty.r, 4096) + except OSError as e: + if e.errno == errno.EIO: + bts = b'' + else: + raise + else: + buf += bts + if not bts: + break + + return proc.wait(), buf, None +else: # pragma: no cover + cmd_output_p = cmd_output_b + + +def rmtree(path: str) -> None: + """On windows, rmtree fails for readonly dirs.""" + def handle_remove_readonly( + func: Callable[..., Any], + path: str, + exc: Tuple[Type[OSError], OSError, TracebackType], + ) -> None: + excvalue = exc[1] + if ( + func in (os.rmdir, os.remove, os.unlink) and + excvalue.errno == errno.EACCES + ): + for p in (path, os.path.dirname(path)): + os.chmod(p, os.stat(p).st_mode | stat.S_IWUSR) + func(path) + else: + raise + shutil.rmtree(path, ignore_errors=False, onerror=handle_remove_readonly) + + +def parse_version(s: str) -> Tuple[int, ...]: + """poor man's version comparison""" + return tuple(int(p) for p in s.split('.')) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py new file mode 100644 index 0000000..5235dc6 --- /dev/null +++ b/pre_commit/xargs.py @@ -0,0 +1,157 @@ +import concurrent.futures +import contextlib +import math +import os +import subprocess +import sys +from typing import Any +from typing import Callable +from typing import Generator +from typing import Iterable +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import TypeVar + +from pre_commit import parse_shebang +from pre_commit.util import cmd_output_b +from pre_commit.util import cmd_output_p +from pre_commit.util import EnvironT + +TArg = TypeVar('TArg') +TRet = TypeVar('TRet') + + +def _environ_size(_env: Optional[EnvironT] = None) -> int: + environ = _env if _env is not None else getattr(os, 'environb', os.environ) + size = 8 * len(environ) # number of pointers in `envp` + for k, v in environ.items(): + size += len(k) + len(v) + 2 # c strings in `envp` + return size + + +def _get_platform_max_length() -> int: # pragma: no cover (platform specific) + if os.name == 'posix': + maximum = os.sysconf('SC_ARG_MAX') - 2048 - _environ_size() + maximum = max(min(maximum, 2 ** 17), 2 ** 12) + return maximum + elif os.name == 'nt': + return 2 ** 15 - 2048 # UNICODE_STRING max - headroom + else: + # posix minimum + return 2 ** 12 + + +def _command_length(*cmd: str) -> int: + full_cmd = ' '.join(cmd) + + # win32 uses the amount of characters, more details at: + # https://github.com/pre-commit/pre-commit/pull/839 + if sys.platform == 'win32': + return len(full_cmd.encode('utf-16le')) // 2 + else: + return len(full_cmd.encode(sys.getfilesystemencoding())) + + +class ArgumentTooLongError(RuntimeError): + pass + + +def partition( + cmd: Sequence[str], + varargs: Sequence[str], + target_concurrency: int, + _max_length: Optional[int] = None, +) -> Tuple[Tuple[str, ...], ...]: + _max_length = _max_length or _get_platform_max_length() + + # Generally, we try to partition evenly into at least `target_concurrency` + # partitions, but we don't want a bunch of tiny partitions. + max_args = max(4, math.ceil(len(varargs) / target_concurrency)) + + cmd = tuple(cmd) + ret = [] + + ret_cmd: List[str] = [] + # Reversed so arguments are in order + varargs = list(reversed(varargs)) + + total_length = _command_length(*cmd) + 1 + while varargs: + arg = varargs.pop() + + arg_length = _command_length(arg) + 1 + if ( + total_length + arg_length <= _max_length and + len(ret_cmd) < max_args + ): + ret_cmd.append(arg) + total_length += arg_length + elif not ret_cmd: + raise ArgumentTooLongError(arg) + else: + # We've exceeded the length, yield a command + ret.append(cmd + tuple(ret_cmd)) + ret_cmd = [] + total_length = _command_length(*cmd) + 1 + varargs.append(arg) + + ret.append(cmd + tuple(ret_cmd)) + + return tuple(ret) + + +@contextlib.contextmanager +def _thread_mapper(maxsize: int) -> Generator[ + Callable[[Callable[[TArg], TRet], Iterable[TArg]], Iterable[TRet]], + None, None, +]: + if maxsize == 1: + yield map + else: + with concurrent.futures.ThreadPoolExecutor(maxsize) as ex: + yield ex.map + + +def xargs( + cmd: Tuple[str, ...], + varargs: Sequence[str], + *, + color: bool = False, + target_concurrency: int = 1, + _max_length: int = _get_platform_max_length(), + **kwargs: Any, +) -> Tuple[int, bytes]: + """A simplified implementation of xargs. + + color: Make a pty if on a platform that supports it + target_concurrency: Target number of partitions to run concurrently + """ + cmd_fn = cmd_output_p if color else cmd_output_b + retcode = 0 + stdout = b'' + + try: + cmd = parse_shebang.normalize_cmd(cmd) + except parse_shebang.ExecutableNotFoundError as e: + return e.to_output()[:2] + + partitions = partition(cmd, varargs, target_concurrency, _max_length) + + def run_cmd_partition( + run_cmd: Tuple[str, ...], + ) -> Tuple[int, bytes, Optional[bytes]]: + return cmd_fn( + *run_cmd, retcode=None, stderr=subprocess.STDOUT, **kwargs, + ) + + threads = min(len(partitions), target_concurrency) + with _thread_mapper(threads) as thread_map: + results = thread_map(run_cmd_partition, partitions) + + for proc_retcode, proc_out, _ in results: + retcode = max(retcode, proc_retcode) + stdout += proc_out + + return retcode, stdout |