summaryrefslogtreecommitdiffstats
path: root/pre_commit
diff options
context:
space:
mode:
Diffstat (limited to 'pre_commit')
-rw-r--r--pre_commit/__init__.py0
-rw-r--r--pre_commit/__main__.py7
-rw-r--r--pre_commit/all_languages.py50
-rw-r--r--pre_commit/clientlib.py405
-rw-r--r--pre_commit/color.py109
-rw-r--r--pre_commit/commands/__init__.py0
-rw-r--r--pre_commit/commands/autoupdate.py215
-rw-r--r--pre_commit/commands/clean.py16
-rw-r--r--pre_commit/commands/gc.py89
-rw-r--r--pre_commit/commands/hook_impl.py271
-rw-r--r--pre_commit/commands/init_templatedir.py39
-rw-r--r--pre_commit/commands/install_uninstall.py167
-rw-r--r--pre_commit/commands/migrate_config.py75
-rw-r--r--pre_commit/commands/run.py447
-rw-r--r--pre_commit/commands/sample_config.py18
-rw-r--r--pre_commit/commands/try_repo.py77
-rw-r--r--pre_commit/commands/validate_config.py18
-rw-r--r--pre_commit/commands/validate_manifest.py18
-rw-r--r--pre_commit/constants.py13
-rw-r--r--pre_commit/envcontext.py62
-rw-r--r--pre_commit/error_handler.py81
-rw-r--r--pre_commit/errors.py5
-rw-r--r--pre_commit/file_lock.py75
-rw-r--r--pre_commit/git.py245
-rw-r--r--pre_commit/hook.py60
-rw-r--r--pre_commit/lang_base.py192
-rw-r--r--pre_commit/languages/__init__.py0
-rw-r--r--pre_commit/languages/conda.py77
-rw-r--r--pre_commit/languages/coursier.py76
-rw-r--r--pre_commit/languages/dart.py97
-rw-r--r--pre_commit/languages/docker.py146
-rw-r--r--pre_commit/languages/docker_image.py32
-rw-r--r--pre_commit/languages/dotnet.py111
-rw-r--r--pre_commit/languages/fail.py27
-rw-r--r--pre_commit/languages/golang.py160
-rw-r--r--pre_commit/languages/haskell.py56
-rw-r--r--pre_commit/languages/lua.py75
-rw-r--r--pre_commit/languages/node.py110
-rw-r--r--pre_commit/languages/perl.py50
-rw-r--r--pre_commit/languages/pygrep.py133
-rw-r--r--pre_commit/languages/python.py214
-rw-r--r--pre_commit/languages/r.py195
-rw-r--r--pre_commit/languages/ruby.py145
-rw-r--r--pre_commit/languages/rust.py160
-rw-r--r--pre_commit/languages/script.py32
-rw-r--r--pre_commit/languages/swift.py50
-rw-r--r--pre_commit/languages/system.py10
-rw-r--r--pre_commit/logging_handler.py42
-rw-r--r--pre_commit/main.py442
-rw-r--r--pre_commit/meta_hooks/__init__.py0
-rw-r--r--pre_commit/meta_hooks/check_hooks_apply.py43
-rw-r--r--pre_commit/meta_hooks/check_useless_excludes.py83
-rw-r--r--pre_commit/meta_hooks/identity.py17
-rw-r--r--pre_commit/output.py33
-rw-r--r--pre_commit/parse_shebang.py85
-rw-r--r--pre_commit/prefix.py18
-rw-r--r--pre_commit/repository.py246
-rw-r--r--pre_commit/resources/__init__.py0
-rw-r--r--pre_commit/resources/empty_template_.npmignore1
-rw-r--r--pre_commit/resources/empty_template_Cargo.toml7
-rw-r--r--pre_commit/resources/empty_template_LICENSE.renv7
-rw-r--r--pre_commit/resources/empty_template_Makefile.PL6
-rw-r--r--pre_commit/resources/empty_template_activate.R440
-rw-r--r--pre_commit/resources/empty_template_environment.yml9
-rw-r--r--pre_commit/resources/empty_template_go.mod1
-rw-r--r--pre_commit/resources/empty_template_main.go3
-rw-r--r--pre_commit/resources/empty_template_main.rs1
-rw-r--r--pre_commit/resources/empty_template_package.json4
-rw-r--r--pre_commit/resources/empty_template_pre-commit-package-dev-1.rockspec12
-rw-r--r--pre_commit/resources/empty_template_pre_commit_placeholder_package.gemspec6
-rw-r--r--pre_commit/resources/empty_template_pubspec.yaml4
-rw-r--r--pre_commit/resources/empty_template_renv.lock20
-rw-r--r--pre_commit/resources/empty_template_setup.py4
-rwxr-xr-xpre_commit/resources/hook-tmpl20
-rw-r--r--pre_commit/resources/rbenv.tar.gzbin0 -> 32551 bytes
-rw-r--r--pre_commit/resources/ruby-build.tar.gzbin0 -> 75808 bytes
-rw-r--r--pre_commit/resources/ruby-download.tar.gzbin0 -> 5271 bytes
-rw-r--r--pre_commit/staged_files_only.py113
-rw-r--r--pre_commit/store.py254
-rw-r--r--pre_commit/util.py238
-rw-r--r--pre_commit/xargs.py185
-rw-r--r--pre_commit/yaml.py18
82 files changed, 7072 insertions, 0 deletions
diff --git a/pre_commit/__init__.py b/pre_commit/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pre_commit/__init__.py
diff --git a/pre_commit/__main__.py b/pre_commit/__main__.py
new file mode 100644
index 0000000..bda61ee
--- /dev/null
+++ b/pre_commit/__main__.py
@@ -0,0 +1,7 @@
+from __future__ import annotations
+
+from pre_commit.main import main
+
+
+if __name__ == '__main__':
+ raise SystemExit(main())
diff --git a/pre_commit/all_languages.py b/pre_commit/all_languages.py
new file mode 100644
index 0000000..476bad9
--- /dev/null
+++ b/pre_commit/all_languages.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from pre_commit.lang_base import Language
+from pre_commit.languages import conda
+from pre_commit.languages import coursier
+from pre_commit.languages import dart
+from pre_commit.languages import docker
+from pre_commit.languages import docker_image
+from pre_commit.languages import dotnet
+from pre_commit.languages import fail
+from pre_commit.languages import golang
+from pre_commit.languages import haskell
+from pre_commit.languages import lua
+from pre_commit.languages import node
+from pre_commit.languages import perl
+from pre_commit.languages import pygrep
+from pre_commit.languages import python
+from pre_commit.languages import r
+from pre_commit.languages import ruby
+from pre_commit.languages import rust
+from pre_commit.languages import script
+from pre_commit.languages import swift
+from pre_commit.languages import system
+
+
+languages: dict[str, Language] = {
+ 'conda': conda,
+ 'coursier': coursier,
+ 'dart': dart,
+ 'docker': docker,
+ 'docker_image': docker_image,
+ 'dotnet': dotnet,
+ 'fail': fail,
+ 'golang': golang,
+ 'haskell': haskell,
+ 'lua': lua,
+ 'node': node,
+ 'perl': perl,
+ 'pygrep': pygrep,
+ 'python': python,
+ 'r': r,
+ 'ruby': ruby,
+ 'rust': rust,
+ 'script': script,
+ 'swift': swift,
+ 'system': system,
+ # TODO: fully deprecate `python_venv`
+ 'python_venv': python,
+}
+language_names = sorted(languages)
diff --git a/pre_commit/clientlib.py b/pre_commit/clientlib.py
new file mode 100644
index 0000000..a49465e
--- /dev/null
+++ b/pre_commit/clientlib.py
@@ -0,0 +1,405 @@
+from __future__ import annotations
+
+import functools
+import logging
+import re
+import shlex
+import sys
+from collections.abc import Sequence
+from typing import Any
+from typing import NamedTuple
+
+import cfgv
+from identify.identify import ALL_TAGS
+
+import pre_commit.constants as C
+from pre_commit.all_languages import language_names
+from pre_commit.errors import FatalError
+from pre_commit.yaml import yaml_load
+
+logger = logging.getLogger('pre_commit')
+
+check_string_regex = cfgv.check_and(cfgv.check_string, cfgv.check_regex)
+
+HOOK_TYPES = (
+ 'commit-msg',
+ 'post-checkout',
+ 'post-commit',
+ 'post-merge',
+ 'post-rewrite',
+ 'pre-commit',
+ 'pre-merge-commit',
+ 'pre-push',
+ 'pre-rebase',
+ 'prepare-commit-msg',
+)
+# `manual` is not invoked by any installed git hook. See #719
+STAGES = (*HOOK_TYPES, 'manual')
+
+
def check_type_tag(tag: str) -> None:
    """Reject ``tag`` unless it is one of identify's ``ALL_TAGS``.

    Raises:
        cfgv.ValidationError: when ``tag`` is not a member of ``ALL_TAGS``.
    """
    if tag in ALL_TAGS:
        return

    raise cfgv.ValidationError(
        f'Type tag {tag!r} is not recognized. '
        f'Try upgrading identify and pre-commit?',
    )
+
+
def parse_version(s: str) -> tuple[int, ...]:
    """Turn a dotted version string into a tuple of ints.

    This is a deliberately simple comparison key: each ``.``-separated
    component is passed to ``int()``, so a non-numeric component raises
    ``ValueError``.
    """
    return tuple(map(int, s.split('.')))
+
+
def check_min_version(version: str) -> None:
    """Ensure the running pre-commit is at least ``version``.

    Raises:
        cfgv.ValidationError: when ``version`` parses to something greater
            than ``C.VERSION``.
    """
    required = parse_version(version)
    installed = parse_version(C.VERSION)
    if required <= installed:
        return

    raise cfgv.ValidationError(
        f'pre-commit version {version} is required but version '
        f'{C.VERSION} is installed. '
        f'Perhaps run `pip install --upgrade pre-commit`.',
    )
+
+
# legacy stage names (keys) and the hook-type names they are rewritten to
_STAGES = {
    'commit': 'pre-commit',
    'merge-commit': 'pre-merge-commit',
    'push': 'pre-push',
}


def transform_stage(stage: str) -> str:
    """Return the current name for ``stage``.

    Names listed in ``_STAGES`` are translated; anything else is returned
    unchanged.
    """
    try:
        return _STAGES[stage]
    except KeyError:
        return stage
+
+
class StagesMigrationNoDefault(NamedTuple):
    """Schema item for a list-of-stages key that accepts legacy names.

    Exposes the ``check`` / ``apply_default`` / ``remove_default`` methods
    so it can sit in a ``cfgv.Map`` next to the stock cfgv items.  This
    variant never inserts a value for a missing key.
    """
    key: str
    default: Sequence[str]

    def check(self, dct: dict[str, Any]) -> None:
        """Validate the stages under ``self.key``, if the key is present."""
        if self.key in dct:
            raw = dct[self.key]
            # must be an array before the legacy names can be translated
            cfgv.check_array(cfgv.check_any)(raw)

            migrated = [transform_stage(stage) for stage in raw]
            cfgv.check_array(cfgv.check_one_of(STAGES))(migrated)

    def apply_default(self, dct: dict[str, Any]) -> None:
        """Rewrite legacy stage names in place; a missing key is left alone."""
        if self.key in dct:
            dct[self.key] = [
                transform_stage(stage) for stage in dct[self.key]
            ]

    def remove_default(self, dct: dict[str, Any]) -> None:
        """Not supported for this item."""
        raise NotImplementedError
+
+
class StagesMigration(StagesMigrationNoDefault):
    """``StagesMigrationNoDefault`` that fills in ``default`` when missing."""

    def apply_default(self, dct: dict[str, Any]) -> None:
        """Insert the default for a missing key, then migrate stage names."""
        if self.key not in dct:
            dct[self.key] = self.default
        super().apply_default(dct)
+
+
+MANIFEST_HOOK_DICT = cfgv.Map(
+ 'Hook', 'id',
+
+ # check first in case it uses some newer, incompatible feature
+ cfgv.Optional(
+ 'minimum_pre_commit_version',
+ cfgv.check_and(cfgv.check_string, check_min_version),
+ '0',
+ ),
+
+ cfgv.Required('id', cfgv.check_string),
+ cfgv.Required('name', cfgv.check_string),
+ cfgv.Required('entry', cfgv.check_string),
+ cfgv.Required('language', cfgv.check_one_of(language_names)),
+ cfgv.Optional('alias', cfgv.check_string, ''),
+
+ cfgv.Optional('files', check_string_regex, ''),
+ cfgv.Optional('exclude', check_string_regex, '^$'),
+ cfgv.Optional('types', cfgv.check_array(check_type_tag), ['file']),
+ cfgv.Optional('types_or', cfgv.check_array(check_type_tag), []),
+ cfgv.Optional('exclude_types', cfgv.check_array(check_type_tag), []),
+
+ cfgv.Optional(
+ 'additional_dependencies', cfgv.check_array(cfgv.check_string), [],
+ ),
+ cfgv.Optional('args', cfgv.check_array(cfgv.check_string), []),
+ cfgv.Optional('always_run', cfgv.check_bool, False),
+ cfgv.Optional('fail_fast', cfgv.check_bool, False),
+ cfgv.Optional('pass_filenames', cfgv.check_bool, True),
+ cfgv.Optional('description', cfgv.check_string, ''),
+ cfgv.Optional('language_version', cfgv.check_string, C.DEFAULT),
+ cfgv.Optional('log_file', cfgv.check_string, ''),
+ cfgv.Optional('require_serial', cfgv.check_bool, False),
+ StagesMigration('stages', []),
+ cfgv.Optional('verbose', cfgv.check_bool, False),
+)
+MANIFEST_SCHEMA = cfgv.Array(MANIFEST_HOOK_DICT)
+
+
class InvalidManifestError(FatalError):
    """``FatalError`` subclass passed as ``exc_tp`` to ``load_manifest``."""
+
+
+load_manifest = functools.partial(
+ cfgv.load_from_filename,
+ schema=MANIFEST_SCHEMA,
+ load_strategy=yaml_load,
+ exc_tp=InvalidManifestError,
+)
+
+
+LOCAL = 'local'
+META = 'meta'
+
+
class WarnMutableRev(cfgv.Conditional):
    """``cfgv.Conditional`` that also warns about mutable-looking values."""

    def check(self, dct: dict[str, Any]) -> None:
        """Run the inherited check, then warn if the value looks mutable.

        A value containing a ``.`` or consisting solely of hexadecimal
        digits is accepted silently; any other value logs a warning.
        """
        super().check(dct)

        if self.key not in dct:
            return

        rev = dct[self.key]
        if '.' in rev or re.match(r'^[a-fA-F0-9]+$', rev):
            return

        logger.warning(
            f'The {self.key!r} field of repo {dct["repo"]!r} '
            f'appears to be a mutable reference '
            f'(moving tag / branch). Mutable references are never '
            f'updated after first install and are not supported. '
            f'See https://pre-commit.com/#using-the-latest-version-for-a-repository '  # noqa: E501
            f'for more details. '
            f'Hint: `pre-commit autoupdate` often fixes this.',
        )
+
+
class OptionalSensibleRegexAtHook(cfgv.OptionalNoDefault):
    """Optional hook-level regex field that warns about suspicious patterns."""

    def check(self, dct: dict[str, Any]) -> None:
        """Run the inherited check, then log warnings for odd-looking regexes.

        Warns when the pattern contains ``/*`` (a likely glob) and once for
        each redundant slash character class found in it.
        """
        super().check(dct)

        pattern = dct.get(self.key, '')
        hook_id = dct.get('id')

        if '/*' in pattern:
            logger.warning(
                f'The {self.key!r} field in hook {hook_id!r} is a '
                f"regex, not a glob -- matching '/*' probably isn't what you "
                f'want here',
            )

        for fwd_slash_re in (r'[\\/]', r'[\/]', r'[/\\]'):
            if fwd_slash_re not in pattern:
                continue
            logger.warning(
                fr'pre-commit normalizes slashes in the {self.key!r} '
                fr'field in hook {hook_id!r} to forward slashes, '
                fr'so you can use / instead of {fwd_slash_re}',
            )
+
+
class OptionalSensibleRegexAtTop(cfgv.OptionalNoDefault):
    """Optional top-level regex field that warns about suspicious patterns."""

    def check(self, dct: dict[str, Any]) -> None:
        """Run the inherited check, then log warnings for odd-looking regexes.

        Warns when the pattern contains ``/*`` (a likely glob) and once for
        each redundant slash character class found in it.
        """
        super().check(dct)

        pattern = dct.get(self.key, '')

        if '/*' in pattern:
            logger.warning(
                f'The top-level {self.key!r} field is a regex, not a glob -- '
                f"matching '/*' probably isn't what you want here",
            )

        for fwd_slash_re in (r'[\\/]', r'[\/]', r'[/\\]'):
            if fwd_slash_re not in pattern:
                continue
            logger.warning(
                fr'pre-commit normalizes the slashes in the top-level '
                fr'{self.key!r} field to forward slashes, so you '
                fr'can use / instead of {fwd_slash_re}',
            )
+
+
def _entry(modname: str) -> str:
    """Build the ``entry`` command line for the meta hook ``modname``.

    The hook `entry` is passed through `shlex.split()` by the command
    runner, so the interpreter path is quoted here to survive spaces and
    backslashes (on Windows).
    """
    interpreter = shlex.quote(sys.executable)
    return f'{interpreter} -m pre_commit.meta_hooks.{modname}'
+
+
def warn_unknown_keys_root(
        extra: Sequence[str],
        orig_keys: Sequence[str],
        dct: dict[str, str],
) -> None:
    """Log a warning naming the unexpected keys in ``extra``.

    ``orig_keys`` and ``dct`` are accepted but not used.
    """
    unexpected = ', '.join(extra)
    logger.warning(f'Unexpected key(s) present at root: {unexpected}')
+
+
def warn_unknown_keys_repo(
        extra: Sequence[str],
        orig_keys: Sequence[str],
        dct: dict[str, str],
) -> None:
    """Log a warning naming the unexpected keys in ``extra`` for a repo.

    The repo is identified by ``dct["repo"]``; ``orig_keys`` is accepted
    but not used.
    """
    unexpected = ', '.join(extra)
    logger.warning(
        f'Unexpected key(s) present on {dct["repo"]}: {unexpected}',
    )
+
+
+_meta = (
+ (
+ 'check-hooks-apply', (
+ ('name', 'Check hooks apply to the repository'),
+ ('files', f'^{re.escape(C.CONFIG_FILE)}$'),
+ ('entry', _entry('check_hooks_apply')),
+ ),
+ ),
+ (
+ 'check-useless-excludes', (
+ ('name', 'Check for useless excludes'),
+ ('files', f'^{re.escape(C.CONFIG_FILE)}$'),
+ ('entry', _entry('check_useless_excludes')),
+ ),
+ ),
+ (
+ 'identity', (
+ ('name', 'identity'),
+ ('verbose', True),
+ ('entry', _entry('identity')),
+ ),
+ ),
+)
+
+
class NotAllowed(cfgv.OptionalNoDefault):
    """Schema item whose key must not be present at all."""

    def check(self, dct: dict[str, Any]) -> None:
        """Raise ``cfgv.ValidationError`` when ``self.key`` is in ``dct``."""
        if self.key not in dct:
            return
        raise cfgv.ValidationError(f'{self.key!r} cannot be overridden')
+
+
+META_HOOK_DICT = cfgv.Map(
+ 'Hook', 'id',
+ cfgv.Required('id', cfgv.check_string),
+ cfgv.Required('id', cfgv.check_one_of(tuple(k for k, _ in _meta))),
+ # language must be system
+ cfgv.Optional('language', cfgv.check_one_of({'system'}), 'system'),
+ # entry cannot be overridden
+ NotAllowed('entry', cfgv.check_any),
+ *(
+ # default to the hook definition for the meta hooks
+ cfgv.ConditionalOptional(key, cfgv.check_any, value, 'id', hook_id)
+ for hook_id, values in _meta
+ for key, value in values
+ ),
+ *(
+ # default to the "manifest" parsing
+ cfgv.OptionalNoDefault(item.key, item.check_fn)
+ # these will always be defaulted above
+ if item.key in {'name', 'language', 'entry'} else
+ item
+ for item in MANIFEST_HOOK_DICT.items
+ ),
+)
+CONFIG_HOOK_DICT = cfgv.Map(
+ 'Hook', 'id',
+
+ cfgv.Required('id', cfgv.check_string),
+
+ # All keys in manifest hook dict are valid in a config hook dict, but
+ # are optional.
+ # No defaults are provided here as the config is merged on top of the
+ # manifest.
+ *(
+ cfgv.OptionalNoDefault(item.key, item.check_fn)
+ for item in MANIFEST_HOOK_DICT.items
+ if item.key != 'id'
+ if item.key != 'stages'
+ ),
+ StagesMigrationNoDefault('stages', []),
+ OptionalSensibleRegexAtHook('files', cfgv.check_string),
+ OptionalSensibleRegexAtHook('exclude', cfgv.check_string),
+)
+LOCAL_HOOK_DICT = cfgv.Map(
+ 'Hook', 'id',
+
+ *MANIFEST_HOOK_DICT.items,
+
+ OptionalSensibleRegexAtHook('files', cfgv.check_string),
+ OptionalSensibleRegexAtHook('exclude', cfgv.check_string),
+)
+CONFIG_REPO_DICT = cfgv.Map(
+ 'Repository', 'repo',
+
+ cfgv.Required('repo', cfgv.check_string),
+
+ cfgv.ConditionalRecurse(
+ 'hooks', cfgv.Array(CONFIG_HOOK_DICT),
+ 'repo', cfgv.NotIn(LOCAL, META),
+ ),
+ cfgv.ConditionalRecurse(
+ 'hooks', cfgv.Array(LOCAL_HOOK_DICT),
+ 'repo', LOCAL,
+ ),
+ cfgv.ConditionalRecurse(
+ 'hooks', cfgv.Array(META_HOOK_DICT),
+ 'repo', META,
+ ),
+
+ WarnMutableRev(
+ 'rev', cfgv.check_string,
+ condition_key='repo',
+ condition_value=cfgv.NotIn(LOCAL, META),
+ ensure_absent=True,
+ ),
+ cfgv.WarnAdditionalKeys(('repo', 'rev', 'hooks'), warn_unknown_keys_repo),
+)
+DEFAULT_LANGUAGE_VERSION = cfgv.Map(
+ 'DefaultLanguageVersion', None,
+ cfgv.NoAdditionalKeys(language_names),
+ *(cfgv.Optional(x, cfgv.check_string, C.DEFAULT) for x in language_names),
+)
+CONFIG_SCHEMA = cfgv.Map(
+ 'Config', None,
+
+ # check first in case it uses some newer, incompatible feature
+ cfgv.Optional(
+ 'minimum_pre_commit_version',
+ cfgv.check_and(cfgv.check_string, check_min_version),
+ '0',
+ ),
+
+ cfgv.RequiredRecurse('repos', cfgv.Array(CONFIG_REPO_DICT)),
+ cfgv.Optional(
+ 'default_install_hook_types',
+ cfgv.check_array(cfgv.check_one_of(HOOK_TYPES)),
+ ['pre-commit'],
+ ),
+ cfgv.OptionalRecurse(
+ 'default_language_version', DEFAULT_LANGUAGE_VERSION, {},
+ ),
+ StagesMigration('default_stages', STAGES),
+ cfgv.Optional('files', check_string_regex, ''),
+ cfgv.Optional('exclude', check_string_regex, '^$'),
+ cfgv.Optional('fail_fast', cfgv.check_bool, False),
+ cfgv.WarnAdditionalKeys(
+ (
+ 'repos',
+ 'default_install_hook_types',
+ 'default_language_version',
+ 'default_stages',
+ 'files',
+ 'exclude',
+ 'fail_fast',
+ 'minimum_pre_commit_version',
+ 'ci',
+ ),
+ warn_unknown_keys_root,
+ ),
+ OptionalSensibleRegexAtTop('files', cfgv.check_string),
+ OptionalSensibleRegexAtTop('exclude', cfgv.check_string),
+
+ # do not warn about configuration for pre-commit.ci
+ cfgv.OptionalNoDefault('ci', cfgv.check_type(dict)),
+)
+
+
class InvalidConfigError(FatalError):
    """``FatalError`` subclass passed as ``exc_tp`` to ``load_config``."""
+
+
+load_config = functools.partial(
+ cfgv.load_from_filename,
+ schema=CONFIG_SCHEMA,
+ load_strategy=yaml_load,
+ exc_tp=InvalidConfigError,
+)
diff --git a/pre_commit/color.py b/pre_commit/color.py
new file mode 100644
index 0000000..2d6f248
--- /dev/null
+++ b/pre_commit/color.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+
+if sys.platform == 'win32': # pragma: no cover (windows)
+ def _enable() -> None:
+ from ctypes import POINTER
+ from ctypes import windll
+ from ctypes import WinError
+ from ctypes import WINFUNCTYPE
+ from ctypes.wintypes import BOOL
+ from ctypes.wintypes import DWORD
+ from ctypes.wintypes import HANDLE
+
+ STD_ERROR_HANDLE = -12
+ ENABLE_VIRTUAL_TERMINAL_PROCESSING = 4
+
+ def bool_errcheck(result, func, args):
+ if not result:
+ raise WinError()
+ return args
+
+ GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(
+ ('GetStdHandle', windll.kernel32), ((1, 'nStdHandle'),),
+ )
+
+ GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
+ ('GetConsoleMode', windll.kernel32),
+ ((1, 'hConsoleHandle'), (2, 'lpMode')),
+ )
+ GetConsoleMode.errcheck = bool_errcheck
+
+ SetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, DWORD)(
+ ('SetConsoleMode', windll.kernel32),
+ ((1, 'hConsoleHandle'), (1, 'dwMode')),
+ )
+ SetConsoleMode.errcheck = bool_errcheck
+
+ # As of Windows 10, the Windows console supports (some) ANSI escape
+ # sequences, but it needs to be enabled using `SetConsoleMode` first.
+ #
+ # More info on the escape sequences supported:
+ # https://msdn.microsoft.com/en-us/library/windows/desktop/mt638032(v=vs.85).aspx
+ stderr = GetStdHandle(STD_ERROR_HANDLE)
+ flags = GetConsoleMode(stderr)
+ SetConsoleMode(stderr, flags | ENABLE_VIRTUAL_TERMINAL_PROCESSING)
+
+ try:
+ _enable()
+ except OSError:
+ terminal_supports_color = False
+ else:
+ terminal_supports_color = True
+else: # pragma: win32 no cover
+ terminal_supports_color = True
+
# ANSI escape sequences used as the "color start string" of `format_color`
RED = '\033[41m'
GREEN = '\033[42m'
YELLOW = '\033[43;30m'
TURQUOISE = '\033[46;30m'
SUBTLE = '\033[2m'
# sequence appended after colored text by `format_color`
NORMAL = '\033[m'


def format_color(text: str, color: str, use_color_setting: bool) -> str:
    """Wrap ``text`` in ``color`` ... ``NORMAL`` when coloring is enabled.

    Args:
        text - Text to format
        color - The color start string
        use_color_setting - Whether or not to color; when false ``text``
            is returned unchanged
    """
    return f'{color}{text}{NORMAL}' if use_color_setting else text
+
+
COLOR_CHOICES = ('auto', 'always', 'never')


def use_color(setting: str) -> bool:
    """Decide whether output should be colored for the given ``setting``.

    Args:
        setting - Either `auto`, `always`, or `never`

    Raises:
        ValueError: when ``setting`` is not one of ``COLOR_CHOICES``.
    """
    if setting not in COLOR_CHOICES:
        raise ValueError(setting)

    if setting == 'always':
        return True
    if setting != 'auto':
        return False

    # `auto`: stderr must be a tty on a color-capable, non-dumb terminal
    return (
        sys.stderr.isatty() and
        terminal_supports_color and
        os.getenv('TERM') != 'dumb'
    )
+
+
def add_color_option(parser: argparse.ArgumentParser) -> None:
    """Register the ``--color`` option on ``parser``.

    The default is read from the ``PRE_COMMIT_COLOR`` environment variable
    (falling back to ``auto``) and values are converted by ``use_color``.
    """
    default = os.environ.get('PRE_COMMIT_COLOR', 'auto')
    metavar = '{' + ','.join(COLOR_CHOICES) + '}'
    parser.add_argument(
        '--color',
        default=default,
        type=use_color,
        metavar=metavar,
        help='Whether to use color in output. Defaults to `%(default)s`.',
    )
diff --git a/pre_commit/commands/__init__.py b/pre_commit/commands/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pre_commit/commands/__init__.py
diff --git a/pre_commit/commands/autoupdate.py b/pre_commit/commands/autoupdate.py
new file mode 100644
index 0000000..aa0c5e2
--- /dev/null
+++ b/pre_commit/commands/autoupdate.py
@@ -0,0 +1,215 @@
+from __future__ import annotations
+
+import concurrent.futures
+import os.path
+import re
+import tempfile
+from collections.abc import Sequence
+from typing import Any
+from typing import NamedTuple
+
+import pre_commit.constants as C
+from pre_commit import git
+from pre_commit import output
+from pre_commit import xargs
+from pre_commit.clientlib import InvalidManifestError
+from pre_commit.clientlib import load_config
+from pre_commit.clientlib import load_manifest
+from pre_commit.clientlib import LOCAL
+from pre_commit.clientlib import META
+from pre_commit.commands.migrate_config import migrate_config
+from pre_commit.util import CalledProcessError
+from pre_commit.util import cmd_output
+from pre_commit.util import cmd_output_b
+from pre_commit.yaml import yaml_dump
+from pre_commit.yaml import yaml_load
+
+
class RevInfo(NamedTuple):
    """A repository url and the revision selected for it.

    ``frozen`` and ``hook_ids`` keep their defaults when built through
    ``from_config``; ``update`` returns a copy with them filled in.
    """
    repo: str
    rev: str
    # the symbolic rev, set when `rev` was swapped for its exact commit id
    frozen: str | None = None
    # hook ids listed by the manifest at `rev`
    hook_ids: frozenset[str] = frozenset()

    @classmethod
    def from_config(cls, config: dict[str, Any]) -> RevInfo:
        """Build a ``RevInfo`` from a repo config's ``repo`` and ``rev``."""
        return cls(repo=config['repo'], rev=config['rev'])

    def update(self, tags_only: bool, freeze: bool) -> RevInfo:
        """Fetch the remote ``HEAD`` and return a ``RevInfo`` for it.

        The new rev is what ``git describe`` reports for ``FETCH_HEAD``,
        or the ``FETCH_HEAD`` commit id when ``describe`` fails.  With
        ``freeze`` the rev is replaced by its exact commit id and the
        original is kept in ``frozen``.

        Raises:
            RepositoryCannotBeUpdatedError: when the manifest at the new
                rev cannot be loaded.
        """
        with tempfile.TemporaryDirectory() as tmp:
            _git = ('git', *git.NO_FS_MONITOR, '-C', tmp)

            tag_opt = '--abbrev=0' if tags_only else '--exact'
            tag_cmd = (*_git, 'describe', 'FETCH_HEAD', '--tags', tag_opt)

            git.init_repo(tmp, self.repo)
            cmd_output_b(*_git, 'config', 'extensions.partialClone', 'true')
            cmd_output_b(
                *_git, 'fetch', 'origin', 'HEAD',
                '--quiet', '--filter=blob:none', '--tags',
            )

            try:
                rev = cmd_output(*tag_cmd)[1].strip()
            except CalledProcessError:
                # no usable tag: fall back to the fetched commit itself
                rev = cmd_output(*_git, 'rev-parse', 'FETCH_HEAD')[1].strip()
            else:
                if tags_only:
                    rev = git.get_best_candidate_tag(rev, tmp)

            frozen = None
            if freeze:
                exact = cmd_output(*_git, 'rev-parse', rev)[1].strip()
                if exact != rev:
                    frozen = rev
                    rev = exact

            try:
                # workaround for windows -- see #2865
                cmd_output_b(*_git, 'show', f'{rev}:{C.MANIFEST_FILE}')
                cmd_output(*_git, 'checkout', rev, '--', C.MANIFEST_FILE)
            except CalledProcessError:
                pass  # this will be caught by manifest validating code

            manifest_path = os.path.join(tmp, C.MANIFEST_FILE)
            try:
                manifest = load_manifest(manifest_path)
            except InvalidManifestError as e:
                raise RepositoryCannotBeUpdatedError(f'[{self.repo}] {e}')

            hook_ids = frozenset(hook['id'] for hook in manifest)

        return self._replace(rev=rev, frozen=frozen, hook_ids=hook_ids)
+
+
class RepositoryCannotBeUpdatedError(RuntimeError):
    """Raised when a repo's update target has a bad manifest or lacks hooks."""
+
+
def _check_hooks_still_exist_at_rev(
        repo_config: dict[str, Any],
        info: RevInfo,
) -> None:
    """Ensure every configured hook id is still present in ``info.hook_ids``.

    Raises:
        RepositoryCannotBeUpdatedError: when any configured hook id is
            absent; the message lists the missing ids in sorted order.
    """
    # See if any of our hooks were deleted with the new commits
    configured = {hook['id'] for hook in repo_config['hooks']}
    hooks_missing = configured - info.hook_ids
    if not hooks_missing:
        return

    raise RepositoryCannotBeUpdatedError(
        f'[{info.repo}] Cannot update because the update target is '
        f'missing these hooks: {", ".join(sorted(hooks_missing))}',
    )
+
+
def _update_one(
        i: int,
        repo: dict[str, Any],
        *,
        tags_only: bool,
        freeze: bool,
) -> tuple[int, RevInfo, RevInfo]:
    """Compute the updated ``RevInfo`` for one repo config.

    Returns ``(i, old, new)`` so the caller can match the result back to
    the position it was submitted for.  Propagates
    ``RepositoryCannotBeUpdatedError`` from the update / hook check.
    """
    before = RevInfo.from_config(repo)
    after = before.update(tags_only=tags_only, freeze=freeze)
    _check_hooks_still_exist_at_rev(repo, after)
    return i, before, after
+
+
+REV_LINE_RE = re.compile(r'^(\s+)rev:(\s*)([\'"]?)([^\s#]+)(.*)(\r?\n)$')
+
+
+def _original_lines(
+ path: str,
+ rev_infos: list[RevInfo | None],
+ retry: bool = False,
+) -> tuple[list[str], list[int]]:
+ """detect `rev:` lines or reformat the file"""
+ with open(path, newline='') as f:
+ original = f.read()
+
+ lines = original.splitlines(True)
+ idxs = [i for i, line in enumerate(lines) if REV_LINE_RE.match(line)]
+ if len(idxs) == len(rev_infos):
+ return lines, idxs
+ elif retry:
+ raise AssertionError('could not find rev lines')
+ else:
+ with open(path, 'w') as f:
+ f.write(yaml_dump(yaml_load(original)))
+ return _original_lines(path, rev_infos, retry=True)
+
+
+def _write_new_config(path: str, rev_infos: list[RevInfo | None]) -> None:
+ lines, idxs = _original_lines(path, rev_infos)
+
+ for idx, rev_info in zip(idxs, rev_infos):
+ if rev_info is None:
+ continue
+ match = REV_LINE_RE.match(lines[idx])
+ assert match is not None
+ new_rev_s = yaml_dump({'rev': rev_info.rev}, default_style=match[3])
+ new_rev = new_rev_s.split(':', 1)[1].strip()
+ if rev_info.frozen is not None:
+ comment = f' # frozen: {rev_info.frozen}'
+ elif match[5].strip().startswith('# frozen:'):
+ comment = ''
+ else:
+ comment = match[5]
+ lines[idx] = f'{match[1]}rev:{match[2]}{new_rev}{comment}{match[6]}'
+
+ with open(path, 'w', newline='') as f:
+ f.write(''.join(lines))
+
+
def autoupdate(
        config_file: str,
        tags_only: bool,
        freeze: bool,
        repos: Sequence[str] = (),
        jobs: int = 1,
) -> int:
    """Auto-update the pre-commit config to the latest versions of repos.

    ``local`` and ``meta`` repos are never updated; a non-empty ``repos``
    restricts the update to those urls.  Returns 1 if any repository
    could not be updated, otherwise 0.
    """
    migrate_config(config_file, quiet=True)

    skipped = {LOCAL, META}
    config_repos = [
        repo_config
        for repo_config in load_config(config_file)['repos']
        if repo_config['repo'] not in skipped
    ]
    rev_infos: list[RevInfo | None] = [None] * len(config_repos)

    if not jobs:  # 0 => number of cpus
        jobs = xargs.cpu_count()
    # max 1-per-thread, but at least one thread
    jobs = max(min(jobs, len(repos) or len(config_repos)), 1)

    retv = 0
    changed = False
    with concurrent.futures.ThreadPoolExecutor(jobs) as exe:
        futures = []
        for idx, repo_config in enumerate(config_repos):
            if repos and repo_config['repo'] not in repos:
                continue
            futures.append(
                exe.submit(
                    _update_one,
                    idx, repo_config, tags_only=tags_only, freeze=freeze,
                ),
            )

        for future in concurrent.futures.as_completed(futures):
            try:
                i, old, new = future.result()
            except RepositoryCannotBeUpdatedError as e:
                output.write_line(str(e))
                retv = 1
                continue

            if new.rev == old.rev:
                msg = 'already up to date!'
            else:
                changed = True
                new_s = f'{new.frozen} (frozen)' if new.frozen else new.rev
                msg = f'updating {old.rev} -> {new_s}'
                rev_infos[i] = new

            output.write_line(f'[{old.repo}] {msg}')

    if changed:
        _write_new_config(config_file, rev_infos)

    return retv
diff --git a/pre_commit/commands/clean.py b/pre_commit/commands/clean.py
new file mode 100644
index 0000000..5119f64
--- /dev/null
+++ b/pre_commit/commands/clean.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+import os.path
+
+from pre_commit import output
+from pre_commit.store import Store
+from pre_commit.util import rmtree
+
+
def clean(store: Store) -> int:
    """Remove the store directory and the legacy ``~/.pre-commit`` directory.

    Each directory that exists is deleted and reported; always returns 0.
    """
    legacy_path = os.path.expanduser('~/.pre-commit')
    for directory in (store.directory, legacy_path):
        if not os.path.exists(directory):
            continue
        rmtree(directory)
        output.write_line(f'Cleaned {directory}.')
    return 0
diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py
new file mode 100644
index 0000000..6892e09
--- /dev/null
+++ b/pre_commit/commands/gc.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+import os.path
+from typing import Any
+
+import pre_commit.constants as C
+from pre_commit import output
+from pre_commit.clientlib import InvalidConfigError
+from pre_commit.clientlib import InvalidManifestError
+from pre_commit.clientlib import load_config
+from pre_commit.clientlib import load_manifest
+from pre_commit.clientlib import LOCAL
+from pre_commit.clientlib import META
+from pre_commit.store import Store
+
+
def _mark_used_repos(
        store: Store,
        all_repos: dict[tuple[str, str], str],
        unused_repos: set[tuple[str, str]],
        repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` every entry that ``repo`` references.

    ``meta`` repos reference nothing.  ``local`` repos reference one entry
    per hook.  Other repos reference their own ``(repo, rev)`` clone plus
    one entry per hook that exists in the manifest.
    """
    url = repo['repo']
    if url == META:
        return

    if url == LOCAL:
        for hook in repo['hooks']:
            deps = hook.get('additional_dependencies')
            unused_repos.discard((
                store.db_repo_name(url, deps), C.LOCAL_REPO_VERSION,
            ))
        return

    key = (url, repo['rev'])
    path = all_repos.get(key)
    # can't inspect manifest if it isn't cloned
    if path is None:
        return

    try:
        manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE))
    except InvalidManifestError:
        return

    unused_repos.discard(key)
    by_id = {hook['id']: hook for hook in manifest}

    for hook in repo['hooks']:
        hook_id = hook['id']
        if hook_id not in by_id:
            continue

        # the config's dependencies win over the manifest's
        deps = hook.get(
            'additional_dependencies',
            by_id[hook_id]['additional_dependencies'],
        )
        unused_repos.discard((store.db_repo_name(url, deps), repo['rev']))
+
+
def _gc_repos(store: Store) -> int:
    """Delete store repos that no known config references.

    Config paths that no longer exist or fail to load are removed from the
    store as well.  Returns the number of repos deleted.
    """
    configs = store.select_all_configs()
    repos = store.select_all_repos()

    # delete config paths which do not exist
    dead_configs = [p for p in configs if not os.path.exists(p)]
    live_configs = [p for p in configs if os.path.exists(p)]

    all_repos = {(repo, ref): path for repo, ref, path in repos}
    # start with everything unused and discard entries as they are seen
    unused_repos = set(all_repos)
    for config_path in live_configs:
        try:
            config = load_config(config_path)
        except InvalidConfigError:
            dead_configs.append(config_path)
            continue

        for repo in config['repos']:
            _mark_used_repos(store, all_repos, unused_repos, repo)

    store.delete_configs(dead_configs)
    for key in unused_repos:
        db_repo_name, ref = key
        store.delete_repo(db_repo_name, ref, all_repos[key])
    return len(unused_repos)
+
+
def gc(store: Store) -> int:
    """Collect unused repos under the store's exclusive lock.

    Reports how many repos were removed and always returns 0.
    """
    with store.exclusive_lock():
        removed = _gc_repos(store)
    output.write_line(f'{removed} repo(s) removed.')
    return 0
diff --git a/pre_commit/commands/hook_impl.py b/pre_commit/commands/hook_impl.py
new file mode 100644
index 0000000..49a80b7
--- /dev/null
+++ b/pre_commit/commands/hook_impl.py
@@ -0,0 +1,271 @@
+from __future__ import annotations
+
+import argparse
+import os.path
+import subprocess
+import sys
+from collections.abc import Sequence
+
+from pre_commit.commands.run import run
+from pre_commit.envcontext import envcontext
+from pre_commit.parse_shebang import normalize_cmd
+from pre_commit.store import Store
+
+Z40 = '0' * 40
+
+
+def _run_legacy(  # returns (exit status of the `.legacy` hook or 0, stdin bytes read here)
+ hook_type: str,
+ hook_dir: str,
+ args: Sequence[str],
+) -> tuple[int, bytes]:
+ if os.environ.get('PRE_COMMIT_RUNNING_LEGACY'):  # this variable is set below around the legacy hook subprocess
+ raise SystemExit(
+ f"bug: pre-commit's script is installed in migration mode\n"
+ f'run `pre-commit install -f --hook-type {hook_type}` to fix '
+ f'this\n\n'
+ f'Please report this bug at '
+ f'https://github.com/pre-commit/pre-commit/issues',
+ )
+
+ if hook_type == 'pre-push':  # stdin is read only for pre-push and returned to the caller
+ stdin = sys.stdin.buffer.read()
+ else:
+ stdin = b''
+
+ # not running in legacy mode
+ legacy_hook = os.path.join(hook_dir, f'{hook_type}.legacy')
+ if not os.access(legacy_hook, os.X_OK):  # no executable legacy hook: report status 0
+ return 0, stdin
+
+ with envcontext((('PRE_COMMIT_RUNNING_LEGACY', '1'),)):
+ cmd = normalize_cmd((legacy_hook, *args))
+ return subprocess.run(cmd, input=stdin).returncode, stdin  # the captured stdin is passed on to the legacy hook
+
+
+def _validate_config(  # returns None when `config` is a file; otherwise prints a message and raises SystemExit
+ retv: int,
+ config: str,
+ skip_on_missing_config: bool,
+) -> None:
+ if not os.path.isfile(config):
+ if skip_on_missing_config or os.getenv('PRE_COMMIT_ALLOW_NO_CONFIG'):
+ print(f'`{config}` config file not found. Skipping `pre-commit`.')
+ raise SystemExit(retv)  # exits with the status handed in by the caller
+ else:
+ print(
+ f'No {config} file was found\n'
+ f'- To temporarily silence this, run '
+ f'`PRE_COMMIT_ALLOW_NO_CONFIG=1 git ...`\n'
+ f'- To permanently silence this, install pre-commit with the '
+ f'--allow-missing-config option\n'
+ f'- To uninstall pre-commit run `pre-commit uninstall`',
+ )
+ raise SystemExit(1)  # a missing config without either opt-out always exits 1
+
+
+def _ns(  # builds the argparse.Namespace that hook_impl passes to run()
+ hook_type: str,
+ color: bool,
+ *,
+ all_files: bool = False,
+ remote_branch: str | None = None,
+ local_branch: str | None = None,
+ from_ref: str | None = None,
+ to_ref: str | None = None,
+ pre_rebase_upstream: str | None = None,
+ pre_rebase_branch: str | None = None,
+ remote_name: str | None = None,
+ remote_url: str | None = None,
+ commit_msg_filename: str | None = None,
+ prepare_commit_message_source: str | None = None,
+ commit_object_name: str | None = None,
+ checkout_type: str | None = None,
+ is_squash_merge: str | None = None,
+ rewrite_command: str | None = None,
+) -> argparse.Namespace:
+ return argparse.Namespace(
+ color=color,
+ hook_stage=hook_type,  # the hook type is stored under the name `hook_stage`
+ remote_branch=remote_branch,
+ local_branch=local_branch,
+ from_ref=from_ref,
+ to_ref=to_ref,
+ pre_rebase_upstream=pre_rebase_upstream,
+ pre_rebase_branch=pre_rebase_branch,
+ remote_name=remote_name,
+ remote_url=remote_url,
+ commit_msg_filename=commit_msg_filename,
+ prepare_commit_message_source=prepare_commit_message_source,
+ commit_object_name=commit_object_name,
+ all_files=all_files,
+ checkout_type=checkout_type,
+ is_squash_merge=is_squash_merge,
+ rewrite_command=rewrite_command,
+ files=(),  # the remaining four fields are fixed; this helper has no parameter for them
+ hook=None,
+ verbose=False,
+ show_diff_on_failure=False,
+ )
+
+
+def _rev_exists(rev: str) -> bool:  # True when `git rev-list --quiet <rev>` exits with status 0
+ return not subprocess.call(('git', 'rev-list', '--quiet', rev))
+
+
+def _pre_push_ns(  # returns a namespace for the first stdin line that yields one, or None if no line does
+ color: bool,
+ args: Sequence[str],
+ stdin: bytes,
+) -> argparse.Namespace | None:
+ remote_name = args[0]
+ remote_url = args[1]
+
+ for line in stdin.decode().splitlines():
+ parts = line.rsplit(maxsplit=3)  # split on whitespace from the right into at most four fields
+ local_branch, local_sha, remote_branch, remote_sha = parts
+ if local_sha == Z40:  # all-zero local sha: this line is skipped
+ continue
+ elif remote_sha != Z40 and _rev_exists(remote_sha):  # remote sha is known locally: check remote_sha..local_sha
+ return _ns(
+ 'pre-push', color,
+ from_ref=remote_sha, to_ref=local_sha,
+ remote_branch=remote_branch,
+ local_branch=local_branch,
+ remote_name=remote_name, remote_url=remote_url,
+ )
+ else:
+ # ancestors not found in remote
+ ancestors = subprocess.check_output((
+ 'git', 'rev-list', local_sha, '--topo-order', '--reverse',
+ '--not', f'--remotes={remote_name}',
+ )).decode().strip()
+ if not ancestors:  # empty rev-list output: move on to the next line
+ continue
+ else:
+ first_ancestor = ancestors.splitlines()[0]  # first line of the --reverse listing
+ cmd = ('git', 'rev-list', '--max-parents=0', local_sha)
+ roots = set(subprocess.check_output(cmd).decode().splitlines())  # commits with no parents reachable from local_sha
+ if first_ancestor in roots:
+ # pushing the whole tree including root commit
+ return _ns(
+ 'pre-push', color,
+ all_files=True,
+ remote_name=remote_name, remote_url=remote_url,
+ remote_branch=remote_branch,
+ local_branch=local_branch,
+ )
+ else:
+ rev_cmd = ('git', 'rev-parse', f'{first_ancestor}^')  # resolve the parent of first_ancestor
+ source = subprocess.check_output(rev_cmd).decode().strip()
+ return _ns(
+ 'pre-push', color,
+ from_ref=source, to_ref=local_sha,
+ remote_name=remote_name, remote_url=remote_url,
+ remote_branch=remote_branch,
+ local_branch=local_branch,
+ )
+
+ # nothing to push
+ return None
+
+
+_EXPECTED_ARG_LENGTH_BY_HOOK = {
+ 'commit-msg': 1,
+ 'post-checkout': 3,
+ 'post-commit': 0,
+ 'pre-commit': 0,
+ 'pre-merge-commit': 0,
+ 'post-merge': 1,
+ 'post-rewrite': 1,
+ 'pre-push': 2,
+}
+
+
+def _check_args_length(hook_type: str, args: Sequence[str]) -> None:  # raises SystemExit on a bad argument count
+ if hook_type == 'prepare-commit-msg':  # accepts a range of counts, so it is not in the fixed-length table
+ if len(args) < 1 or len(args) > 3:
+ raise SystemExit(
+ f'hook-impl for {hook_type} expected 1, 2, or 3 arguments '
+ f'but got {len(args)}: {args}',
+ )
+ elif hook_type == 'pre-rebase':  # likewise accepts a range (1 or 2)
+ if len(args) < 1 or len(args) > 2:
+ raise SystemExit(
+ f'hook-impl for {hook_type} expected 1 or 2 arguments '
+ f'but got {len(args)}: {args}',
+ )
+ elif hook_type in _EXPECTED_ARG_LENGTH_BY_HOOK:  # every other known hook type has one exact count
+ expected = _EXPECTED_ARG_LENGTH_BY_HOOK[hook_type]
+ if len(args) != expected:
+ arguments_s = 'argument' if expected == 1 else 'arguments'  # singular only for exactly one
+ raise SystemExit(
+ f'hook-impl for {hook_type} expected {expected} {arguments_s} '
+ f'but got {len(args)}: {args}',
+ )
+ else:
+ raise AssertionError(f'unexpected hook type: {hook_type}')  # hook type matched none of the branches above
+
+
+def _run_ns(  # validates the argument count, then maps (hook_type, args) to the namespace for run()
+ hook_type: str,
+ color: bool,
+ args: Sequence[str],
+ stdin: bytes,
+) -> argparse.Namespace | None:
+ _check_args_length(hook_type, args)  # raises for an unknown hook type or a wrong argument count
+ if hook_type == 'pre-push':  # the only branch that uses stdin and the only one that may return None
+ return _pre_push_ns(color, args, stdin)
+ elif hook_type == 'commit-msg':  # was `in 'commit-msg'`, a substring test on a str; `==` is the exact match
+ return _ns(hook_type, color, commit_msg_filename=args[0])
+ elif hook_type == 'prepare-commit-msg' and len(args) == 1:
+ return _ns(hook_type, color, commit_msg_filename=args[0])
+ elif hook_type == 'prepare-commit-msg' and len(args) == 2:
+ return _ns(
+ hook_type, color, commit_msg_filename=args[0],
+ prepare_commit_message_source=args[1],
+ )
+ elif hook_type == 'prepare-commit-msg' and len(args) == 3:
+ return _ns(
+ hook_type, color, commit_msg_filename=args[0],
+ prepare_commit_message_source=args[1], commit_object_name=args[2],
+ )
+ elif hook_type in {'post-commit', 'pre-merge-commit', 'pre-commit'}:  # these take no positional arguments
+ return _ns(hook_type, color)
+ elif hook_type == 'post-checkout':
+ return _ns(
+ hook_type, color,
+ from_ref=args[0], to_ref=args[1], checkout_type=args[2],
+ )
+ elif hook_type == 'post-merge':
+ return _ns(hook_type, color, is_squash_merge=args[0])
+ elif hook_type == 'post-rewrite':
+ return _ns(hook_type, color, rewrite_command=args[0])
+ elif hook_type == 'pre-rebase' and len(args) == 1:
+ return _ns(hook_type, color, pre_rebase_upstream=args[0])
+ elif hook_type == 'pre-rebase' and len(args) == 2:
+ return _ns(
+ hook_type, color, pre_rebase_upstream=args[0],
+ pre_rebase_branch=args[1],
+ )
+ else:
+ raise AssertionError(f'unexpected hook type: {hook_type}')
+
+
+def hook_impl(  # runs the legacy hook (if any), checks the config exists, then runs pre-commit for this hook type
+ store: Store,
+ *,
+ config: str,
+ color: bool,
+ hook_type: str,
+ hook_dir: str,
+ skip_on_missing_config: bool,
+ args: Sequence[str],
+) -> int:
+ retv, stdin = _run_legacy(hook_type, hook_dir, args)
+ _validate_config(retv, config, skip_on_missing_config)  # may raise SystemExit when the config file is missing
+ ns = _run_ns(hook_type, color, args, stdin)
+ if ns is None:  # no namespace was produced: only the legacy status is returned
+ return retv
+ else:
+ return retv | run(config, store, ns)  # bitwise OR: non-zero if either status is non-zero
diff --git a/pre_commit/commands/init_templatedir.py b/pre_commit/commands/init_templatedir.py
new file mode 100644
index 0000000..08af656
--- /dev/null
+++ b/pre_commit/commands/init_templatedir.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+import logging
+import os.path
+
+from pre_commit.commands.install_uninstall import install
+from pre_commit.store import Store
+from pre_commit.util import CalledProcessError
+from pre_commit.util import cmd_output
+
+logger = logging.getLogger('pre_commit')
+
+
+def init_templatedir(  # installs hook scripts using `directory` as the git dir; always returns 0
+ config_file: str,
+ store: Store,
+ directory: str,
+ hook_types: list[str] | None,
+ skip_on_missing_config: bool = True,
+) -> int:
+ install(
+ config_file,
+ store,
+ hook_types=hook_types,
+ overwrite=True,
+ skip_on_missing_config=skip_on_missing_config,
+ git_dir=directory,
+ )
+ try:
+ _, out, _ = cmd_output('git', 'config', 'init.templateDir')
+ except CalledProcessError:
+ configured_path = None  # the `git config` call failed; compared against `dest` below as None
+ else:
+ configured_path = os.path.realpath(os.path.expanduser(out.strip()))
+ dest = os.path.realpath(directory)
+ if configured_path != dest:  # only warns; the return value does not change
+ logger.warning('`init.templateDir` not set to the target directory')
+ logger.warning(f'maybe `git config --global init.templateDir {dest}`?')
+ return 0
diff --git a/pre_commit/commands/install_uninstall.py b/pre_commit/commands/install_uninstall.py
new file mode 100644
index 0000000..d19e0d4
--- /dev/null
+++ b/pre_commit/commands/install_uninstall.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+import logging
+import os.path
+import shlex
+import shutil
+import sys
+
+from pre_commit import git
+from pre_commit import output
+from pre_commit.clientlib import InvalidConfigError
+from pre_commit.clientlib import load_config
+from pre_commit.repository import all_hooks
+from pre_commit.repository import install_hook_envs
+from pre_commit.store import Store
+from pre_commit.util import make_executable
+from pre_commit.util import resource_text
+
+
+logger = logging.getLogger(__name__)
+
+# This is used to identify the hook file we install
+PRIOR_HASHES = (
+ b'4d9958c90bc262f47553e2c073f14cfe',
+ b'd8ee923c46731b42cd95cc869add4062',
+ b'49fd668cb42069aa1b6048464be5d395',
+ b'79f09a650522a87b0da915d0d983b2de',
+ b'e358c9dae00eac5d06b38dfdb1e33a8c',
+)
+CURRENT_HASH = b'138fd403232d2ddd5efb44317e38bf03'
+TEMPLATE_START = '# start templated\n'
+TEMPLATE_END = '# end templated\n'
+
+
+def _hook_types(cfg_filename: str, hook_types: list[str] | None) -> list[str]:  # an explicit list wins over the config
+ if hook_types is not None:
+ return hook_types
+ else:
+ try:
+ cfg = load_config(cfg_filename)
+ except InvalidConfigError:
+ return ['pre-commit']  # fallback when the config cannot be loaded
+ else:
+ return cfg['default_install_hook_types']
+
+
+def _hook_paths(  # returns (hook script path, path of its `.legacy` sibling)
+ hook_type: str,
+ git_dir: str | None = None,
+) -> tuple[str, str]:
+ git_dir = git_dir if git_dir is not None else git.get_git_common_dir()  # None means: ask git for the directory
+ pth = os.path.join(git_dir, 'hooks', hook_type)
+ return pth, f'{pth}.legacy'
+
+
+def is_our_script(filename: str) -> bool:  # True when the file contains CURRENT_HASH or any of PRIOR_HASHES
+ if not os.path.exists(filename): # pragma: win32 no cover (symlink)
+ return False
+ with open(filename, 'rb') as f:  # read as bytes; the hashes are bytes literals
+ contents = f.read()
+ return any(h in contents for h in (CURRENT_HASH,) + PRIOR_HASHES)
+
+
+def _install_hook_script(  # writes the templated hook script for one hook type and marks it executable
+ config_file: str,
+ hook_type: str,
+ overwrite: bool = False,
+ skip_on_missing_config: bool = False,
+ git_dir: str | None = None,
+) -> None:
+ hook_path, legacy_path = _hook_paths(hook_type, git_dir=git_dir)
+
+ os.makedirs(os.path.dirname(hook_path), exist_ok=True)
+
+ # If we have an existing hook, move it to pre-commit.legacy
+ if os.path.lexists(hook_path) and not is_our_script(hook_path):
+ shutil.move(hook_path, legacy_path)
+
+ # If we specify overwrite, we simply delete the legacy file
+ if overwrite and os.path.exists(legacy_path):
+ os.remove(legacy_path)
+ elif os.path.exists(legacy_path):
+ output.write_line(
+ f'Running in migration mode with existing hooks at {legacy_path}\n'
+ f'Use -f to use only pre-commit.',
+ )
+
+ args = ['hook-impl', f'--config={config_file}', f'--hook-type={hook_type}']  # written into the script as ARGS
+ if skip_on_missing_config:
+ args.append('--skip-on-missing-config')
+
+ with open(hook_path, 'w') as hook_file:
+ contents = resource_text('hook-tmpl')
+ before, rest = contents.split(TEMPLATE_START)  # two-way unpacking: each marker must occur exactly once
+ _, after = rest.split(TEMPLATE_END)  # the text between the markers is discarded and regenerated below
+
+ # on windows always use `/bin/sh` since `bash` might not be on PATH
+ # though we use bash-specific features `sh` on windows is actually
+ # bash in "POSIXLY_CORRECT" mode which still supports the features we
+ # use: subshells / arrays
+ if sys.platform == 'win32': # pragma: win32 cover
+ hook_file.write('#!/bin/sh\n')
+
+ hook_file.write(before + TEMPLATE_START)
+ hook_file.write(f'INSTALL_PYTHON={shlex.quote(sys.executable)}\n')  # shell-quoted interpreter path
+ args_s = shlex.join(args)
+ hook_file.write(f'ARGS=({args_s})\n')
+ hook_file.write(TEMPLATE_END + after)
+ make_executable(hook_path)
+
+ output.write_line(f'pre-commit installed at {hook_path}')
+
+
+def install(  # installs a hook script per hook type; returns 1 if refused, else 0
+ config_file: str,
+ store: Store,
+ hook_types: list[str] | None,
+ overwrite: bool = False,
+ hooks: bool = False,
+ skip_on_missing_config: bool = False,
+ git_dir: str | None = None,
+) -> int:
+ if git_dir is None and git.has_core_hookpaths_set():  # the check is skipped when an explicit git_dir is given
+ logger.error(
+ 'Cowardly refusing to install hooks with `core.hooksPath` set.\n'
+ 'hint: `git config --unset-all core.hooksPath`',
+ )
+ return 1
+
+ for hook_type in _hook_types(config_file, hook_types):
+ _install_hook_script(
+ config_file, hook_type,
+ overwrite=overwrite,
+ skip_on_missing_config=skip_on_missing_config,
+ git_dir=git_dir,
+ )
+
+ if hooks:  # optionally also install the hook environments
+ install_hooks(config_file, store)
+
+ return 0
+
+
+def install_hooks(config_file: str, store: Store) -> int:  # installs environments for every hook in the config; returns 0
+ install_hook_envs(all_hooks(load_config(config_file), store), store)
+ return 0
+
+
+def _uninstall_hook_script(hook_type: str) -> None:  # removes our script and restores a `.legacy` hook if one exists
+ hook_path, legacy_path = _hook_paths(hook_type)
+
+ # If our file doesn't exist or it isn't ours, gtfo.
+ if not os.path.exists(hook_path) or not is_our_script(hook_path):
+ return
+
+ os.remove(hook_path)
+ output.write_line(f'{hook_type} uninstalled')
+
+ if os.path.exists(legacy_path):
+ os.replace(legacy_path, hook_path)  # move the legacy hook back to the hook path
+ output.write_line(f'Restored previous hooks to {hook_path}')
+
+
+def uninstall(config_file: str, hook_types: list[str] | None) -> int:  # uninstalls each resolved hook type; returns 0
+ for hook_type in _hook_types(config_file, hook_types):
+ _uninstall_hook_script(hook_type)
+ return 0
diff --git a/pre_commit/commands/migrate_config.py b/pre_commit/commands/migrate_config.py
new file mode 100644
index 0000000..842fb3a
--- /dev/null
+++ b/pre_commit/commands/migrate_config.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import re
+import textwrap
+
+import cfgv
+import yaml
+
+from pre_commit.clientlib import InvalidConfigError
+from pre_commit.yaml import yaml_load
+
+
+def _is_header_line(line: str) -> bool:  # True for lines starting with `#` or `---` and for blank lines
+ return line.startswith(('#', '---')) or not line.strip()
+
+
+def _migrate_map(contents: str) -> str:  # wraps a top-level YAML list under a `repos:` key; other documents are returned unchanged
+ if isinstance(yaml_load(contents), list):
+ # Find the first non-header line
+ lines = contents.splitlines(True)  # True keeps the line endings so the joins below reproduce the text
+ i = 0
+ # Only loop on non empty configuration file
+ while i < len(lines) and _is_header_line(lines[i]):
+ i += 1
+
+ header = ''.join(lines[:i])
+ rest = ''.join(lines[i:])
+
+ # If they are using the "default" flow style of yaml, this operation
+ # will yield a valid configuration
+ try:
+ trial_contents = f'{header}repos:\n{rest}'
+ yaml_load(trial_contents)
+ contents = trial_contents
+ except yaml.YAMLError:
+ contents = f'{header}repos:\n{textwrap.indent(rest, " " * 4)}'  # fallback: indent the list by four spaces
+
+ return contents
+
+
+def _migrate_sha_to_rev(contents: str) -> str:  # renames `sha:` to `rev:` where it follows a newline plus whitespace
+ return re.sub(r'(\n\s+)sha:', r'\1rev:', contents)
+
+
+def _migrate_python_venv(contents: str) -> str:  # rewrites `language: python_venv` to `language: python`
+ return re.sub(
+ r'(\n\s+)language: python_venv\b',
+ r'\1language: python',
+ contents,
+ )
+
+
+def migrate_config(config_file: str, quiet: bool = False) -> int:  # rewrites the file only if a migration changed it; returns 0
+ with open(config_file) as f:
+ orig_contents = contents = f.read()
+
+ with cfgv.reraise_as(InvalidConfigError):
+ with cfgv.validate_context(f'File {config_file}'):
+ try:
+ yaml_load(orig_contents)  # parsed only to check that the file loads; the result is discarded
+ except Exception as e:
+ raise cfgv.ValidationError(str(e))
+
+ contents = _migrate_map(contents)
+ contents = _migrate_sha_to_rev(contents)
+ contents = _migrate_python_venv(contents)
+
+ if contents != orig_contents:
+ with open(config_file, 'w') as f:
+ f.write(contents)
+
+ print('Configuration has been migrated.')
+ elif not quiet:  # `quiet` suppresses only the nothing-to-do message
+ print('Configuration is already migrated.')
+ return 0
diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py
new file mode 100644
index 0000000..076f16d
--- /dev/null
+++ b/pre_commit/commands/run.py
@@ -0,0 +1,447 @@
+from __future__ import annotations
+
+import argparse
+import contextlib
+import functools
+import logging
+import os
+import re
+import subprocess
+import time
+import unicodedata
+from collections.abc import Generator
+from collections.abc import Iterable
+from collections.abc import MutableMapping
+from collections.abc import Sequence
+from typing import Any
+
+from identify.identify import tags_from_path
+
+from pre_commit import color
+from pre_commit import git
+from pre_commit import output
+from pre_commit.all_languages import languages
+from pre_commit.clientlib import load_config
+from pre_commit.hook import Hook
+from pre_commit.repository import all_hooks
+from pre_commit.repository import install_hook_envs
+from pre_commit.staged_files_only import staged_files_only
+from pre_commit.store import Store
+from pre_commit.util import cmd_output_b
+
+
+logger = logging.getLogger('pre_commit')
+
+
+def _len_cjk(msg: str) -> int:  # sums per-character widths: classes F and W count 2, the others 1
+ widths = {'A': 1, 'F': 2, 'H': 1, 'N': 1, 'Na': 1, 'W': 2}  # keyed by unicodedata.east_asian_width() result
+ return sum(widths[unicodedata.east_asian_width(c)] for c in msg)
+
+
+def _start_msg(*, start: str, cols: int, end_len: int) -> str:  # returns `start` followed by dot padding, no newline
+ dots = '.' * (cols - _len_cjk(start) - end_len - 1)  # leaves end_len + 1 columns free after the dots
+ return f'{start}{dots}'
+
+
+def _full_msg(  # returns one full status line: start, dots, postfix, colored end message, newline
+ *,
+ start: str,
+ cols: int,
+ end_msg: str,
+ end_color: str,
+ use_color: bool,
+ postfix: str = '',
+) -> str:
+ dots = '.' * (cols - _len_cjk(start) - len(postfix) - len(end_msg) - 1)  # width is computed from the uncolored end_msg
+ end = color.format_color(end_msg, end_color, use_color)
+ return f'{start}{dots}{postfix}{end}\n'
+
+
+def filter_by_include_exclude(  # lazily yields names that match `include` and do not match `exclude`
+ names: Iterable[str],
+ include: str,
+ exclude: str,
+) -> Generator[str, None, None]:
+ include_re, exclude_re = re.compile(include), re.compile(exclude)  # compiled once, outside the generator
+ return (
+ filename for filename in names
+ if include_re.search(filename)  # `search`, so the patterns are not anchored unless they say so
+ if not exclude_re.search(filename)
+ )
+
+
+class Classifier:  # holds a filtered filename list and selects the files that apply to a given hook
+ def __init__(self, filenames: Iterable[str]) -> None:
+ self.filenames = [f for f in filenames if os.path.lexists(f)]  # names that do not exist on disk are dropped
+
+ @functools.cache
+ def _types_for_file(self, filename: str) -> set[str]:  # cached wrapper around identify's tags_from_path
+ return tags_from_path(filename)
+
+ def by_types(  # yields names whose tags satisfy types (all), types_or (any, if given) and exclude_types (none)
+ self,
+ names: Iterable[str],
+ types: Iterable[str],
+ types_or: Iterable[str],
+ exclude_types: Iterable[str],
+ ) -> Generator[str, None, None]:
+ types = frozenset(types)
+ types_or = frozenset(types_or)
+ exclude_types = frozenset(exclude_types)
+ for filename in names:
+ tags = self._types_for_file(filename)
+ if (
+ tags >= types and  # superset test: every required type is present
+ (not types_or or tags & types_or) and  # an empty types_or imposes no constraint
+ not tags & exclude_types
+ ):
+ yield filename
+
+ def filenames_for_hook(self, hook: Hook) -> Generator[str, None, None]:  # applies the hook's regexes, then its type filters
+ return self.by_types(
+ filter_by_include_exclude(
+ self.filenames,
+ hook.files,
+ hook.exclude,
+ ),
+ hook.types,
+ hook.types_or,
+ hook.exclude_types,
+ )
+
+ @classmethod
+ def from_config(  # builds a Classifier from filenames filtered by the given include/exclude patterns
+ cls,
+ filenames: Iterable[str],
+ include: str,
+ exclude: str,
+ ) -> Classifier:
+ # on windows we normalize all filenames to use forward slashes
+ # this makes it easier to filter using the `files:` regex
+ # this also makes improperly quoted shell-based hooks work better
+ # see #1173
+ if os.altsep == '/' and os.sep == '\\':
+ filenames = (f.replace(os.sep, os.altsep) for f in filenames)
+ filenames = filter_by_include_exclude(filenames, include, exclude)
+ return Classifier(filenames)
+
+
+def _get_skips(environ: MutableMapping[str, str]) -> set[str]:  # parses the comma-separated SKIP variable into a set
+ skips = environ.get('SKIP', '')
+ return {skip.strip() for skip in skips.split(',') if skip.strip()}  # entries are stripped; empty ones are dropped
+
+
+SKIPPED = 'Skipped'
+NO_FILES = '(no files to check)'
+
+
+def _subtle_line(s: str, use_color: bool) -> None:  # writes `s` as one line formatted with color.SUBTLE
+ output.write_line(color.format_color(s, color.SUBTLE, use_color))
+
+
+def _run_single_hook(  # runs or skips one hook, prints its status; returns (failed, diff after the hook)
+ classifier: Classifier,
+ hook: Hook,
+ skips: set[str],
+ cols: int,
+ diff_before: bytes,
+ verbose: bool,
+ use_color: bool,
+) -> tuple[bool, bytes]:
+ filenames = tuple(classifier.filenames_for_hook(hook))
+
+ if hook.id in skips or hook.alias in skips:  # skipped on request, matched by id or alias
+ output.write(
+ _full_msg(
+ start=hook.name,
+ end_msg=SKIPPED,
+ end_color=color.YELLOW,
+ use_color=use_color,
+ cols=cols,
+ ),
+ )
+ duration = None
+ retcode = 0
+ diff_after = diff_before
+ files_modified = False
+ out = b''
+ elif not filenames and not hook.always_run:  # nothing to check and the hook is not forced to run
+ output.write(
+ _full_msg(
+ start=hook.name,
+ postfix=NO_FILES,
+ end_msg=SKIPPED,
+ end_color=color.TURQUOISE,
+ use_color=use_color,
+ cols=cols,
+ ),
+ )
+ duration = None
+ retcode = 0
+ diff_after = diff_before
+ files_modified = False
+ out = b''
+ else:
+ # print hook and dots first in case the hook takes a while to run
+ output.write(_start_msg(start=hook.name, end_len=6, cols=cols))  # 6 == len('Passed') == len('Failed')
+
+ if not hook.pass_filenames:
+ filenames = ()
+ time_before = time.monotonic()
+ language = languages[hook.language]
+ with language.in_env(hook.prefix, hook.language_version):
+ retcode, out = language.run_hook(
+ hook.prefix,
+ hook.entry,
+ hook.args,
+ filenames,
+ is_local=hook.src == 'local',
+ require_serial=hook.require_serial,
+ color=use_color,
+ )
+ duration = round(time.monotonic() - time_before, 2) or 0  # `or 0` turns a rounded 0.0 into the int 0
+ diff_after = _get_diff()
+
+ # if the hook makes changes, fail the commit
+ files_modified = diff_before != diff_after
+
+ if retcode or files_modified:
+ print_color = color.RED
+ status = 'Failed'
+ else:
+ print_color = color.GREEN
+ status = 'Passed'
+
+ output.write_line(color.format_color(status, print_color, use_color))
+
+ if verbose or hook.verbose or retcode or files_modified:  # details are shown in verbose mode or on failure
+ _subtle_line(f'- hook id: {hook.id}', use_color)
+
+ if (verbose or hook.verbose) and duration is not None:  # duration is None when the hook was skipped
+ _subtle_line(f'- duration: {duration}s', use_color)
+
+ if retcode:
+ _subtle_line(f'- exit code: {retcode}', use_color)
+
+ # Print a message if failing due to file modifications
+ if files_modified:
+ _subtle_line('- files were modified by this hook', use_color)
+
+ if out.strip():
+ output.write_line()
+ output.write_line_b(out.strip(), logfile_name=hook.log_file)
+ output.write_line()
+
+ return files_modified or bool(retcode), diff_after
+
+
+def _compute_cols(hooks: Sequence[Hook]) -> int:  # never returns less than 80
+ """Compute the number of columns to display hook messages. The widest
+ that will be displayed is in the no files skipped case:
+
+ Hook name...(no files to check) Skipped
+ """
+ if hooks:
+ name_len = max(_len_cjk(hook.name) for hook in hooks)  # display width of the widest hook name
+ else:
+ name_len = 0
+
+ cols = name_len + 3 + len(NO_FILES) + 1 + len(SKIPPED)  # name + 3 + NO_FILES + 1 + SKIPPED, as in the docstring example
+ return max(cols, 80)
+
+
+def _all_filenames(args: argparse.Namespace) -> Iterable[str]:  # picks the candidate file list; the first matching branch wins
+ # these hooks do not operate on files
+ if args.hook_stage in {
+ 'post-checkout', 'post-commit', 'post-merge', 'post-rewrite',
+ 'pre-rebase',
+ }:
+ return ()
+ elif args.hook_stage in {'prepare-commit-msg', 'commit-msg'}:  # the only candidate is the commit message file
+ return (args.commit_msg_filename,)
+ elif args.from_ref and args.to_ref:
+ return git.get_changed_files(args.from_ref, args.to_ref)
+ elif args.files:
+ return args.files
+ elif args.all_files:
+ return git.get_all_files()
+ elif git.is_in_merge_conflict():
+ return git.get_conflicted_files()
+ else:
+ return git.get_staged_files()  # default when nothing above applies
+
+
+def _get_diff() -> bytes:  # returns the raw stdout of `git diff` with the options below
+ _, out, _ = cmd_output_b(
+ 'git', 'diff', '--no-ext-diff', '--no-textconv', '--ignore-submodules',
+ check=False,  # the return code is discarded; only the output is used
+ )
+ return out
+
+
+def _run_hooks(  # returns the OR of all hook results: 0 only when every hook passed or was skipped
+ config: dict[str, Any],
+ hooks: Sequence[Hook],
+ skips: set[str],
+ args: argparse.Namespace,
+) -> int:
+ """Actually run the hooks."""
+ cols = _compute_cols(hooks)
+ classifier = Classifier.from_config(
+ _all_filenames(args), config['files'], config['exclude'],
+ )
+ retval = 0
+ prior_diff = _get_diff()
+ for hook in hooks:
+ current_retval, prior_diff = _run_single_hook(  # each hook's post-run diff becomes the next hook's baseline
+ classifier, hook, skips, cols, prior_diff,
+ verbose=args.verbose, use_color=args.color,
+ )
+ retval |= current_retval
+ if retval and (config['fail_fast'] or hook.fail_fast):  # stop after a failure when fail-fast is set globally or on this hook
+ break
+ if retval and args.show_diff_on_failure and prior_diff:
+ if args.all_files:
+ output.write_line(
+ 'pre-commit hook(s) made changes.\n'
+ 'If you are seeing this message in CI, '
+ 'reproduce locally with: `pre-commit run --all-files`.\n'
+ 'To run `pre-commit` as part of git workflow, use '
+ '`pre-commit install`.',
+ )
+ output.write_line('All changes made by hooks:')
+ # args.color is a boolean.
+ # See user_color function in color.py
+ git_color_opt = 'always' if args.color else 'never'
+ subprocess.call((
+ 'git', '--no-pager', 'diff', '--no-ext-diff',
+ f'--color={git_color_opt}',
+ ))
+
+ return retval
+
+
+def _has_unmerged_paths() -> bool:  # True when `git ls-files --unmerged` prints anything
+ _, stdout, _ = cmd_output_b('git', 'ls-files', '--unmerged')
+ return bool(stdout.strip())
+
+
+def _has_unstaged_config(config_file: str) -> bool:  # True only when `git diff --quiet` exits with exactly 1
+ retcode, _, _ = cmd_output_b(
+ 'git', 'diff', '--quiet', '--no-ext-diff', config_file, check=False,
+ )
+ # be explicit, other git errors don't mean it has an unstaged config.
+ return retcode == 1
+
+
+def run(  # validates the arguments, exports PRE_COMMIT_* variables into `environ`, then runs the selected hooks
+ config_file: str,
+ store: Store,
+ args: argparse.Namespace,
+ environ: MutableMapping[str, str] = os.environ,  # mutated in place by the assignments below
+) -> int:
+ stash = not args.all_files and not args.files  # True when neither --all-files nor explicit files were given
+
+ # Check if we have unresolved merge conflict files and fail fast.
+ if stash and _has_unmerged_paths():
+ logger.error('Unmerged files. Resolve before committing.')
+ return 1
+ if bool(args.from_ref) != bool(args.to_ref):  # exactly one of the two refs was given
+ logger.error('Specify both --from-ref and --to-ref.')
+ return 1
+ if stash and _has_unstaged_config(config_file):
+ logger.error(
+ f'Your pre-commit configuration is unstaged.\n'
+ f'`git add {config_file}` to fix this.',
+ )
+ return 1
+ if (
+ args.hook_stage in {'prepare-commit-msg', 'commit-msg'} and
+ not args.commit_msg_filename
+ ):
+ logger.error(
+ f'`--commit-msg-filename` is required for '
+ f'`--hook-stage {args.hook_stage}`',
+ )
+ return 1
+ # prevent recursive post-checkout hooks (#1418)
+ if (
+ args.hook_stage == 'post-checkout' and
+ environ.get('_PRE_COMMIT_SKIP_POST_CHECKOUT')
+ ):
+ return 0
+
+ # Expose prepare_commit_message_source / commit_object_name
+ # as environment variables for the hooks
+ if args.prepare_commit_message_source:
+ environ['PRE_COMMIT_COMMIT_MSG_SOURCE'] = (
+ args.prepare_commit_message_source
+ )
+
+ if args.commit_object_name:
+ environ['PRE_COMMIT_COMMIT_OBJECT_NAME'] = args.commit_object_name
+
+ # Expose from-ref / to-ref as environment variables for hooks to consume
+ if args.from_ref and args.to_ref:
+ # legacy names
+ environ['PRE_COMMIT_ORIGIN'] = args.from_ref
+ environ['PRE_COMMIT_SOURCE'] = args.to_ref
+ # new names
+ environ['PRE_COMMIT_FROM_REF'] = args.from_ref
+ environ['PRE_COMMIT_TO_REF'] = args.to_ref
+
+ if args.pre_rebase_upstream and args.pre_rebase_branch:  # exported only when both values are present
+ environ['PRE_COMMIT_PRE_REBASE_UPSTREAM'] = args.pre_rebase_upstream
+ environ['PRE_COMMIT_PRE_REBASE_BRANCH'] = args.pre_rebase_branch
+
+ if (
+ args.remote_name and args.remote_url and
+ args.remote_branch and args.local_branch
+ ):
+ environ['PRE_COMMIT_LOCAL_BRANCH'] = args.local_branch
+ environ['PRE_COMMIT_REMOTE_BRANCH'] = args.remote_branch
+ environ['PRE_COMMIT_REMOTE_NAME'] = args.remote_name
+ environ['PRE_COMMIT_REMOTE_URL'] = args.remote_url
+
+ if args.checkout_type:
+ environ['PRE_COMMIT_CHECKOUT_TYPE'] = args.checkout_type
+
+ if args.is_squash_merge:
+ environ['PRE_COMMIT_IS_SQUASH_MERGE'] = args.is_squash_merge
+
+ if args.rewrite_command:
+ environ['PRE_COMMIT_REWRITE_COMMAND'] = args.rewrite_command
+
+ # Set pre_commit flag
+ environ['PRE_COMMIT'] = '1'
+
+ with contextlib.ExitStack() as exit_stack:
+ if stash:
+ exit_stack.enter_context(staged_files_only(store.directory))  # entered only when `stash` is True
+
+ config = load_config(config_file)
+ hooks = [
+ hook
+ for hook in all_hooks(config, store)
+ if not args.hook or hook.id == args.hook or hook.alias == args.hook  # no --hook filter, or id/alias matches it
+ if args.hook_stage in hook.stages
+ ]
+
+ if args.hook and not hooks:
+ output.write_line(
+ f'No hook with id `{args.hook}` in stage `{args.hook_stage}`',
+ )
+ return 1
+
+ skips = _get_skips(environ)
+ to_install = [
+ hook
+ for hook in hooks
+ if hook.id not in skips and hook.alias not in skips  # skipped hooks do not get their environments installed
+ ]
+ install_hook_envs(to_install, store)
+
+ return _run_hooks(config, hooks, skips, args)
+
+ # https://github.com/python/mypy/issues/7726
+ raise AssertionError('unreachable')
diff --git a/pre_commit/commands/sample_config.py b/pre_commit/commands/sample_config.py
new file mode 100644
index 0000000..ce22f65
--- /dev/null
+++ b/pre_commit/commands/sample_config.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+SAMPLE_CONFIG = '''\
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v3.2.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: check-yaml
+ - id: check-added-large-files
+'''
+
+
+def sample_config() -> int:  # prints SAMPLE_CONFIG to stdout; always returns 0
+ print(SAMPLE_CONFIG, end='')  # end='' because the constant already ends with a newline
+ return 0
diff --git a/pre_commit/commands/try_repo.py b/pre_commit/commands/try_repo.py
new file mode 100644
index 0000000..539ed3c
--- /dev/null
+++ b/pre_commit/commands/try_repo.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import argparse
+import logging
+import os.path
+import tempfile
+
+import pre_commit.constants as C
+from pre_commit import git
+from pre_commit import output
+from pre_commit.clientlib import load_manifest
+from pre_commit.commands.run import run
+from pre_commit.store import Store
+from pre_commit.util import cmd_output_b
+from pre_commit.xargs import xargs
+from pre_commit.yaml import yaml_dump
+
+logger = logging.getLogger(__name__)
+
+
+def _repo_ref(tmpdir: str, repo: str, ref: str | None) -> tuple[str, str]:  # returns the (repo, ref) pair to try
+ # if `ref` is explicitly passed, use it
+ if ref is not None:
+ return repo, ref
+
+ ref = git.head_rev(repo)
+ # if it exists on disk, we'll try and clone it with the local changes
+ if os.path.exists(repo) and git.has_diff('HEAD', repo=repo):
+ logger.warning('Creating temporary repo with uncommitted changes...')
+
+ shadow = os.path.join(tmpdir, 'shadow-repo')
+ cmd_output_b('git', 'clone', repo, shadow)
+ cmd_output_b('git', 'checkout', ref, '-b', '_pc_tmp', cwd=shadow)
+
+ idx = git.git_path('index', repo=shadow)
+ objs = git.git_path('objects', repo=shadow)
+ env = dict(os.environ, GIT_INDEX_FILE=idx, GIT_OBJECT_DIRECTORY=objs)  # git runs in `repo` but with the shadow clone's index and object paths
+
+ staged_files = git.get_staged_files(cwd=repo)
+ if staged_files:
+ xargs(('git', 'add', '--'), staged_files, cwd=repo, env=env)
+
+ cmd_output_b('git', 'add', '-u', cwd=repo, env=env)
+ git.commit(repo=shadow)
+
+ return shadow, git.head_rev(shadow)  # the shadow clone and its new HEAD replace the original pair
+ else:
+ return repo, ref
+
+
+def try_repo(args: argparse.Namespace) -> int:  # writes a throwaway config for one repo and runs it; returns run()'s status
+ with tempfile.TemporaryDirectory() as tempdir:
+ repo, ref = _repo_ref(tempdir, args.repo, args.ref)
+
+ store = Store(tempdir)  # the store is created inside the temporary directory
+ if args.hook:
+ hooks = [{'id': args.hook}]
+ else:
+ repo_path = store.clone(repo, ref)
+ manifest = load_manifest(os.path.join(repo_path, C.MANIFEST_FILE))
+ manifest = sorted(manifest, key=lambda hook: hook['id'])  # sorted by id so the generated config is deterministic
+ hooks = [{'id': hook['id']} for hook in manifest]
+
+ config = {'repos': [{'repo': repo, 'rev': ref, 'hooks': hooks}]}
+ config_s = yaml_dump(config)
+
+ config_filename = os.path.join(tempdir, C.CONFIG_FILE)
+ with open(config_filename, 'w') as cfg:
+ cfg.write(config_s)
+
+ output.write_line('=' * 79)
+ output.write_line('Using config:')
+ output.write_line('=' * 79)
+ output.write(config_s)
+ output.write_line('=' * 79)
+
+ return run(config_filename, store, args)
diff --git a/pre_commit/commands/validate_config.py b/pre_commit/commands/validate_config.py
new file mode 100644
index 0000000..b3de635
--- /dev/null
+++ b/pre_commit/commands/validate_config.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from pre_commit import clientlib
+
+
def validate_config(filenames: Sequence[str]) -> int:
    """Return 0 if every file loads as a valid config, otherwise 1.

    Each `InvalidConfigError` is printed and checking continues with the
    next file.
    """
    any_invalid = False

    for path in filenames:
        try:
            clientlib.load_config(path)
        except clientlib.InvalidConfigError as exc:
            print(exc)
            any_invalid = True

    return int(any_invalid)
diff --git a/pre_commit/commands/validate_manifest.py b/pre_commit/commands/validate_manifest.py
new file mode 100644
index 0000000..8493c6e
--- /dev/null
+++ b/pre_commit/commands/validate_manifest.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from pre_commit import clientlib
+
+
def validate_manifest(filenames: Sequence[str]) -> int:
    """Return 0 if every file loads as a valid manifest, otherwise 1.

    Each `InvalidManifestError` is printed and checking continues with the
    next file.
    """
    any_invalid = False

    for path in filenames:
        try:
            clientlib.load_manifest(path)
        except clientlib.InvalidManifestError as exc:
            print(exc)
            any_invalid = True

    return int(any_invalid)
diff --git a/pre_commit/constants.py b/pre_commit/constants.py
new file mode 100644
index 0000000..79a9bb6
--- /dev/null
+++ b/pre_commit/constants.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+import importlib.metadata
+
# File names of the configuration file and of the hook manifest file.
CONFIG_FILE = '.pre-commit-config.yaml'
MANIFEST_FILE = '.pre-commit-hooks.yaml'

# Bump when modifying `empty_template`
LOCAL_REPO_VERSION = '1'

# Version of the installed `pre_commit` distribution, read from its metadata.
VERSION = importlib.metadata.version('pre_commit')

# Value of `language_version` when no specific version was selected.
DEFAULT = 'default'
diff --git a/pre_commit/envcontext.py b/pre_commit/envcontext.py
new file mode 100644
index 0000000..1f816ce
--- /dev/null
+++ b/pre_commit/envcontext.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+import contextlib
+import enum
+import os
+from collections.abc import Generator
+from collections.abc import MutableMapping
+from typing import NamedTuple
+from typing import Union
+
# Single-member enum: its only member, `UNSET`, is the patch value that
# makes `envcontext` remove a variable from the environment.
_Unset = enum.Enum('_Unset', 'UNSET')
UNSET = _Unset.UNSET
+
+
class Var(NamedTuple):
    """Reference to an environment variable inside a substitution template.

    `format_env` replaces it with the value of `name`, or with `default`
    when that variable is not present.
    """
    name: str
    default: str = ''
+
+
# A template: literal strings and `Var` references, joined by `format_env`.
SubstitutionT = tuple[Union[str, Var], ...]
# What a variable may be patched to: a literal, `UNSET`, or a template.
ValueT = Union[str, _Unset, SubstitutionT]
# The `patch` argument of `envcontext`: a tuple of (name, value) pairs.
PatchesT = tuple[tuple[str, ValueT], ...]
+
+
def format_env(parts: SubstitutionT, env: MutableMapping[str, str]) -> str:
    """Render a substitution template against `env`.

    Plain strings are used as-is; each `Var` is replaced by the value of
    its variable in `env`, or by its default when the variable is missing.
    """
    rendered = []
    for part in parts:
        if isinstance(part, Var):
            rendered.append(env.get(part.name, part.default))
        else:
            rendered.append(part)
    return ''.join(rendered)
+
+
@contextlib.contextmanager
def envcontext(
        patch: PatchesT,
        _env: MutableMapping[str, str] | None = None,
) -> Generator[None, None, None]:
    """Temporarily apply `patch` to `os.environ` (or to `_env` if given).

    `patch` is an iterable of ``(key, value)`` pairs where `value` is:

    - a string: the variable is set to that string.
    - `UNSET`: the variable is removed.
    - a template tuple of strings and `Var`: rendered against the
      environment as it was before any patch was applied.

    On exit the environment is restored to its state at entry, which also
    discards any changes made inside the context.
    """
    target = os.environ if _env is None else _env
    saved = dict(target)

    for key, value in patch:
        if value is UNSET:
            target.pop(key, None)
            continue
        if isinstance(value, tuple):
            value = format_env(value, saved)
        target[key] = value

    try:
        yield
    finally:
        target.clear()
        target.update(saved)
diff --git a/pre_commit/error_handler.py b/pre_commit/error_handler.py
new file mode 100644
index 0000000..73e608b
--- /dev/null
+++ b/pre_commit/error_handler.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import os.path
+import sys
+import traceback
+from collections.abc import Generator
+from typing import IO
+
+import pre_commit.constants as C
+from pre_commit import output
+from pre_commit.errors import FatalError
+from pre_commit.store import Store
+from pre_commit.util import cmd_output_b
+from pre_commit.util import force_bytes
+
+
def _log_and_exit(
        msg: str,
        ret_code: int,
        exc: BaseException,
        formatted: str,
) -> None:
    """Report `exc`, write a diagnostic log and exit with `ret_code`.

    A one-line summary is written to the normal output.  The full report
    (version information, the summary and the `formatted` traceback) goes
    to `pre-commit.log` in the store directory when that directory is
    writable, otherwise to stdout.  Always raises `SystemExit`.
    """
    summary = f'{msg}: {type(exc).__name__}: '.encode() + force_bytes(exc)
    output.write_line_b(summary)

    git_version_b = cmd_output_b('git', '--version', check=False)[1]
    git_version = git_version_b.decode(errors='backslashreplace').rstrip()

    store_dir = Store().directory
    log_path = os.path.join(store_dir, 'pre-commit.log')
    with contextlib.ExitStack() as stack:
        log: IO[bytes]
        if os.access(store_dir, os.W_OK):
            output.write_line(f'Check the log at {log_path}')
            log = stack.enter_context(open(log_path, 'wb'))
        else:  # pragma: win32 no cover
            output.write_line(f'Failed to write to log at {log_path}')
            log = sys.stdout.buffer

        emit = functools.partial(output.write_line, stream=log)
        emit_b = functools.partial(output.write_line_b, stream=log)

        emit('### version information')
        emit()
        emit('```')
        emit(f'pre-commit version: {C.VERSION}')
        emit(f'git --version: {git_version}')
        emit('sys.version:')
        for version_line in sys.version.splitlines():
            emit(f' {version_line}')
        for detail in (
                f'sys.executable: {sys.executable}',
                f'os.name: {os.name}',
                f'sys.platform: {sys.platform}',
        ):
            emit(detail)
        emit('```')
        emit()

        emit('### error information')
        emit()
        emit('```')
        emit_b(summary)
        emit('```')
        emit()
        emit('```')
        emit(formatted.rstrip())
        emit('```')
        raise SystemExit(ret_code)
+
+
@contextlib.contextmanager
def error_handler() -> Generator[None, None, None]:
    """Turn exceptions escaping the wrapped block into a logged exit.

    `FatalError` exits with code 1, `KeyboardInterrupt` with 130 and any
    other `Exception` with 3; the reporting is done by `_log_and_exit`.
    """
    try:
        yield
    except FatalError as e:
        _log_and_exit('An error has occurred', 1, e, traceback.format_exc())
    except KeyboardInterrupt as e:
        _log_and_exit('Interrupted (^C)', 130, e, traceback.format_exc())
    except Exception as e:
        _log_and_exit(
            'An unexpected error has occurred', 3, e, traceback.format_exc(),
        )
diff --git a/pre_commit/errors.py b/pre_commit/errors.py
new file mode 100644
index 0000000..eac34fa
--- /dev/null
+++ b/pre_commit/errors.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+
class FatalError(RuntimeError):
    """Error that `error_handler` reports as 'An error has occurred'."""
    pass
diff --git a/pre_commit/file_lock.py b/pre_commit/file_lock.py
new file mode 100644
index 0000000..d3dafb4
--- /dev/null
+++ b/pre_commit/file_lock.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import contextlib
+import errno
+import sys
+from collections.abc import Generator
+from typing import Callable
+
+
# Two platform-specific implementations of `_locked`, a context manager
# that holds a lock on an open file descriptor while its body runs.
if sys.platform == 'win32':  # pragma: no cover (windows)
    import msvcrt

    # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/locking

    # on windows we lock "regions" of files, we don't care about the actual
    # byte region so we'll just pick *some* number here.
    _region = 0xffff

    @contextlib.contextmanager
    def _locked(
            fileno: int,
            blocked_cb: Callable[[], None],
    ) -> Generator[None, None, None]:
        """Hold a lock on `fileno` for the duration of the context.

        `blocked_cb` is called once if the first, non-blocking attempt
        fails; after that the lock is retried until it is acquired.
        """
        try:
            # first attempt: non-blocking
            msvcrt.locking(fileno, msvcrt.LK_NBLCK, _region)
        except OSError:
            blocked_cb()
            while True:
                try:
                    msvcrt.locking(fileno, msvcrt.LK_LOCK, _region)
                except OSError as e:
                    # Locking violation. Returned when the _LK_LOCK or _LK_RLCK
                    # flag is specified and the file cannot be locked after 10
                    # attempts.
                    if e.errno != errno.EDEADLOCK:
                        raise
                else:
                    break

        try:
            yield
        finally:
            # From cursory testing, it seems to get unlocked when the file is
            # closed so this may not be necessary.
            # The documentation however states:
            # "Regions should be locked only briefly and should be unlocked
            # before closing a file or exiting the program."
            msvcrt.locking(fileno, msvcrt.LK_UNLCK, _region)
else:  # pragma: win32 no cover
    import fcntl

    @contextlib.contextmanager
    def _locked(
            fileno: int,
            blocked_cb: Callable[[], None],
    ) -> Generator[None, None, None]:
        """Hold an exclusive `flock` on `fileno` for the duration of the context.

        `blocked_cb` is called if the first, non-blocking attempt fails;
        the lock is then requested again without `LOCK_NB`.
        """
        try:
            fcntl.flock(fileno, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:  # pragma: no cover (tests are single-threaded)
            blocked_cb()
            fcntl.flock(fileno, fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(fileno, fcntl.LOCK_UN)
+
+
@contextlib.contextmanager
def lock(
        path: str,
        blocked_cb: Callable[[], None],
) -> Generator[None, None, None]:
    """Hold a lock on the file at `path` while the context is active.

    The file is opened in append mode (created if missing) and passed to
    `_locked` together with `blocked_cb`.
    """
    with open(path, 'a+') as lock_file, \
            _locked(lock_file.fileno(), blocked_cb):
        yield
diff --git a/pre_commit/git.py b/pre_commit/git.py
new file mode 100644
index 0000000..19aac38
--- /dev/null
+++ b/pre_commit/git.py
@@ -0,0 +1,245 @@
+from __future__ import annotations
+
+import logging
+import os.path
+import sys
+from collections.abc import Mapping
+
+from pre_commit.errors import FatalError
+from pre_commit.util import CalledProcessError
+from pre_commit.util import cmd_output
+from pre_commit.util import cmd_output_b
+
logger = logging.getLogger(__name__)

# see #2046
# git arguments that set `core.useBuiltinFSMonitor=false` for one command
NO_FS_MONITOR = ('-c', 'core.useBuiltinFSMonitor=false')
+
+
def zsplit(s: str) -> list[str]:
    """Split NUL-separated output into a list of strings.

    Leading and trailing NULs are ignored; empty input gives an empty list.
    """
    trimmed = s.strip('\0')
    return trimmed.split('\0') if trimmed else []
+
+
def no_git_env(_env: Mapping[str, str] | None = None) -> dict[str, str]:
    """Return a copy of the environment without most `GIT_*` variables.

    Variables that do not start with `GIT_` are always kept.  Of the
    `GIT_*` variables only an allowlist survives: the
    `GIT_CONFIG_KEY_*` / `GIT_CONFIG_VALUE_*` families and a fixed set of
    names.  `os.environ` is used when `_env` is `None`.
    """
    # Too many bugs dealing with environment variables and GIT:
    # https://github.com/pre-commit/pre-commit/issues/300
    # In git 2.6.3 (maybe others), git exports GIT_WORK_TREE while running
    # pre-commit hooks
    # In git 1.9.1 (maybe others), git exports GIT_DIR and GIT_INDEX_FILE
    # while running pre-commit hooks in submodules.
    # GIT_DIR: Causes git clone to clone wrong thing
    # GIT_INDEX_FILE: Causes 'error invalid object ...' during commit
    source = os.environ if _env is None else _env

    kept_prefixes = ('GIT_CONFIG_KEY_', 'GIT_CONFIG_VALUE_')
    kept_names = {
        'GIT_EXEC_PATH', 'GIT_SSH', 'GIT_SSH_COMMAND', 'GIT_SSL_CAINFO',
        'GIT_SSL_NO_VERIFY', 'GIT_CONFIG_COUNT',
        'GIT_HTTP_PROXY_AUTHMETHOD',
        'GIT_ALLOW_PROTOCOL',
        'GIT_ASKPASS',
    }

    cleaned = {}
    for name, value in source.items():
        if (
                not name.startswith('GIT_') or
                name.startswith(kept_prefixes) or
                name in kept_names
        ):
            cleaned[name] = value
    return cleaned
+
+
def get_root() -> str:
    """Return the absolute path of the repository's top-level directory.

    Raises `FatalError` when git fails or when the current directory is
    inside the `.git` directory.
    """
    # Git 2.25 introduced a change to "rev-parse --show-toplevel" that exposed
    # underlying volumes for Windows drives mapped with SUBST. We use
    # "rev-parse --show-cdup" to get the appropriate path, but must perform
    # an extra check to see if we are in the .git directory.
    try:
        _, cdup, _ = cmd_output('git', 'rev-parse', '--show-cdup')
        root = os.path.abspath(cdup.strip())
        _, inside, _ = cmd_output('git', 'rev-parse', '--is-inside-git-dir')
    except CalledProcessError:
        raise FatalError(
            'git failed. Is it installed, and are you in a Git repository '
            'directory?',
        )

    if inside.strip() == 'false':
        return root

    raise FatalError(
        'git toplevel unexpectedly empty! make sure you are not '
        'inside the `.git` directory of your repository.',
    )
+
+
def get_git_dir(git_root: str = '.') -> str:
    """Return the normalized path of the git directory for `git_root`."""
    flag = '--git-dir'
    _, stdout, _ = cmd_output('git', 'rev-parse', flag, cwd=git_root)
    git_dir = stdout.strip()
    # git echoing the flag back means it did not understand the option
    if git_dir == flag:
        raise AssertionError('unreachable: no git dir')
    return os.path.normpath(os.path.join(git_root, git_dir))
+
+
def get_git_common_dir(git_root: str = '.') -> str:
    """Return the normalized path of the common git directory.

    Falls back to `get_git_dir` when git echoes the option back instead of
    answering it.
    """
    flag = '--git-common-dir'
    _, stdout, _ = cmd_output('git', 'rev-parse', flag, cwd=git_root)
    common_dir = stdout.strip()
    if common_dir == flag:  # pragma: no cover (git < 2.5)
        return get_git_dir(git_root)
    return os.path.normpath(os.path.join(git_root, common_dir))
+
+
def is_in_merge_conflict() -> bool:
    """Return whether both `MERGE_MSG` and `MERGE_HEAD` exist in the git dir."""
    git_dir = get_git_dir('.')
    return all(
        os.path.exists(os.path.join(git_dir, marker))
        for marker in ('MERGE_MSG', 'MERGE_HEAD')
    )
+
+
def parse_merge_msg_for_conflicts(merge_msg: bytes) -> list[str]:
    """Extract the conflicted file names listed in a merge message.

    A line names a conflicted file when it starts with a tab, or with
    ``#`` followed by a tab (the form written by git 2.4.1).
    """
    conflicted = []
    for line in merge_msg.splitlines():
        if line.startswith((b'\t', b'#\t')):
            conflicted.append(line.lstrip(b'#').strip().decode())
    return conflicted
+
+
def get_conflicted_files() -> set[str]:
    """Return the files involved in the merge that is in progress.

    This is the union of the files listed in `MERGE_MSG` and the files
    that differ between the current tree and `HEAD` / `MERGE_HEAD`.
    """
    logger.info('Checking merge-conflict files only.')
    # Need to get the conflicted files from the MERGE_MSG because they could
    # have resolved the conflict by choosing one side or the other
    merge_msg_path = os.path.join(get_git_dir('.'), 'MERGE_MSG')
    with open(merge_msg_path, 'rb') as f:
        merge_msg = f.read()
    from_merge_msg = parse_merge_msg_for_conflicts(merge_msg)

    # This will get the rest of the changes made after the merge.
    # If they resolved the merge conflict by choosing a mesh of both sides
    # this will also include the conflicted files
    _, tree_out, _ = cmd_output('git', 'write-tree')
    _, diff_out, _ = cmd_output(
        'git', 'diff', '--name-only', '--no-ext-diff', '-z',
        '-m', tree_out.strip(), 'HEAD', 'MERGE_HEAD',
    )
    return {*from_merge_msg, *zsplit(diff_out)}
+
+
def get_staged_files(cwd: str | None = None) -> list[str]:
    """Return the staged file names, excluding deletions."""
    _, stdout, _ = cmd_output(
        'git', 'diff', '--staged', '--name-only', '--no-ext-diff', '-z',
        # Everything except for D
        '--diff-filter=ACMRTUXB',
        cwd=cwd,
    )
    return zsplit(stdout)
+
+
def intent_to_add_files() -> list[str]:
    """Return the names `git diff` reports with the `A` (added) filter."""
    diff_cmd = (
        'git', 'diff', '--no-ext-diff', '--ignore-submodules',
        '--diff-filter=A', '--name-only', '-z',
    )
    return zsplit(cmd_output(*diff_cmd)[1])
+
+
def get_all_files() -> list[str]:
    """Return every file name listed by `git ls-files`."""
    _, stdout, _ = cmd_output('git', 'ls-files', '-z')
    return zsplit(stdout)
+
+
def get_changed_files(old: str, new: str) -> list[str]:
    """Return the file names that differ between `old` and `new`.

    The three-dot range is tried first; if git rejects it, the two-dot
    range is used instead.
    """
    base_cmd = ('git', 'diff', '--name-only', '--no-ext-diff', '-z')
    try:
        stdout = cmd_output(*base_cmd, f'{old}...{new}')[1]
    except CalledProcessError:  # pragma: no cover (new git)
        # on newer git where old and new do not have a merge base git fails
        # so we try a full diff (this is what old git did for us!)
        stdout = cmd_output(*base_cmd, f'{old}..{new}')[1]

    return zsplit(stdout)
+
+
def head_rev(remote: str) -> str:
    """Return the revision `git ls-remote` reports for `HEAD` of `remote`."""
    ls_remote = ('git', 'ls-remote', '--exit-code', remote, 'HEAD')
    stdout = cmd_output(*ls_remote)[1]
    # the revision is the first whitespace-separated field of the output
    return stdout.split()[0]
+
+
def has_diff(*args: str, repo: str = '.') -> bool:
    """Return whether `git diff --quiet <args>` in `repo` exits with 1."""
    returncode, _, _ = cmd_output_b(
        'git', 'diff', '--quiet', '--no-ext-diff', *args,
        cwd=repo, check=False,
    )
    return returncode == 1
+
+
def has_core_hookpaths_set() -> bool:
    """Return whether `git config core.hooksPath` prints a non-blank value."""
    stdout = cmd_output_b('git', 'config', 'core.hooksPath', check=False)[1]
    return bool(stdout.strip())
+
+
def init_repo(path: str, remote: str) -> None:
    """Initialize a git repository at `path` with `remote` as its `origin`.

    A `remote` that is an existing directory is made absolute first.
    """
    origin = os.path.abspath(remote) if os.path.isdir(remote) else remote

    base_cmd = ('git', *NO_FS_MONITOR)
    clean_env = no_git_env()
    # avoid the user's template so that hooks do not recurse
    cmd_output_b(*base_cmd, 'init', '--template=', path, env=clean_env)
    cmd_output_b(
        *base_cmd, 'remote', 'add', 'origin', origin,
        cwd=path, env=clean_env,
    )
+
+
def commit(repo: str = '.') -> None:
    """Create a commit in `repo` under a fixed pre-commit identity.

    The commit uses the message `init` and is made with `--no-edit`,
    `--no-gpg-sign` and `-n`.
    """
    author, address = 'pre-commit', 'asottile+pre-commit@umich.edu'
    env = no_git_env()
    env.update({
        'GIT_AUTHOR_NAME': author,
        'GIT_COMMITTER_NAME': author,
        'GIT_AUTHOR_EMAIL': address,
        'GIT_COMMITTER_EMAIL': address,
    })
    cmd_output_b(
        'git', 'commit', '--no-edit', '--no-gpg-sign', '-n', '-minit',
        cwd=repo, env=env,
    )
+
+
def git_path(name: str, repo: str = '.') -> str:
    """Return `repo` joined with git's `rev-parse --git-path` for `name`."""
    rev_parse = ('git', 'rev-parse', '--git-path', name)
    stdout = cmd_output(*rev_parse, cwd=repo)[1]
    return os.path.join(repo, stdout.strip())
+
+
def check_for_cygwin_mismatch() -> None:
    """Warn when a cygwin python is combined with a windows git or vice versa.

    See https://github.com/pre-commit/pre-commit/issues/354

    Only does anything on `cygwin` / `win32`; the check is skipped when
    the current directory is not inside a git repository.
    """
    if sys.platform in ('cygwin', 'win32'):  # pragma: no cover (windows)
        is_cygwin_python = sys.platform == 'cygwin'
        try:
            toplevel = get_root()
        except FatalError:  # skip the check if we're not in a git repo
            return
        # a toplevel starting with `/` is taken to mean a cygwin git
        is_cygwin_git = toplevel.startswith('/')

        if is_cygwin_python ^ is_cygwin_git:
            exe_type = {True: '(cygwin)', False: '(windows)'}
            # `Logger.warn` is a deprecated alias; use `warning`
            logger.warning(
                f'pre-commit has detected a mix of cygwin python / git\n'
                f'This combination is not supported, it is likely you will '
                f'receive an error later in the program.\n'
                f'Make sure to use cygwin git+python while using cygwin\n'
                f'These can be installed through the cygwin installer.\n'
                f' - python {exe_type[is_cygwin_python]}\n'
                f' - git {exe_type[is_cygwin_git]}\n',
            )
+
+
def get_best_candidate_tag(rev: str, git_repo: str) -> str:
    """Get the best tag candidate.

    Multiple tags can exist on a SHA. Sometimes a moving tag is attached
    to a version tag. Try to pick the tag that looks like a version: the
    first tag pointing at `rev` that contains a `.`, or `rev` itself when
    there is none.
    """
    _, stdout, _ = cmd_output(
        'git', *NO_FS_MONITOR, 'tag', '--points-at', rev, cwd=git_repo,
    )
    versionlike = (tag for tag in stdout.splitlines() if '.' in tag)
    return next(versionlike, rev)
diff --git a/pre_commit/hook.py b/pre_commit/hook.py
new file mode 100644
index 0000000..309cd5b
--- /dev/null
+++ b/pre_commit/hook.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import logging
+from collections.abc import Sequence
+from typing import Any
+from typing import NamedTuple
+
+from pre_commit.prefix import Prefix
+
# logs under the fixed name 'pre_commit' rather than this module's `__name__`
logger = logging.getLogger('pre_commit')
+
+
class Hook(NamedTuple):
    """Immutable record holding the settings of one hook.

    `src` and `prefix` are passed to `create` directly; every other field
    is read from the hook dictionary.
    """
    src: str
    prefix: Prefix
    id: str
    name: str
    entry: str
    language: str
    alias: str
    files: str
    exclude: str
    types: Sequence[str]
    types_or: Sequence[str]
    exclude_types: Sequence[str]
    additional_dependencies: Sequence[str]
    args: Sequence[str]
    always_run: bool
    fail_fast: bool
    pass_filenames: bool
    description: str
    language_version: str
    log_file: str
    minimum_pre_commit_version: str
    require_serial: bool
    stages: Sequence[str]
    verbose: bool

    @property
    def install_key(self) -> tuple[Prefix, str, str, tuple[str, ...]]:
        """Tuple of prefix, language, version and additional dependencies."""
        deps = tuple(self.additional_dependencies)
        return (self.prefix, self.language, self.language_version, deps)

    @classmethod
    def create(cls, src: str, prefix: Prefix, dct: dict[str, Any]) -> Hook:
        """Build a `Hook` from `dct`, warning about keys that are not fields."""
        # TODO: have cfgv do this (?)
        unexpected = sorted(set(dct) - _KEYS)
        if unexpected:
            logger.warning(
                f'Unexpected key(s) present on {src} => {dct["id"]}: '
                f'{", ".join(unexpected)}',
            )
        known = {key: dct[key] for key in _KEYS}
        return cls(src=src, prefix=prefix, **known)
+
+
# every `Hook` field except `src` and `prefix`, which `create` takes directly
_KEYS = frozenset(set(Hook._fields) - {'src', 'prefix'})
diff --git a/pre_commit/lang_base.py b/pre_commit/lang_base.py
new file mode 100644
index 0000000..5303948
--- /dev/null
+++ b/pre_commit/lang_base.py
@@ -0,0 +1,192 @@
+from __future__ import annotations
+
+import contextlib
+import os
+import random
+import re
+import shlex
+from collections.abc import Generator
+from collections.abc import Sequence
+from typing import Any
+from typing import ContextManager
+from typing import NoReturn
+from typing import Protocol
+
+import pre_commit.constants as C
+from pre_commit import parse_shebang
+from pre_commit import xargs
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output_b
+
# seed used by `_shuffled` so that the shuffle order is reproducible
FIXED_RANDOM_SEED = 1542676187

# matches a `shims` path component delimited by `/` or `\` on both sides
SHIMS_RE = re.compile(r'[/\\]shims[/\\]')
+
+
class Language(Protocol):
    """Structural interface that a language implementation provides."""

    @property
    def ENVIRONMENT_DIR(self) -> str | None:
        """Use `None` for no installation / environment."""
        ...

    def get_default_version(self) -> str:
        """Return a value to replace `'default'` for `language_version`."""
        ...

    def health_check(self, prefix: Prefix, version: str) -> str | None:
        """Return whether the environment is healthy (or should be rebuilt)."""
        ...

    def install_environment(
            self,
            prefix: Prefix,
            version: str,
            additional_dependencies: Sequence[str],
    ) -> None:
        """Install a repository for the given language and language_version."""
        ...

    def in_env(self, prefix: Prefix, version: str) -> ContextManager[None]:
        """Modify the environment for hook execution."""
        ...

    def run_hook(
            self,
            prefix: Prefix,
            entry: str,
            args: Sequence[str],
            file_args: Sequence[str],
            *,
            is_local: bool,
            require_serial: bool,
            color: bool,
    ) -> tuple[int, bytes]:
        """Execute a hook and return the exit code and output."""
        ...
+
+
def exe_exists(exe: str) -> bool:
    """Return whether `exe` resolves to a usable system executable.

    The executable must be found, must not live under a `shims`
    directory and, unless the home directory is a filesystem root, must
    not be located inside the home directory.
    """
    found = parse_shebang.find_executable(exe)
    if found is None:  # the executable was not found
        return False

    # it is in a /shims/ directory
    if SHIMS_RE.search(found):
        return False

    homedir = os.path.expanduser('~')
    # the homedir is / (docker, service user, etc.)
    if os.path.dirname(homedir) == homedir:
        return True

    try:
        common: str | None = os.path.commonpath((found, homedir))
    except ValueError:  # on windows, different drives raises ValueError
        common = None

    # the exe is not contained in the home directory
    return common != homedir
+
+
def setup_cmd(prefix: Prefix, cmd: tuple[str, ...], **kwargs: Any) -> None:
    """Run `cmd` with the prefix directory as its working directory."""
    workdir = prefix.prefix_dir
    cmd_output_b(*cmd, cwd=workdir, **kwargs)
+
+
def environment_dir(prefix: Prefix, d: str, language_version: str) -> str:
    """Return the prefix path of the `<d>-<language_version>` directory."""
    dirname = '-'.join((d, language_version))
    return prefix.path(dirname)
+
+
def assert_version_default(binary: str, version: str) -> None:
    """Raise `AssertionError` unless `version` is the default version."""
    if version == C.DEFAULT:
        return
    raise AssertionError(
        f'for now, pre-commit requires system-installed {binary} -- '
        f'you selected `language_version: {version}`',
    )
+
+
def assert_no_additional_deps(
        lang: str,
        additional_deps: Sequence[str],
) -> None:
    """Raise `AssertionError` when any additional dependencies are given."""
    if not additional_deps:
        return
    raise AssertionError(
        f'for now, pre-commit does not support '
        f'additional_dependencies for {lang} -- '
        f'you selected `additional_dependencies: {additional_deps}`',
    )
+
+
def basic_get_default_version() -> str:
    """Return the default version, `C.DEFAULT`."""
    default_version: str = C.DEFAULT
    return default_version
+
+
def basic_health_check(prefix: Prefix, language_version: str) -> str | None:
    """Health check that never reports a problem: always returns `None`."""
    problem: str | None = None
    return problem
+
+
def no_install(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> NoReturn:
    """`install_environment` stand-in that always raises `AssertionError`."""
    message = 'This language is not installable'
    raise AssertionError(message)
+
+
@contextlib.contextmanager
def no_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """`in_env` stand-in that changes nothing."""
    yield None
+
+
def target_concurrency() -> int:
    """Return how many jobs may run in parallel.

    `PRE_COMMIT_NO_CONCURRENCY` in the environment forces 1, `TRAVIS`
    caps it at 2, otherwise the cpu count is used.
    """
    if 'PRE_COMMIT_NO_CONCURRENCY' in os.environ:
        return 1
    # Travis appears to have a bunch of CPUs, but we can't use them all.
    if 'TRAVIS' in os.environ:
        return 2
    return xargs.cpu_count()
+
+
def _shuffled(seq: Sequence[str]) -> list[str]:
    """Deterministically shuffle

    Returns a shuffled copy of `seq`; the generator is freshly seeded
    with `FIXED_RANDOM_SEED` on every call.
    """
    rng = random.Random()
    rng.seed(FIXED_RANDOM_SEED, version=1)

    shuffled = list(seq)
    rng.shuffle(shuffled)
    return shuffled
+
+
def run_xargs(
        cmd: tuple[str, ...],
        file_args: Sequence[str],
        *,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:
    """Run `cmd` over `file_args` through `xargs.xargs`.

    With `require_serial` a single job is used and the file order is
    kept; otherwise the files are shuffled and `target_concurrency()`
    jobs are used.
    """
    if require_serial:
        return xargs.xargs(cmd, file_args, target_concurrency=1, color=color)

    # Shuffle the files so that they more evenly fill out the xargs
    # partitions, but do it deterministically in case a hook cares about
    # ordering.
    shuffled_files = _shuffled(file_args)
    return xargs.xargs(
        cmd,
        shuffled_files,
        target_concurrency=target_concurrency(),
        color=color,
    )
+
+
def hook_cmd(entry: str, args: Sequence[str]) -> tuple[str, ...]:
    """Return the shell-split words of `entry` followed by `args`."""
    words = shlex.split(entry)
    words.extend(args)
    return tuple(words)
+
+
def basic_run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:
    """Run the hook command over `file_args` and return its result.

    `prefix` and `is_local` are accepted but not used here.
    """
    cmd = hook_cmd(entry, args)
    return run_xargs(
        cmd, file_args, require_serial=require_serial, color=color,
    )
diff --git a/pre_commit/languages/__init__.py b/pre_commit/languages/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pre_commit/languages/__init__.py
diff --git a/pre_commit/languages/conda.py b/pre_commit/languages/conda.py
new file mode 100644
index 0000000..80b3e15
--- /dev/null
+++ b/pre_commit/languages/conda.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import SubstitutionT
+from pre_commit.envcontext import UNSET
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output_b
+
# directory-name stem passed to `lang_base.environment_dir`
ENVIRONMENT_DIR = 'conda'
# these hooks use the shared implementations from `lang_base` unchanged
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
run_hook = lang_base.basic_run_hook
+
+
def get_env_patch(env: str) -> PatchesT:
    """Return the environment patch for the conda environment at `env`.

    `PYTHONHOME` and `VIRTUAL_ENV` are unset, `CONDA_PREFIX` is set to
    `env`, and the environment's executable directories are prepended to
    `PATH`.
    """
    # On non-windows systems executable live in $CONDA_PREFIX/bin, on Windows
    # they can be in $CONDA_PREFIX/bin, $CONDA_PREFIX/Library/bin,
    # $CONDA_PREFIX/Scripts and $CONDA_PREFIX. Whereas the latter only
    # seems to be used for python.exe.
    search_dirs = [os.path.join(env, 'bin')]
    if sys.platform == 'win32':  # pragma: win32 cover
        search_dirs = [
            os.path.join(env, 'Library', 'bin'),
            os.path.join(env, 'Scripts'),
            env,
            *search_dirs,
        ]

    path: SubstitutionT = (
        *(part for d in search_dirs for part in (d, os.pathsep)),
        Var('PATH'),
    )

    return (
        ('PYTHONHOME', UNSET),
        ('VIRTUAL_ENV', UNSET),
        ('CONDA_PREFIX', env),
        ('PATH', path),
    )
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the conda environment patch for the duration of the context."""
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def _conda_exe() -> str:
    """Return the conda-compatible executable to invoke.

    A non-empty `PRE_COMMIT_USE_MICROMAMBA` selects `micromamba`, else a
    non-empty `PRE_COMMIT_USE_MAMBA` selects `mamba`, else `conda`.
    """
    overrides = (
        ('PRE_COMMIT_USE_MICROMAMBA', 'micromamba'),
        ('PRE_COMMIT_USE_MAMBA', 'mamba'),
    )
    for env_var, exe in overrides:
        if os.environ.get(env_var):
            return exe
    return 'conda'
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Create the conda environment for a hook repository.

    The environment is created from the repository's `environment.yml`;
    any `additional_dependencies` are installed into it afterwards.
    Only the default `version` is accepted.
    """
    lang_base.assert_version_default('conda', version)

    exe = _conda_exe()
    target = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    workdir = prefix.prefix_dir

    create_cmd = (
        exe, 'env', 'create', '-p', target, '--file', 'environment.yml',
    )
    cmd_output_b(*create_cmd, cwd=workdir)

    if additional_dependencies:
        install_cmd = (exe, 'install', '-p', target, *additional_dependencies)
        cmd_output_b(*install_cmd, cwd=workdir)
diff --git a/pre_commit/languages/coursier.py b/pre_commit/languages/coursier.py
new file mode 100644
index 0000000..6558bf6
--- /dev/null
+++ b/pre_commit/languages/coursier.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+import contextlib
+import os.path
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.errors import FatalError
+from pre_commit.parse_shebang import find_executable
+from pre_commit.prefix import Prefix
+
# directory-name stem passed to `lang_base.environment_dir`
ENVIRONMENT_DIR = 'coursier'

# these hooks use the shared implementations from `lang_base` unchanged
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
run_hook = lang_base.basic_run_hook
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Install coursier applications into the hook's environment directory.

    Every app descriptor in the repository's `.pre-commit-channel`
    directory is installed, followed by any `additional_dependencies`.
    Raises `FatalError` when neither of the two is available and
    `AssertionError` when no coursier executable can be found.
    """
    lang_base.assert_version_default('coursier', version)

    # Support both possible executable names (either "cs" or "coursier")
    cs = find_executable('cs') or find_executable('coursier')
    if cs is None:
        raise AssertionError(
            'pre-commit requires system-installed "cs" or "coursier" '
            'executables in the application search path',
        )

    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    def _install(*opts: str) -> None:
        assert cs is not None
        for action in (('fetch',), ('install', '--dir', envdir)):
            lang_base.setup_cmd(prefix, (cs, *action, *opts))

    with in_env(prefix, version):
        channel = prefix.path('.pre-commit-channel')
        if os.path.isdir(channel):
            for descriptor in os.listdir(channel):
                # the app name is the descriptor's file name sans extension
                app = os.path.splitext(os.path.basename(descriptor))[0]
                _install(
                    '--default-channels=false',
                    '--channel', channel,
                    app,
                )
        elif not additional_dependencies:
            raise FatalError(
                'expected .pre-commit-channel dir or additional_dependencies',
            )

        if additional_dependencies:
            _install(*additional_dependencies)
+
+
def get_env_patch(target_dir: str) -> PatchesT:
    """Return the patch that puts `target_dir` first on `PATH`.

    `COURSIER_CACHE` is pointed at `.cs-cache` inside `target_dir`.
    """
    path_template = (target_dir, os.pathsep, Var('PATH'))
    cache_dir = os.path.join(target_dir, '.cs-cache')
    return (
        ('PATH', path_template),
        ('COURSIER_CACHE', cache_dir),
    )
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the coursier environment patch for the duration of the context."""
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
diff --git a/pre_commit/languages/dart.py b/pre_commit/languages/dart.py
new file mode 100644
index 0000000..129ac59
--- /dev/null
+++ b/pre_commit/languages/dart.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+import contextlib
+import os.path
+import shutil
+import tempfile
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+from pre_commit.util import win_exe
+from pre_commit.yaml import yaml_load
+
# directory-name stem passed to `lang_base.environment_dir`
ENVIRONMENT_DIR = 'dartenv'

# these hooks use the shared implementations from `lang_base` unchanged
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
run_hook = lang_base.basic_run_hook
+
+
def get_env_patch(venv: str) -> PatchesT:
    """Return the patch that puts the `bin` directory of `venv` on `PATH`."""
    bin_dir = os.path.join(venv, 'bin')
    path_template = (bin_dir, os.pathsep, Var('PATH'))
    return (('PATH', path_template),)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the dart environment patch for the duration of the context."""
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Compile the dart executables of a hook repository and its extras.

    Every entry under `executables` in the repository's `pubspec.yaml` is
    compiled into the environment's `bin` directory.  Each additional
    dependency (`name` or `name:version`) is added to a temporary pub
    cache, located by its `pubspec.yaml`, and compiled the same way.
    Only the default `version` is accepted.
    """
    lang_base.assert_version_default('dart', version)

    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    bin_dir = os.path.join(envdir, 'bin')

    def _install_dir(prefix_p: Prefix, pub_cache: str) -> None:
        dart_env = {**os.environ, 'PUB_CACHE': pub_cache}

        with open(prefix_p.path('pubspec.yaml')) as f:
            pubspec = yaml_load(f)

        lang_base.setup_cmd(prefix_p, ('dart', 'pub', 'get'), env=dart_env)

        for executable in pubspec['executables']:
            compiled = os.path.join(bin_dir, win_exe(executable))
            source = prefix_p.path('bin', f'{executable}.dart')
            lang_base.setup_cmd(
                prefix_p,
                ('dart', 'compile', 'exe', '--output', compiled, source),
                env=dart_env,
            )

    os.makedirs(bin_dir)

    with tempfile.TemporaryDirectory() as tmp:
        _install_dir(prefix, tmp)

    for dep_s in additional_dependencies:
        with tempfile.TemporaryDirectory() as dep_tmp:
            dep, _, dep_version = dep_s.partition(':')
            dep_cmd: tuple[str, ...] = (dep,)
            if dep_version:
                dep_cmd = (dep, '--version', dep_version)

            lang_base.setup_cmd(
                prefix,
                ('dart', 'pub', 'cache', 'add', *dep_cmd),
                env={**os.environ, 'PUB_CACHE': dep_tmp},
            )

            # try and find the 'pubspec.yaml' that just got added
            for root, _, filenames in os.walk(dep_tmp):
                if 'pubspec.yaml' not in filenames:
                    continue
                # compile from a copy of the package made outside the cache
                with tempfile.TemporaryDirectory() as copied:
                    pkg = os.path.join(copied, 'pkg')
                    shutil.copytree(root, pkg)
                    _install_dir(Prefix(pkg), dep_tmp)
                break
            else:
                raise AssertionError(
                    f'could not find pubspec.yaml for {dep_s}',
                )
diff --git a/pre_commit/languages/docker.py b/pre_commit/languages/docker.py
new file mode 100644
index 0000000..2632851
--- /dev/null
+++ b/pre_commit/languages/docker.py
@@ -0,0 +1,146 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import os
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.prefix import Prefix
+from pre_commit.util import CalledProcessError
+from pre_commit.util import cmd_output_b
+
+ENVIRONMENT_DIR = 'docker'
+PRE_COMMIT_LABEL = 'PRE_COMMIT'
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+in_env = lang_base.no_env # no special environment for docker
+
+
+def _is_in_docker() -> bool:
+ try:
+ with open('/proc/1/cgroup', 'rb') as f:
+ return b'docker' in f.read()
+ except FileNotFoundError:
+ return False
+
+
def _get_container_id() -> str:
    """Return the container id taken from the ``cpuset`` line of
    ``/proc/1/cgroup``.

    Raises ``RuntimeError`` when no ``cpuset`` line is present.
    """
    # It's assumed that we already check /proc/1/cgroup in _is_in_docker. The
    # cpuset cgroup controller existed since cgroups were introduced so this
    # way of getting the container ID is pretty reliable.
    with open('/proc/1/cgroup', 'rb') as cgroup_f:
        for line in cgroup_f:
            fields = line.split(b':')
            if fields[1] == b'cpuset':
                # the last component of the cgroup path is the id
                return os.path.basename(fields[2]).strip().decode()
    raise RuntimeError('Failed to find the container ID in /proc/1/cgroup.')
+
+
def _get_docker_path(path: str) -> str:
    """Translate ``path`` into the source path of the mount containing it.

    When not running inside docker, when ``docker inspect`` of our own
    container fails, or when no mount of the container contains ``path``,
    ``path`` is returned unchanged.
    """
    if not _is_in_docker():
        return path

    container_id = _get_container_id()

    try:
        _, out, _ = cmd_output_b('docker', 'inspect', container_id)
    except CalledProcessError:
        # self-container was not visible from here (perhaps docker-in-docker)
        return path

    container, = json.loads(out)
    for mount in container['Mounts']:
        src_path = mount['Source']
        to_path = mount['Destination']
        if os.path.commonpath((path, to_path)) == to_path:
            # So there is something in common,
            # and we can proceed remapping it
            #
            # only the leading mount destination is swapped for its source:
            # a plain `str.replace` would also rewrite later occurrences of
            # `to_path` (every separator when the destination is `/`)
            rel = os.path.relpath(path, to_path)
            if rel == os.curdir:
                return src_path
            return os.path.join(src_path, rel)
    # we're in Docker, but the path is not mounted, cannot really do anything,
    # so fall back to original path
    return path
+
+
def md5(s: str) -> str:  # pragma: win32 no cover
    """Return the hexadecimal md5 digest of ``s`` (encoded as UTF-8).

    The digest is only used as an identifier, so it is flagged as not used
    for security; this keeps it working on builds which restrict md5.
    """
    return hashlib.md5(s.encode(), usedforsecurity=False).hexdigest()
+
+
def docker_tag(prefix: Prefix) -> str:  # pragma: win32 no cover
    """Return the image tag for ``prefix``: ``pre-commit-`` followed by the
    lowercased md5 of the prefix directory's basename.
    """
    dirname = os.path.basename(prefix.prefix_dir)
    return f'pre-commit-{md5(dirname).lower()}'
+
+
def build_docker_image(
        prefix: Prefix,
        *,
        pull: bool,
) -> None:  # pragma: win32 no cover
    """Run ``docker build`` for ``prefix``, tagging and labelling the image.

    ``--pull`` is passed to docker only when ``pull`` is true.
    """
    pull_args = ('--pull',) if pull else ()
    build_cmd: tuple[str, ...] = (
        'docker', 'build',
        '--tag', docker_tag(prefix),
        '--label', PRE_COMMIT_LABEL,
        *pull_args,
        # This must come last for old versions of docker.  See #477
        '.',
    )
    lang_base.setup_cmd(prefix, build_cmd)
+
+
def install_environment(
        prefix: Prefix, version: str, additional_dependencies: Sequence[str],
) -> None:  # pragma: win32 no cover
    """Build the docker image for the hook and create its environment
    directory.

    Only the default version and no additional dependencies are accepted.
    """
    lang_base.assert_version_default('docker', version)
    lang_base.assert_no_additional_deps('docker', additional_dependencies)

    env_path = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    # Docker doesn't really have relevant disk environment, but pre-commit
    # still needs to cleanup its state files on failure
    build_docker_image(prefix, pull=True)
    os.mkdir(env_path)
+
+
def get_docker_user() -> tuple[str, ...]:  # pragma: win32 no cover
    """Return ``('-u', '<uid>:<gid>')`` for the current user, or ``()`` when
    ``os`` does not provide ``getuid`` / ``getgid``.
    """
    try:
        uid, gid = os.getuid(), os.getgid()
    except AttributeError:
        return ()
    return ('-u', f'{uid}:{gid}')
+
+
def docker_cmd() -> tuple[str, ...]:  # pragma: win32 no cover
    """Return the leading ``docker run`` arguments shared by docker hooks:
    remove the container afterwards, run as the current user, and mount the
    current working directory at ``/src`` as the working directory.
    """
    user_args = get_docker_user()
    # https://docs.docker.com/engine/reference/commandline/run/#mount-volumes-from-container-volumes-from
    # The `Z` option tells Docker to label the content with a private
    # unshared label. Only the current container can use a private volume.
    volume = f'{_get_docker_path(os.getcwd())}:/src:rw,Z'
    return (
        'docker', 'run',
        '--rm',
        *user_args,
        '-v', volume,
        '--workdir', '/src',
    )
+
+
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:  # pragma: win32 no cover
    """Run the hook inside the image built for ``prefix``.

    The first word of the hook command becomes the container's
    ``--entrypoint``; the remaining words follow the image tag.
    """
    # Rebuild the docker image in case it has gone missing, as many people do
    # automated cleanup of docker images.
    build_docker_image(prefix, pull=False)

    entry_exe, *cmd_rest = lang_base.hook_cmd(entry, args)
    image = docker_tag(prefix)

    full_cmd = (
        *docker_cmd(),
        '--entrypoint', entry_exe, image,
        *cmd_rest,
    )
    return lang_base.run_xargs(
        full_cmd,
        file_args,
        require_serial=require_serial,
        color=color,
    )
diff --git a/pre_commit/languages/docker_image.py b/pre_commit/languages/docker_image.py
new file mode 100644
index 0000000..a1a2c16
--- /dev/null
+++ b/pre_commit/languages/docker_image.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.languages.docker import docker_cmd
+from pre_commit.prefix import Prefix
+
+ENVIRONMENT_DIR = None
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+install_environment = lang_base.no_install
+in_env = lang_base.no_env
+
+
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:  # pragma: win32 no cover
    """Run the hook command appended directly to the shared ``docker run``
    arguments.
    """
    hook_cmd = lang_base.hook_cmd(entry, args)
    return lang_base.run_xargs(
        docker_cmd() + hook_cmd,
        file_args,
        require_serial=require_serial,
        color=color,
    )
diff --git a/pre_commit/languages/dotnet.py b/pre_commit/languages/dotnet.py
new file mode 100644
index 0000000..e1202c4
--- /dev/null
+++ b/pre_commit/languages/dotnet.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+import contextlib
+import os.path
+import re
+import tempfile
+import xml.etree.ElementTree
+import zipfile
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+
+ENVIRONMENT_DIR = 'dotnetenv'
+BIN_DIR = 'bin'
+
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+run_hook = lang_base.basic_run_hook
+
+
def get_env_patch(venv: str) -> PatchesT:
    """Return the patch which prepends the environment's bin directory to
    ``PATH``.
    """
    tool_dir = os.path.join(venv, BIN_DIR)
    return (('PATH', (tool_dir, os.pathsep, Var('PATH'))),)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the dotnet environment patch for the duration of the ``with``
    block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
+@contextlib.contextmanager
+def _nuget_config_no_sources() -> Generator[str, None, None]:
+ with tempfile.TemporaryDirectory() as tmpdir:
+ nuget_config = os.path.join(tmpdir, 'nuget.config')
+ with open(nuget_config, 'w') as f:
+ f.write(
+ '<?xml version="1.0" encoding="utf-8"?>'
+ '<configuration>'
+ ' <packageSources>'
+ ' <clear />'
+ ' </packageSources>'
+ '</configuration>',
+ )
+ yield nuget_config
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Pack the hook repository with ``dotnet pack`` and install every
    produced tool into the environment's bin directory.

    Only the default version and no additional dependencies are accepted.
    Raises ``AssertionError`` when no ``.nupkg`` is produced or when a
    package's nuspec does not yield a tool id.
    """
    lang_base.assert_version_default('dotnet', version)
    lang_base.assert_no_additional_deps('dotnet', additional_dependencies)

    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    build_dir = prefix.path('pre-commit-build')

    # Build & pack nupkg file
    pack_cmd = (
        'dotnet', 'pack',
        '--configuration', 'Release',
        '--property', f'PackageOutputPath={build_dir}',
    )
    lang_base.setup_cmd(prefix, pack_cmd)

    nupkg_dir = prefix.path(build_dir)
    nupkgs = [
        name for name in os.listdir(nupkg_dir) if name.endswith('.nupkg')
    ]

    if not nupkgs:
        raise AssertionError('could not find any build outputs to install')

    for nupkg in nupkgs:
        # a nupkg is a zip archive holding exactly one `.nuspec` manifest
        with zipfile.ZipFile(os.path.join(nupkg_dir, nupkg)) as archive:
            nuspec, = (
                name for name in archive.namelist()
                if name.endswith('.nuspec')
            )
            with archive.open(nuspec) as spec:
                tree = xml.etree.ElementTree.parse(spec)

        # the root tag is `{namespace}name`: reuse its namespace prefix
        namespace = re.match(r'{.*}', tree.getroot().tag)
        if not namespace:
            raise AssertionError('could not parse namespace from nuspec')

        tool_id_element = tree.find(f'.//{namespace[0]}id')
        if tool_id_element is None:
            raise AssertionError('expected to find an "id" element')

        tool_id = tool_id_element.text
        if not tool_id:
            raise AssertionError('"id" element missing tool name')

        # Install to bin dir
        with _nuget_config_no_sources() as nuget_config:
            install_cmd = (
                'dotnet', 'tool', 'install',
                '--configfile', nuget_config,
                '--tool-path', os.path.join(envdir, BIN_DIR),
                '--add-source', build_dir,
                tool_id,
            )
            lang_base.setup_cmd(prefix, install_cmd)
diff --git a/pre_commit/languages/fail.py b/pre_commit/languages/fail.py
new file mode 100644
index 0000000..6ac4d76
--- /dev/null
+++ b/pre_commit/languages/fail.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.prefix import Prefix
+
+ENVIRONMENT_DIR = None
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+install_environment = lang_base.no_install
+in_env = lang_base.no_env
+
+
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:
    """Always fail: return exit code ``1`` and output made of ``entry``, a
    blank line, then one line per file name.
    """
    header = f'{entry}\n\n'.encode()
    listing = b'\n'.join(name.encode() for name in file_args)
    return 1, header + listing + b'\n'
diff --git a/pre_commit/languages/golang.py b/pre_commit/languages/golang.py
new file mode 100644
index 0000000..66e07cf
--- /dev/null
+++ b/pre_commit/languages/golang.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import json
+import os.path
+import platform
+import shutil
+import sys
+import tarfile
+import tempfile
+import urllib.error
+import urllib.request
+import zipfile
+from collections.abc import Generator
+from collections.abc import Sequence
+from typing import ContextManager
+from typing import IO
+from typing import Protocol
+
+import pre_commit.constants as C
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.git import no_git_env
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output
+from pre_commit.util import rmtree
+
+ENVIRONMENT_DIR = 'golangenv'
+health_check = lang_base.basic_health_check
+run_hook = lang_base.basic_run_hook
+
+_ARCH_ALIASES = {
+ 'x86_64': 'amd64',
+ 'i386': '386',
+ 'aarch64': 'arm64',
+ 'armv8': 'arm64',
+ 'armv7l': 'armv6l',
+}
+_ARCH = platform.machine().lower()
+_ARCH = _ARCH_ALIASES.get(_ARCH, _ARCH)
+
+
class ExtractAll(Protocol):
    """Structural type for archive objects offering ``extractall(path)``."""

    def extractall(self, path: str) -> None:
        ...
+
+
+if sys.platform == 'win32': # pragma: win32 cover
+ _EXT = 'zip'
+
+ def _open_archive(bio: IO[bytes]) -> ContextManager[ExtractAll]:
+ return zipfile.ZipFile(bio)
+else: # pragma: win32 no cover
+ _EXT = 'tar.gz'
+
+ def _open_archive(bio: IO[bytes]) -> ContextManager[ExtractAll]:
+ return tarfile.open(fileobj=bio)
+
+
@functools.lru_cache(maxsize=1)
def get_default_version() -> str:
    """Return ``'system'`` when a ``go`` executable exists, otherwise the
    default version (the result is cached).
    """
    return 'system' if lang_base.exe_exists('go') else C.DEFAULT
+
+
def get_env_patch(venv: str, version: str) -> PatchesT:
    """Return the environment patch for the go environment at ``venv``.

    For ``'system'`` only the environment's ``bin`` is prepended to ``PATH``;
    otherwise ``GOROOT`` is set to ``<venv>/.go`` and its ``bin`` is put on
    ``PATH`` as well.
    """
    env_bin = os.path.join(venv, 'bin')
    if version == 'system':
        return (
            ('PATH', (env_bin, os.pathsep, Var('PATH'))),
        )

    goroot = os.path.join(venv, '.go')
    path_value = (
        env_bin, os.pathsep,
        os.path.join(venv, '.go', 'bin'), os.pathsep, Var('PATH'),
    )
    return (
        ('GOROOT', goroot),
        ('PATH', path_value),
    )
+
+
@functools.lru_cache
def _infer_go_version(version: str) -> str:
    """Return ``version`` unless it is the default, in which case the version
    of the first release listed by ``https://go.dev/dl/?mode=json`` is
    returned without its ``go`` prefix (results are cached per ``version``).
    """
    if version != C.DEFAULT:
        return version
    # close the response once parsed rather than leaking the connection
    with urllib.request.urlopen('https://go.dev/dl/?mode=json') as resp:
        releases = json.load(resp)
    return releases[0]['version'].removeprefix('go')
+
+
def _get_url(version: str) -> str:
    """Return the download url of the go archive for ``version`` on the
    current operating system and architecture.
    """
    system = platform.system().lower()
    resolved = _infer_go_version(version)
    return f'https://dl.google.com/go/go{resolved}.{system}-{_ARCH}.{_EXT}'
+
+
def _install_go(version: str, dest: str) -> None:
    """Download the go archive for ``version`` and extract it into ``dest``,
    renaming the extracted ``go`` directory to ``.go``.

    Raises ``ValueError`` when the download responds with a 404; other HTTP
    errors propagate unchanged.
    """
    try:
        resp = urllib.request.urlopen(_get_url(version))
    except urllib.error.HTTPError as e:  # pragma: no cover
        if e.code == 404:
            raise ValueError(
                f'Could not find a version matching your system requirements '
                f'(os={platform.system().lower()}; arch={_ARCH})',
            ) from e
        else:
            raise
    else:
        with tempfile.TemporaryFile() as f:
            # close the response as soon as the download is spooled to disk
            with resp:
                shutil.copyfileobj(resp, f)
            f.seek(0)

            with _open_archive(f) as archive:
                archive.extractall(dest)
        shutil.move(os.path.join(dest, 'go'), os.path.join(dest, '.go'))
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the go environment patch for the duration of the ``with``
    block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
        version,
    )
    with envcontext(patch):
        yield
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Install the hook repository and its additional dependencies with
    ``go install``, using the environment directory as ``GOPATH``.

    Unless ``version`` is ``'system'``, a go toolchain is first downloaded
    into the environment and used for the installation.
    """
    env_dir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    bootstrap = version != 'system'

    if bootstrap:
        _install_go(version, env_dir)

    gopath = env_dir
    if sys.platform == 'cygwin':  # pragma: no cover
        gopath = cmd_output('cygpath', '-w', env_dir)[1].strip()

    env = no_git_env(dict(os.environ, GOPATH=gopath))
    env.pop('GOBIN', None)
    if bootstrap:
        # point go at the toolchain that was just downloaded
        go_bin = os.path.join(env_dir, '.go', 'bin')
        env['GOROOT'] = os.path.join(env_dir, '.go')
        env['PATH'] = os.pathsep.join((go_bin, os.environ['PATH']))

    for target in ('./...', *additional_dependencies):
        lang_base.setup_cmd(prefix, ('go', 'install', target), env=env)

    # save some disk space -- we don't need this after installation
    pkgdir = os.path.join(env_dir, 'pkg')
    if os.path.exists(pkgdir):  # pragma: no branch (always true on windows?)
        rmtree(pkgdir)
diff --git a/pre_commit/languages/haskell.py b/pre_commit/languages/haskell.py
new file mode 100644
index 0000000..c6945c8
--- /dev/null
+++ b/pre_commit/languages/haskell.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+import contextlib
+import os.path
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.errors import FatalError
+from pre_commit.prefix import Prefix
+
+ENVIRONMENT_DIR = 'hs_env'
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+run_hook = lang_base.basic_run_hook
+
+
def get_env_patch(target_dir: str) -> PatchesT:
    """Return the patch which prepends ``<target_dir>/bin`` to ``PATH``."""
    path_value = (os.path.join(target_dir, 'bin'), os.pathsep, Var('PATH'))
    return (('PATH', path_value),)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the haskell environment patch for the duration of the ``with``
    block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Install the repository's ``.cabal`` packages and the additional
    dependencies with ``cabal install`` into the environment's ``bin``.

    Raises ``FatalError`` when there is nothing to install.
    """
    lang_base.assert_version_default('haskell', version)
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    pkgs = list(prefix.star('.cabal'))
    pkgs.extend(additional_dependencies)
    if not pkgs:
        raise FatalError('Expected .cabal files or additional_dependencies')

    bindir = os.path.join(envdir, 'bin')
    os.makedirs(bindir, exist_ok=True)

    lang_base.setup_cmd(prefix, ('cabal', 'update'))
    install_cmd = (
        'cabal', 'install',
        '--install-method', 'copy',
        '--installdir', bindir,
        *pkgs,
    )
    lang_base.setup_cmd(prefix, install_cmd)
diff --git a/pre_commit/languages/lua.py b/pre_commit/languages/lua.py
new file mode 100644
index 0000000..a475ec9
--- /dev/null
+++ b/pre_commit/languages/lua.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output
+
+ENVIRONMENT_DIR = 'lua_env'
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+run_hook = lang_base.basic_run_hook
+
+
def _get_lua_version() -> str:  # pragma: win32 no cover
    """Get the Lua version used in file paths."""
    version_out = cmd_output('luarocks', 'config', '--lua-ver')[1]
    return version_out.strip()
+
+
def get_env_patch(d: str) -> PatchesT:  # pragma: win32 no cover
    """Return the patch for the lua tree at ``d``: its ``bin`` is prepended
    to ``PATH`` and ``LUA_PATH`` / ``LUA_CPATH`` point into its versioned
    ``share`` and ``lib`` directories.
    """
    version = _get_lua_version()
    so_ext = 'dll' if sys.platform == 'win32' else 'so'

    share_dir = os.path.join(d, 'share', 'lua', version)
    lib_dir = os.path.join(d, 'lib', 'lua', version)

    lua_path = (
        os.path.join(share_dir, '?.lua;'),
        os.path.join(share_dir, '?', 'init.lua;;'),
    )
    lua_cpath = (os.path.join(lib_dir, f'?.{so_ext};;'),)
    return (
        ('PATH', (os.path.join(d, 'bin'), os.pathsep, Var('PATH'))),
        ('LUA_PATH', lua_path),
        ('LUA_CPATH', lua_cpath),
    )
+
+
@contextlib.contextmanager  # pragma: win32 no cover
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the lua environment patch for the duration of the ``with``
    block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:  # pragma: win32 no cover
    """Install the repository's rockspecs and the additional dependencies
    into the environment's luarocks tree.
    """
    lang_base.assert_version_default('lua', version)

    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    luarocks = ('luarocks', '--tree', envdir)
    with in_env(prefix, version):
        # luarocks doesn't bootstrap a tree prior to installing
        # so ensure the directory exists.
        os.makedirs(envdir, exist_ok=True)

        # Older luarocks (e.g., 2.4.2) expect the rockspec as an arg
        for rockspec in prefix.star('.rockspec'):
            lang_base.setup_cmd(prefix, (*luarocks, 'make', rockspec))

        # luarocks can't install multiple packages at once
        # so install them individually.
        for dependency in additional_dependencies:
            lang_base.setup_cmd(prefix, (*luarocks, 'install', dependency))
diff --git a/pre_commit/languages/node.py b/pre_commit/languages/node.py
new file mode 100644
index 0000000..d49c0e3
--- /dev/null
+++ b/pre_commit/languages/node.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import os
+import sys
+from collections.abc import Generator
+from collections.abc import Sequence
+
+import pre_commit.constants as C
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import UNSET
+from pre_commit.envcontext import Var
+from pre_commit.languages.python import bin_dir
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output
+from pre_commit.util import cmd_output_b
+from pre_commit.util import rmtree
+
+ENVIRONMENT_DIR = 'node_env'
+run_hook = lang_base.basic_run_hook
+
+
@functools.lru_cache(maxsize=1)
def get_default_version() -> str:
    """Return ``'system'`` when both ``node`` and ``npm`` exist and the
    platform is not win32, otherwise the default version (cached).
    """
    # nodeenv does not yet support `-n system` on windows
    usable_platform = sys.platform != 'win32'
    # if node is already installed, we can save a bunch of setup time by
    # using the installed version
    if usable_platform and all(
            lang_base.exe_exists(exe) for exe in ('node', 'npm')
    ):
        return 'system'
    return C.DEFAULT
+
+
def get_env_patch(venv: str) -> PatchesT:
    """Return the patch activating the node environment at ``venv``.

    The npm prefix and the ``node_modules`` location are chosen per platform;
    the user-level npm config variables are unset.
    """
    if sys.platform == 'cygwin':  # pragma: no cover
        # npm wants a windows-style prefix under cygwin
        win_venv = cmd_output('cygpath', '-w', venv)[1]
        install_prefix, lib_dir = fr'{win_venv.strip()}\bin', 'lib'
    elif sys.platform == 'win32':  # pragma: no cover
        install_prefix, lib_dir = bin_dir(venv), 'Scripts'
    else:  # pragma: win32 no cover
        install_prefix, lib_dir = venv, 'lib'

    node_path = os.path.join(venv, lib_dir, 'node_modules')
    return (
        ('NODE_VIRTUAL_ENV', venv),
        ('NPM_CONFIG_PREFIX', install_prefix),
        ('npm_config_prefix', install_prefix),
        ('NPM_CONFIG_USERCONFIG', UNSET),
        ('npm_config_userconfig', UNSET),
        ('NODE_PATH', node_path),
        ('PATH', (bin_dir(venv), os.pathsep, Var('PATH'))),
    )
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the node environment patch for the duration of the ``with``
    block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def health_check(prefix: Prefix, version: str) -> str | None:
    """Return ``None`` when ``node --version`` succeeds inside the
    environment, otherwise a message naming its return code.
    """
    with in_env(prefix, version):
        retcode = cmd_output_b('node', '--version', check=False)[0]
        if retcode == 0:
            return None
        else:  # pragma: win32 no cover
            return f'`node --version` returned {retcode}'
+
+
def install_environment(
        prefix: Prefix, version: str, additional_dependencies: Sequence[str],
) -> None:
    """Create a nodeenv environment, then install the packed hook repository
    and the additional dependencies globally into it.
    """
    assert prefix.exists('package.json')
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    # https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx?f=255&MSPPError=-2147217396#maxpath
    if sys.platform == 'win32':  # pragma: no cover
        envdir = fr'\\?\{os.path.normpath(envdir)}'

    nodeenv_cmd = [
        sys.executable, '-mnodeenv', '--prebuilt', '--clean-src', envdir,
    ]
    if version != C.DEFAULT:
        nodeenv_cmd += ['-n', version]
    cmd_output_b(*nodeenv_cmd)

    with in_env(prefix, version):
        # https://npm.community/t/npm-install-g-git-vs-git-clone-cd-npm-install-g/5449
        # install as if we installed from git
        lang_base.setup_cmd(
            prefix,
            (
                'npm', 'install', '--include=dev', '--include=prod',
                '--ignore-prepublish', '--no-progress', '--no-save',
            ),
        )

        # `npm pack` prints the name of the tarball it created
        packed_name = cmd_output('npm', 'pack', cwd=prefix.prefix_dir)[1]
        pkg = prefix.path(packed_name.strip())

        lang_base.setup_cmd(
            prefix,
            ('npm', 'install', '-g', pkg, *additional_dependencies),
        )

        # clean these up after installation
        if prefix.exists('node_modules'):  # pragma: win32 no cover
            rmtree(prefix.path('node_modules'))
        os.remove(pkg)
diff --git a/pre_commit/languages/perl.py b/pre_commit/languages/perl.py
new file mode 100644
index 0000000..61b1d11
--- /dev/null
+++ b/pre_commit/languages/perl.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import contextlib
+import os
+import shlex
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+
+ENVIRONMENT_DIR = 'perl_env'
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+run_hook = lang_base.basic_run_hook
+
+
def get_env_patch(venv: str) -> PatchesT:
    """Return the patch which makes perl tooling install into, and load
    modules from, the environment at ``venv``.
    """
    quoted = shlex.quote(venv)
    mm_opt = (
        f'INSTALL_BASE={quoted} '
        f'INSTALLSITEMAN1DIR=none INSTALLSITEMAN3DIR=none'
    )
    return (
        ('PATH', (os.path.join(venv, 'bin'), os.pathsep, Var('PATH'))),
        ('PERL5LIB', os.path.join(venv, 'lib', 'perl5')),
        ('PERL_MB_OPT', f'--install_base {quoted}'),
        ('PERL_MM_OPT', mm_opt),
    )
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the perl environment patch for the duration of the ``with``
    block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def install_environment(
        prefix: Prefix, version: str, additional_dependencies: Sequence[str],
) -> None:
    """Install the hook repository and the additional dependencies with
    ``cpan -T`` while the perl environment is active.
    """
    lang_base.assert_version_default('perl', version)

    cpan_cmd = ('cpan', '-T', '.', *additional_dependencies)
    with in_env(prefix, version):
        lang_base.setup_cmd(prefix, cpan_cmd)
diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py
new file mode 100644
index 0000000..72a9345
--- /dev/null
+++ b/pre_commit/languages/pygrep.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from collections.abc import Sequence
+from re import Pattern
+from typing import NamedTuple
+
+from pre_commit import lang_base
+from pre_commit import output
+from pre_commit.prefix import Prefix
+from pre_commit.xargs import xargs
+
+ENVIRONMENT_DIR = None
+get_default_version = lang_base.basic_get_default_version
+health_check = lang_base.basic_health_check
+install_environment = lang_base.no_install
+in_env = lang_base.no_env
+
+
def _process_filename_by_line(pattern: Pattern[bytes], filename: str) -> int:
    """Print ``filename:lineno:line`` for every line matching ``pattern``.

    Returns ``1`` when at least one line matched, otherwise ``0``.
    """
    found = 0
    with open(filename, 'rb') as f:
        for line_no, line in enumerate(f, start=1):
            if not pattern.search(line):
                continue
            found = 1
            output.write(f'{filename}:{line_no}:')
            output.write_line_b(line.rstrip(b'\r\n'))
    return found
+
+
def _process_filename_at_once(pattern: Pattern[bytes], filename: str) -> int:
    """Search the whole file at once and print the first match, prefixed by
    ``filename:lineno:``.

    Returns ``1`` when the pattern matched, otherwise ``0``.
    """
    with open(filename, 'rb') as f:
        contents = f.read()
        match = pattern.search(contents)
        if not match:
            return 0

        # number of newlines before the match == 0-based line of its start
        line_no = contents[:match.start()].count(b'\n')
        output.write(f'{filename}:{line_no + 1}:')

        # show the first matched line in full rather than from the match start
        matched_lines = match[0].split(b'\n')
        matched_lines[0] = contents.split(b'\n')[line_no]

        output.write_line_b(b'\n'.join(matched_lines))
        return 1
+
+
def _process_filename_by_line_negated(
        pattern: Pattern[bytes],
        filename: str,
) -> int:
    """Return ``0`` when any line matches ``pattern``; otherwise print
    ``filename`` and return ``1``.
    """
    with open(filename, 'rb') as f:
        if any(pattern.search(line) for line in f):
            return 0
        output.write_line(filename)
        return 1
+
+
def _process_filename_at_once_negated(
        pattern: Pattern[bytes],
        filename: str,
) -> int:
    """Return ``0`` when ``pattern`` matches anywhere in the file; otherwise
    print ``filename`` and return ``1``.
    """
    with open(filename, 'rb') as f:
        contents = f.read()
    if pattern.search(contents):
        return 0
    output.write_line(filename)
    return 1
+
+
class Choice(NamedTuple):
    """Combination of the ``--multiline`` and ``--negate`` options."""

    # search the whole file at once instead of line by line
    multiline: bool
    # report files which do NOT match
    negate: bool
+
+
+FNS = {
+ Choice(multiline=True, negate=True): _process_filename_at_once_negated,
+ Choice(multiline=True, negate=False): _process_filename_at_once,
+ Choice(multiline=False, negate=True): _process_filename_by_line_negated,
+ Choice(multiline=False, negate=False): _process_filename_by_line,
+}
+
+
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:
    """Run this module as a script over ``file_args`` with ``entry`` as the
    pattern.
    """
    pygrep_cmd = (sys.executable, '-m', __name__, *args, entry)
    return xargs(pygrep_cmd, file_args, color=color)
+
+
def main(argv: Sequence[str] | None = None) -> int:
    """Command line entry point: search each file for the pattern.

    Returns nonzero when any file is reported by the selected mode.
    """
    parser = argparse.ArgumentParser(
        description=(
            'grep-like finder using python regexes.  Unlike grep, this tool '
            'returns nonzero when it finds a match and zero otherwise.  The '
            'idea here being that matches are "problems".'
        ),
    )
    parser.add_argument('-i', '--ignore-case', action='store_true')
    parser.add_argument('--multiline', action='store_true')
    parser.add_argument('--negate', action='store_true')
    parser.add_argument('pattern', help='python regex pattern.')
    parser.add_argument('filenames', nargs='*')
    args = parser.parse_args(argv)

    flags = 0
    if args.ignore_case:
        flags |= re.IGNORECASE
    if args.multiline:
        flags |= re.MULTILINE | re.DOTALL

    # files are read as bytes so the pattern is compiled as bytes too
    pattern = re.compile(args.pattern.encode(), flags)
    process_fn = FNS[Choice(multiline=args.multiline, negate=args.negate)]

    retv = 0
    for filename in args.filenames:
        retv |= process_fn(pattern, filename)
    return retv
+
+
+if __name__ == '__main__':
+ raise SystemExit(main())
diff --git a/pre_commit/languages/python.py b/pre_commit/languages/python.py
new file mode 100644
index 0000000..9f4bf69
--- /dev/null
+++ b/pre_commit/languages/python.py
@@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import os
+import sys
+from collections.abc import Generator
+from collections.abc import Sequence
+
+import pre_commit.constants as C
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import UNSET
+from pre_commit.envcontext import Var
+from pre_commit.parse_shebang import find_executable
+from pre_commit.prefix import Prefix
+from pre_commit.util import CalledProcessError
+from pre_commit.util import cmd_output
+from pre_commit.util import cmd_output_b
+from pre_commit.util import win_exe
+
+ENVIRONMENT_DIR = 'py_env'
+run_hook = lang_base.basic_run_hook
+
+
@functools.cache
def _version_info(exe: str) -> str:
    """Return the dotted ``sys.version_info`` reported by ``exe``, or an
    error placeholder when running it fails (results are cached per ``exe``).
    """
    prog = 'import sys;print(".".join(str(p) for p in sys.version_info))'
    try:
        _, out, _ = cmd_output(exe, '-S', '-c', prog)
    except CalledProcessError:
        return f'<<error retrieving version from {exe}>>'
    return out.strip()
+
+
+def _read_pyvenv_cfg(filename: str) -> dict[str, str]:
+ ret = {}
+ with open(filename, encoding='UTF-8') as f:
+ for line in f:
+ try:
+ k, v = line.split('=')
+ except ValueError: # blank line / comment / etc.
+ continue
+ else:
+ ret[k.strip()] = v.strip()
+ return ret
+
+
def bin_dir(venv: str) -> str:
    """On windows there's a different directory for the virtualenv"""
    if sys.platform == 'win32':
        return os.path.join(venv, 'Scripts')
    return os.path.join(venv, 'bin')
+
+
def get_env_patch(venv: str) -> PatchesT:
    """Return the patch activating the virtualenv at ``venv``: pip's version
    check is disabled, ``PYTHONHOME`` is unset, ``VIRTUAL_ENV`` is set and the
    virtualenv's bin directory is prepended to ``PATH``.
    """
    path_value = (bin_dir(venv), os.pathsep, Var('PATH'))
    return (
        ('PIP_DISABLE_PIP_VERSION_CHECK', '1'),
        ('PYTHONHOME', UNSET),
        ('VIRTUAL_ENV', venv),
        ('PATH', path_value),
    )
+
+
def _find_by_py_launcher(
        version: str,
) -> str | None:  # pragma: no cover (windows only)
    """Ask the ``py`` launcher for the executable of a ``pythonX.Y`` version.

    Returns ``None`` for other version strings or when the launcher fails.
    """
    if not version.startswith('python'):
        return None

    num = version.removeprefix('python')
    launcher_cmd = ('py', f'-{num}', '-c', 'import sys; print(sys.executable)')
    launcher_env = dict(os.environ, PYTHONIOENCODING='UTF-8')
    try:
        _, out, _ = cmd_output(*launcher_cmd, env=launcher_env)
    except CalledProcessError:
        return None
    return out.strip()
+
+
def _find_by_sys_executable() -> str | None:
    """Derive a version name from ``sys.executable`` (or its realpath).

    Returns the lowercased executable name without ``.exe`` when it is more
    specific than ``python`` / ``pythonw`` and can be found as an executable,
    otherwise ``None``.
    """
    def _norm(path: str) -> str | None:
        basename = os.path.split(path.lower())[1]
        exe = basename.partition('.exe')[0]
        if exe in {'python', 'pythonw'}:
            return None
        return exe if find_executable(exe) else None

    # On linux, I see these common sys.executables:
    #
    # system `python`: /usr/bin/python -> python2.7
    # system `python2`: /usr/bin/python2 -> python2.7
    # virtualenv v: v/bin/python (will not return from this loop)
    # virtualenv v -ppython2: v/bin/python -> python2
    # virtualenv v -ppython2.7: v/bin/python -> python2.7
    # virtualenv v -ppypy: v/bin/python -> v/bin/pypy
    candidates = (sys.executable, os.path.realpath(sys.executable))
    for candidate in candidates:
        found = _norm(candidate)
        if found:
            return found
    return None
+
+
@functools.lru_cache(maxsize=1)
def get_default_version() -> str:  # pragma: no cover (platform dependent)
    """Return the default python version name for hooks (cached)."""
    # First attempt from `sys.executable` (or the realpath)
    from_sys = _find_by_sys_executable()
    if from_sys:
        return from_sys

    # Next try the `pythonX.X` executable, directly and via the py launcher
    versioned = f'python{sys.version_info[0]}.{sys.version_info[1]}'
    if find_executable(versioned) or _find_by_py_launcher(versioned):
        return versioned

    # We tried!
    return C.DEFAULT
+
+
+def _sys_executable_matches(version: str) -> bool:
+ if version == 'python':
+ return True
+ elif not version.startswith('python'):
+ return False
+
+ try:
+ info = tuple(int(p) for p in version.removeprefix('python').split('.'))
+ except ValueError:
+ return False
+
+ return sys.version_info[:len(info)] == info
+
+
def norm_version(version: str) -> str | None:
    """Resolve ``version`` to an interpreter path.

    Returns ``None`` when virtualenv's own default should be used.
    """
    # use virtualenv's default / virtualenv defaults to our exe
    if version == C.DEFAULT or _sys_executable_matches(version):
        return None

    if sys.platform == 'win32':  # pragma: no cover (windows)
        launcher_exe = _find_by_py_launcher(version)
        if launcher_exe:
            return launcher_exe

    # Try looking up by name
    named_exe = find_executable(version)
    if named_exe and named_exe != version:
        return named_exe

    # Otherwise assume it is a path
    return os.path.expanduser(version)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the virtualenv environment patch for the duration of the
    ``with`` block.
    """
    patch = get_env_patch(
        lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version),
    )
    with envcontext(patch):
        yield
+
+
def health_check(prefix: Prefix, version: str) -> str | None:
    """Check that the virtualenv's interpreter (and its base executable)
    still report the version recorded in ``pyvenv.cfg``.

    Returns ``None`` when healthy, otherwise a message describing the
    problem.
    """
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    pyvenv_cfg = os.path.join(envdir, 'pyvenv.cfg')

    # created with "old" virtualenv
    if not os.path.exists(pyvenv_cfg):
        return 'pyvenv.cfg does not exist (old virtualenv?)'

    py_exe = prefix.path(bin_dir(envdir), win_exe('python'))
    cfg = _read_pyvenv_cfg(pyvenv_cfg)

    if 'version_info' not in cfg:
        return "created virtualenv's pyvenv.cfg is missing `version_info`"
    expected = cfg['version_info']

    # always use uncached lookup here in case we replaced an unhealthy env
    virtualenv_version = _version_info.__wrapped__(py_exe)
    if virtualenv_version != expected:
        return (
            f'virtualenv python version did not match created version:\n'
            f'- actual version: {virtualenv_version}\n'
            f'- expected version: {expected}\n'
        )

    # made with an older version of virtualenv? skip `base-executable` check
    if 'base-executable' not in cfg:
        return None

    base_exe_version = _version_info(cfg['base-executable'])
    if base_exe_version == expected:
        return None
    return (
        f'base executable python version does not match created version:\n'
        f'- base-executable version: {base_exe_version}\n'
        f'- expected version: {expected}\n'
    )
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Create a virtualenv for the hook, then pip-install the hook
    repository and the additional dependencies into it.
    """
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    venv_cmd = [sys.executable, '-mvirtualenv', envdir]
    interpreter = norm_version(version)
    if interpreter is not None:
        venv_cmd += ['-p', interpreter]

    cmd_output_b(*venv_cmd, cwd='/')
    with in_env(prefix, version):
        lang_base.setup_cmd(
            prefix,
            ('python', '-mpip', 'install', '.', *additional_dependencies),
        )
diff --git a/pre_commit/languages/r.py b/pre_commit/languages/r.py
new file mode 100644
index 0000000..93b62bd
--- /dev/null
+++ b/pre_commit/languages/r.py
@@ -0,0 +1,195 @@
+from __future__ import annotations
+
+import contextlib
+import os
+import shlex
+import shutil
+import tempfile
+import textwrap
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import UNSET
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output_b
+from pre_commit.util import win_exe
+
# directory name passed to `lang_base.environment_dir` for R environments
ENVIRONMENT_DIR = 'renv'
# options placed between the executable and the script / expression
RSCRIPT_OPTS = ('--no-save', '--no-restore', '--no-site-file', '--no-environ')
# these two are taken unchanged from `lang_base`
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
+
+
@contextlib.contextmanager
def _r_code_in_tempfile(code: str) -> Generator[str, None, None]:
    """
    Write `code` to a temporary `script.R` and yield the path to it.

    Running `Rscript [options] path/to/file_with_expr.R` sidesteps the
    quoting and escaping problems of `Rscript [options] -e {expr}`.
    The temporary directory goes away when the context exits.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        script_path = os.path.join(scratch_dir, 'script.R')
        with open(script_path, 'w') as script:
            contents = _inline_r_setup(textwrap.dedent(code))
            script.write(contents)
        yield script_path
+
+
def get_env_patch(venv: str) -> PatchesT:
    """Environment patches: point R at `activate.R`, drop `RENV_PROJECT`."""
    activate_script = os.path.join(venv, 'activate.R')
    r_profile = ('R_PROFILE_USER', activate_script)
    renv_project = ('RENV_PROJECT', UNSET)
    return (r_profile, renv_project)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the R environment patches for the duration of the context."""
    r_env_dir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    patches = get_env_patch(r_env_dir)
    with envcontext(patches):
        yield
+
+
+def _prefix_if_file_entry(
+ entry: list[str],
+ prefix: Prefix,
+ *,
+ is_local: bool,
+) -> Sequence[str]:
+ if entry[1] == '-e' or is_local:
+ return entry[1:]
+ else:
+ return (prefix.path(entry[1]),)
+
+
+def _rscript_exec() -> str:
+ r_home = os.environ.get('R_HOME')
+ if r_home is None:
+ return 'Rscript'
+ else:
+ return os.path.join(r_home, 'bin', win_exe('Rscript'))
+
+
+def _entry_validate(entry: list[str]) -> None:
+ """
+ Allowed entries:
+ # Rscript -e expr
+ # Rscript path/to/file
+ """
+ if entry[0] != 'Rscript':
+ raise ValueError('entry must start with `Rscript`.')
+
+ if entry[1] == '-e':
+ if len(entry) > 3:
+ raise ValueError('You can supply at most one expression.')
+ elif len(entry) > 2:
+ raise ValueError(
+ 'The only valid syntax is `Rscript -e {expr}`'
+ 'or `Rscript path/to/hook/script`',
+ )
+
+
def _cmd_from_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        *,
        is_local: bool,
) -> tuple[str, ...]:
    """Build the command tuple for a hook from its `entry` and `args`.

    The entry is shell-split and validated, then assembled as:
    executable, `RSCRIPT_OPTS`, script-or-expression part, `args`.
    """
    tokens = shlex.split(entry)
    _entry_validate(tokens)

    executable = tokens[0]
    script_part = _prefix_if_file_entry(tokens, prefix, is_local=is_local)
    return (
        (executable,) +
        tuple(RSCRIPT_OPTS) +
        tuple(script_part) +
        tuple(args)
    )
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Set up the renv environment for a hook repository.

    Copies `renv.lock` and the `renv` directory from the hook repo into
    the environment directory, runs an R script there which restores the
    environment and writes `activate.R`, and finally installs any
    `additional_dependencies` through `renv::install`.
    """
    lang_base.assert_version_default('r', version)

    env_dir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    os.makedirs(env_dir, exist_ok=True)
    shutil.copy(prefix.path('renv.lock'), env_dir)
    shutil.copytree(prefix.path('renv'), os.path.join(env_dir, 'renv'))

    # this is an f-string: doubled braces are literal R braces, and
    # `prefix.prefix_dir` is embedded via `!r` so it arrives quoted
    r_code_inst_environment = f"""\
        prefix_dir <- {prefix.prefix_dir!r}
        options(
            repos = c(CRAN = "https://cran.rstudio.com"),
            renv.consent = TRUE
        )
        source("renv/activate.R")
        renv::restore()
        activate_statement <- paste0(
          'suppressWarnings({{',
          'old <- setwd("', getwd(), '"); ',
          'source("renv/activate.R"); ',
          'setwd(old); ',
          'renv::load("', getwd(), '");}})'
        )
        writeLines(activate_statement, 'activate.R')
        is_package <- tryCatch(
          {{
              path_desc <- file.path(prefix_dir, 'DESCRIPTION')
              suppressWarnings(desc <- read.dcf(path_desc))
              "Package" %in% colnames(desc)
          }},
          error = function(...) FALSE
        )
        if (is_package) {{
            renv::install(prefix_dir)
        }}
        """

    # run with cwd=env_dir: the script's relative paths (`renv/activate.R`,
    # `activate.R`) and its `getwd()` calls refer to the environment dir
    with _r_code_in_tempfile(r_code_inst_environment) as f:
        cmd_output_b(_rscript_exec(), '--vanilla', f, cwd=env_dir)

    if additional_dependencies:
        # the dependencies are passed as trailing command line arguments
        r_code_inst_add = 'renv::install(commandArgs(trailingOnly = TRUE))'
        with in_env(prefix, version):
            with _r_code_in_tempfile(r_code_inst_add) as f:
                cmd_output_b(
                    _rscript_exec(), *RSCRIPT_OPTS,
                    f,
                    *additional_dependencies,
                    cwd=env_dir,
                )
+
+
+def _inline_r_setup(code: str) -> str:
+ """
+ Some behaviour of R cannot be configured via env variables, but can
+ only be configured via R options once R has started. These are set here.
+ """
+ with_option = [
+ textwrap.dedent("""\
+ options(
+ install.packages.compile.from.source = "never",
+ pkgType = "binary"
+ )
+ """),
+ code,
+ ]
+ return '\n'.join(with_option)
+
+
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:
    """Run an R hook over `file_args` through `lang_base.run_xargs`."""
    rscript_cmd = _cmd_from_hook(prefix, entry, args, is_local=is_local)
    xargs_options = {'require_serial': require_serial, 'color': color}
    return lang_base.run_xargs(rscript_cmd, file_args, **xargs_options)
diff --git a/pre_commit/languages/ruby.py b/pre_commit/languages/ruby.py
new file mode 100644
index 0000000..0438ae0
--- /dev/null
+++ b/pre_commit/languages/ruby.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import importlib.resources
+import os.path
+import shutil
+import tarfile
+from collections.abc import Generator
+from collections.abc import Sequence
+from typing import IO
+
+import pre_commit.constants as C
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import UNSET
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+from pre_commit.util import CalledProcessError
+
# directory name passed to `lang_base.environment_dir` for ruby environments
ENVIRONMENT_DIR = 'rbenv'
# these two are taken unchanged from `lang_base`
health_check = lang_base.basic_health_check
run_hook = lang_base.basic_run_hook
+
+
def _resource_bytesio(filename: str) -> IO[bytes]:
    """Open `filename` from the `pre_commit.resources` package as bytes."""
    package_root = importlib.resources.files('pre_commit.resources')
    resource = package_root.joinpath(filename)
    return resource.open('rb')
+
+
@functools.lru_cache(maxsize=1)
def get_default_version() -> str:
    """Return 'system' when both `ruby` and `gem` exist, else the default."""
    for exe in ('ruby', 'gem'):
        if not lang_base.exe_exists(exe):
            return C.DEFAULT
    return 'system'
+
+
def get_env_patch(
        venv: str,
        language_version: str,
) -> PatchesT:
    """Build the environment patches for a ruby environment.

    Gems always live under `<venv>/gems`. For non-system versions the
    rbenv `shims` and `bin` directories are also put on `PATH`, and
    `RBENV_VERSION` is set for anything other than 'system' / 'default'.
    """
    gems_bin = os.path.join(venv, 'gems', 'bin')
    entries = [
        ('GEM_HOME', os.path.join(venv, 'gems')),
        ('GEM_PATH', UNSET),
        ('BUNDLE_IGNORE_CONFIG', '1'),
    ]

    if language_version == 'system':
        entries.append(('PATH', (gems_bin, os.pathsep, Var('PATH'))))
    else:  # pragma: win32 no cover
        search_path = (
            gems_bin, os.pathsep,
            os.path.join(venv, 'shims'), os.pathsep,
            os.path.join(venv, 'bin'), os.pathsep, Var('PATH'),
        )
        entries.append(('RBENV_ROOT', venv))
        entries.append(('PATH', search_path))

    if language_version not in {'system', 'default'}:  # pragma: win32 no cover
        entries.append(('RBENV_VERSION', language_version))

    return tuple(entries)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the ruby environment patches for the duration of the context."""
    ruby_env_dir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    patches = get_env_patch(ruby_env_dir, version)
    with envcontext(patches):
        yield
+
+
def _extract_resource(filename: str, dest: str) -> None:
    """Unpack the packaged tar archive `filename` into `dest`."""
    with _resource_bytesio(filename) as stream, \
            tarfile.open(fileobj=stream) as archive:
        archive.extractall(dest)
+
+
def _install_rbenv(
        prefix: Prefix,
        version: str,
) -> None:  # pragma: win32 no cover
    """Unpack the bundled rbenv into the environment directory.

    The `ruby-download` and `ruby-build` plugins are only unpacked when
    a specific (non-default) version was requested.
    """
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    _extract_resource('rbenv.tar.gz', prefix.path('.'))
    shutil.move(prefix.path('rbenv'), envdir)

    # The default version needs no plugins
    if version == C.DEFAULT:
        return

    plugins_dir = os.path.join(envdir, 'plugins')
    for plugin_archive in ('ruby-download.tar.gz', 'ruby-build.tar.gz'):
        _extract_resource(plugin_archive, plugins_dir)
+
+
def _install_ruby(
        prefix: Prefix,
        version: str,
) -> None:  # pragma: win32 no cover
    """Install ruby `version`: `rbenv download` first, `rbenv install` next."""
    download_cmd = ('rbenv', 'download', version)
    build_cmd = ('rbenv', 'install', version)
    try:
        lang_base.setup_cmd(prefix, download_cmd)
    except CalledProcessError:  # pragma: no cover (usually fine with download)
        # The download from the mirror failed, so build it instead
        lang_base.setup_cmd(prefix, build_cmd)
+
+
def install_environment(
        prefix: Prefix, version: str, additional_dependencies: Sequence[str],
) -> None:
    """Prepare the ruby environment and install the hook's gem into it.

    For non-system versions rbenv (and, when a version is pinned, ruby
    itself) is installed first. The gemspec is then built and the
    resulting gem plus `additional_dependencies` are installed under
    `<envdir>/gems`.
    """
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    gems_dir = os.path.join(envdir, 'gems')
    gems_bin_dir = os.path.join(envdir, 'gems', 'bin')

    if version != 'system':  # pragma: win32 no cover
        _install_rbenv(prefix, version)
        with in_env(prefix, version):
            # rbenv's directories must exist before anything is installed
            lang_base.setup_cmd(prefix, ('rbenv', 'init', '-'))
            if version != C.DEFAULT:
                _install_ruby(prefix, version)
            # the shims are only set up by a rehash after installing
            lang_base.setup_cmd(prefix, ('rbenv', 'rehash'))

    with in_env(prefix, version):
        build_cmd = ('gem', 'build', *prefix.star('.gemspec'))
        lang_base.setup_cmd(prefix, build_cmd)

        install_cmd = (
            'gem', 'install',
            '--no-document', '--no-format-executable',
            '--no-user-install',
            '--install-dir', gems_dir,
            '--bindir', gems_bin_dir,
            *prefix.star('.gem'), *additional_dependencies,
        )
        lang_base.setup_cmd(prefix, install_cmd)
diff --git a/pre_commit/languages/rust.py b/pre_commit/languages/rust.py
new file mode 100644
index 0000000..7b04d6c
--- /dev/null
+++ b/pre_commit/languages/rust.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+import contextlib
+import functools
+import os.path
+import shutil
+import sys
+import tempfile
+import urllib.request
+from collections.abc import Generator
+from collections.abc import Sequence
+
+import pre_commit.constants as C
+from pre_commit import lang_base
+from pre_commit import parse_shebang
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output_b
+from pre_commit.util import make_executable
+from pre_commit.util import win_exe
+
# directory name passed to `lang_base.environment_dir` for rust environments
ENVIRONMENT_DIR = 'rustenv'
# these two are taken unchanged from `lang_base`
health_check = lang_base.basic_health_check
run_hook = lang_base.basic_run_hook
+
+
@functools.lru_cache(maxsize=1)
def get_default_version() -> str:
    """Return 'system' when `cargo --version` succeeds, else the default."""
    # Reusing an installed rust saves a bunch of setup time.
    #
    # Merely finding the executable is not enough: with rustup installed
    # but no toolchain available, `cargo` exists yet cannot be used until
    # a toolchain is installed -- so actually run it.
    probe = cmd_output_b('cargo', '--version', check=False)
    cargo_usable = probe[0] == 0
    return 'system' if cargo_usable else C.DEFAULT
+
+
def _rust_toolchain(language_version: str) -> str:
    """Map a language version to a rust toolchain ('stable' for default)."""
    is_default = language_version == C.DEFAULT
    return 'stable' if is_default else language_version
+
+
def get_env_patch(target_dir: str, version: str) -> PatchesT:
    """Environment patches: prepend `<target_dir>/bin` to `PATH`.

    `RUSTUP_TOOLCHAIN` is only set when we do not want to use the
    system's default toolchain.
    """
    path_patch = (
        'PATH', (os.path.join(target_dir, 'bin'), os.pathsep, Var('PATH')),
    )
    if version == 'system':
        return (path_patch,)
    toolchain_patch = ('RUSTUP_TOOLCHAIN', _rust_toolchain(version))
    return (path_patch, toolchain_patch)
+
+
@contextlib.contextmanager
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the rust environment patches for the duration of the context."""
    rust_env_dir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    patches = get_env_patch(rust_env_dir, version)
    with envcontext(patches):
        yield
+
+
def _add_dependencies(
        prefix: Prefix,
        additional_dependencies: set[str],
) -> None:
    """Run `cargo add` for every `name[:spec]` dependency.

    Each dependency becomes `name@spec`; a missing spec becomes `*`.
    """
    split_deps = (dep.partition(':') for dep in additional_dependencies)
    crates = [f'{name}@{spec or "*"}' for name, _, spec in split_deps]

    lang_base.setup_cmd(prefix, ('cargo', 'add', *crates))
+
+
def install_rust_with_toolchain(toolchain: str, envdir: str) -> None:
    """Install `toolchain` with rustup, using `envdir` as `CARGO_HOME`.

    `RUSTUP_HOME` points at a temporary directory while this runs. When
    no `rustup` executable is found, the `rustup-init` installer is
    downloaded into that directory and run first.
    """
    with tempfile.TemporaryDirectory() as rustup_dir:
        with envcontext((('CARGO_HOME', envdir), ('RUSTUP_HOME', rustup_dir))):
            # acquire `rustup` if not present
            if parse_shebang.find_executable('rustup') is None:
                # We did not detect rustup and need to download it first.
                if sys.platform == 'win32':  # pragma: win32 cover
                    url = 'https://win.rustup.rs/x86_64'
                else:  # pragma: win32 no cover
                    url = 'https://sh.rustup.rs'

                rustup_init = os.path.join(rustup_dir, win_exe('rustup-init'))
                # the response is a context manager: close the connection
                # as soon as the installer has been written out
                with urllib.request.urlopen(url) as resp:
                    with open(rustup_init, 'wb') as f:
                        shutil.copyfileobj(resp, f)
                make_executable(rustup_init)

                # install rustup into `$CARGO_HOME/bin`
                cmd_output_b(
                    rustup_init, '-y', '--quiet', '--no-modify-path',
                    '--default-toolchain', 'none',
                )

            cmd_output_b(
                'rustup', 'toolchain', 'install', '--no-self-update',
                toolchain,
            )
+
+
def install_environment(
        prefix: Prefix,
        version: str,
        additional_dependencies: Sequence[str],
) -> None:
    """Install the hook crate and its extra dependencies via cargo.

    Dependencies prefixed with `cli:` are `cargo install`ed as binaries;
    all other dependencies are added to the crate with `cargo add`.
    """
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    # Extra dependencies come in two flavours: libraries needed to build
    # the hook crate, and binary packages to be `cargo install`'d.
    #
    # Unlike e.g. Python, `cargo install`ing a library does not make it
    # available for compilation, and listing a binary crate in
    # `Cargo.toml` does not build its binary.
    #
    # So "cli" dependencies are marked with a 'cli:' prefix.
    cli_deps: set[str] = set()
    lib_deps: set[str] = set()
    for dep in additional_dependencies:
        bucket = cli_deps if dep.startswith('cli:') else lib_deps
        bucket.add(dep)

    packages_to_install: set[tuple[str, ...]] = {('--path', '.')}
    for cli_dep in cli_deps:
        spec = cli_dep.removeprefix('cli:')
        package, _, crate_version = spec.partition(':')
        if crate_version == '':
            packages_to_install.add((package,))
        else:
            packages_to_install.add((package, '--version', crate_version))

    with contextlib.ExitStack() as ctx:
        ctx.enter_context(in_env(prefix, version))

        if version != 'system':
            install_rust_with_toolchain(_rust_toolchain(version), envdir)

            tmpdir = ctx.enter_context(tempfile.TemporaryDirectory())
            ctx.enter_context(envcontext((('RUSTUP_HOME', tmpdir),)))

        if lib_deps:
            _add_dependencies(prefix, lib_deps)

        for install_args in packages_to_install:
            cmd_output_b(
                'cargo', 'install', '--bins', '--root', envdir, *install_args,
                cwd=prefix.prefix_dir,
            )
diff --git a/pre_commit/languages/script.py b/pre_commit/languages/script.py
new file mode 100644
index 0000000..1eaa1e2
--- /dev/null
+++ b/pre_commit/languages/script.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.prefix import Prefix
+
# no environment directory is defined for this language
ENVIRONMENT_DIR = None
# everything except `run_hook` is taken unchanged from `lang_base`
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
install_environment = lang_base.no_install
in_env = lang_base.no_env
+
+
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
) -> tuple[int, bytes]:
    """Run a script hook; the executable is resolved through `prefix`."""
    executable, *rest = lang_base.hook_cmd(entry, args)
    script_cmd = (prefix.path(executable), *rest)
    xargs_options = {'require_serial': require_serial, 'color': color}
    return lang_base.run_xargs(script_cmd, file_args, **xargs_options)
diff --git a/pre_commit/languages/swift.py b/pre_commit/languages/swift.py
new file mode 100644
index 0000000..f7bfe84
--- /dev/null
+++ b/pre_commit/languages/swift.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import contextlib
+import os
+from collections.abc import Generator
+from collections.abc import Sequence
+
+from pre_commit import lang_base
+from pre_commit.envcontext import envcontext
+from pre_commit.envcontext import PatchesT
+from pre_commit.envcontext import Var
+from pre_commit.prefix import Prefix
+from pre_commit.util import cmd_output_b
+
# build output directory (inside the environment dir) and the build
# configuration handed to `swift build -c`
BUILD_DIR = '.build'
BUILD_CONFIG = 'release'

# directory name passed to `lang_base.environment_dir` for swift environments
ENVIRONMENT_DIR = 'swift_env'
# these three are taken unchanged from `lang_base`
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
run_hook = lang_base.basic_run_hook
+
+
def get_env_patch(venv: str) -> PatchesT:  # pragma: win32 no cover
    """Environment patches: prepend the swift build output dir to `PATH`."""
    build_output = os.path.join(venv, BUILD_DIR, BUILD_CONFIG)
    path_patch = ('PATH', (build_output, os.pathsep, Var('PATH')))
    return (path_patch,)
+
+
@contextlib.contextmanager  # pragma: win32 no cover
def in_env(prefix: Prefix, version: str) -> Generator[None, None, None]:
    """Apply the swift environment patches for the duration of the context."""
    swift_env_dir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)
    patches = get_env_patch(swift_env_dir)
    with envcontext(patches):
        yield
+
+
def install_environment(
        prefix: Prefix, version: str, additional_dependencies: Sequence[str],
) -> None:  # pragma: win32 no cover
    """Build the hook's swift package into the environment directory.

    Only the default version is supported and additional dependencies
    are not allowed.
    """
    lang_base.assert_version_default('swift', version)
    lang_base.assert_no_additional_deps('swift', additional_dependencies)
    envdir = lang_base.environment_dir(prefix, ENVIRONMENT_DIR, version)

    # Build the swift package
    os.mkdir(envdir)
    build_path = os.path.join(envdir, BUILD_DIR)
    build_cmd = (
        'swift', 'build',
        '--package-path', prefix.prefix_dir,
        '-c', BUILD_CONFIG,
        '--build-path', build_path,
    )
    cmd_output_b(*build_cmd)
diff --git a/pre_commit/languages/system.py b/pre_commit/languages/system.py
new file mode 100644
index 0000000..f6ad688
--- /dev/null
+++ b/pre_commit/languages/system.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+from pre_commit import lang_base
+
# no environment directory is defined for this language
ENVIRONMENT_DIR = None
# the whole language interface is taken unchanged from `lang_base`
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
install_environment = lang_base.no_install
in_env = lang_base.no_env
run_hook = lang_base.basic_run_hook
diff --git a/pre_commit/logging_handler.py b/pre_commit/logging_handler.py
new file mode 100644
index 0000000..cd33953
--- /dev/null
+++ b/pre_commit/logging_handler.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+from collections.abc import Generator
+
+from pre_commit import color
+from pre_commit import output
+
logger = logging.getLogger('pre_commit')

# color applied to the `[LEVEL]` prefix of each message, keyed by level
# name; an empty string is used for levels shown without color
LOG_LEVEL_COLORS = {
    'DEBUG': '',
    'INFO': '',
    'WARNING': color.YELLOW,
    'ERROR': color.RED,
}
+
+
class LoggingHandler(logging.Handler):
    """Logging handler which writes `[LEVEL] message` lines via `output`."""

    def __init__(self, use_color: bool) -> None:
        super().__init__()
        self.use_color = use_color

    def emit(self, record: logging.LogRecord) -> None:
        """Write one record, coloring the level prefix when enabled."""
        # levels without an entry in the table (e.g. CRITICAL or custom
        # levels) are written without color instead of raising `KeyError`
        level_color = LOG_LEVEL_COLORS.get(record.levelname, '')
        level_msg = color.format_color(
            f'[{record.levelname}]',
            level_color,
            self.use_color,
        )
        output.write_line(f'{level_msg} {record.getMessage()}')
+
+
@contextlib.contextmanager
def logging_handler(use_color: bool) -> Generator[None, None, None]:
    """Attach a `LoggingHandler` to the `pre_commit` logger for the context.

    The logger level is set to INFO; the handler is removed again on exit,
    also when the body raises.
    """
    installed = LoggingHandler(use_color)
    logger.addHandler(installed)
    logger.setLevel(logging.INFO)
    try:
        yield
    finally:
        logger.removeHandler(installed)
diff --git a/pre_commit/main.py b/pre_commit/main.py
new file mode 100644
index 0000000..559c927
--- /dev/null
+++ b/pre_commit/main.py
@@ -0,0 +1,442 @@
+from __future__ import annotations
+
+import argparse
+import logging
+import os
+import sys
+from collections.abc import Sequence
+
+import pre_commit.constants as C
+from pre_commit import clientlib
+from pre_commit import git
+from pre_commit.color import add_color_option
+from pre_commit.commands.autoupdate import autoupdate
+from pre_commit.commands.clean import clean
+from pre_commit.commands.gc import gc
+from pre_commit.commands.hook_impl import hook_impl
+from pre_commit.commands.init_templatedir import init_templatedir
+from pre_commit.commands.install_uninstall import install
+from pre_commit.commands.install_uninstall import install_hooks
+from pre_commit.commands.install_uninstall import uninstall
+from pre_commit.commands.migrate_config import migrate_config
+from pre_commit.commands.run import run
+from pre_commit.commands.sample_config import sample_config
+from pre_commit.commands.try_repo import try_repo
+from pre_commit.commands.validate_config import validate_config
+from pre_commit.commands.validate_manifest import validate_manifest
+from pre_commit.error_handler import error_handler
+from pre_commit.logging_handler import logging_handler
+from pre_commit.store import Store
+
+
logger = logging.getLogger('pre_commit')

# https://github.com/pre-commit/pre-commit/issues/217
# On OSX, making a virtualenv using pyvenv at . causes `virtualenv` and `pip`
# to install packages to the wrong place.  We don't want anything to deal with
# pyvenv
os.environ.pop('__PYVENV_LAUNCHER__', None)

# https://github.com/getsentry/snuba/pull/5388
os.environ.pop('PYTHONEXECUTABLE', None)

# commands for which `main` skips `_adjust_args_and_chdir` and does not
# mark the config as used
COMMANDS_NO_GIT = {
    'clean', 'gc', 'init-templatedir', 'sample-config',
    'validate-config', 'validate-manifest',
}
+
+
def _add_config_option(parser: argparse.ArgumentParser) -> None:
    """Register the `-c` / `--config` option on `parser`."""
    option_flags = ('-c', '--config')
    parser.add_argument(
        *option_flags,
        default=C.CONFIG_FILE,
        help='Path to alternate config file',
    )
+
+
def _add_hook_type_option(parser: argparse.ArgumentParser) -> None:
    """Register the repeatable `-t` / `--hook-type` option on `parser`."""
    option_flags = ('-t', '--hook-type')
    parser.add_argument(
        *option_flags,
        choices=clientlib.HOOK_TYPES,
        action='append',
        dest='hook_types',
    )
+
+
def _add_run_options(parser: argparse.ArgumentParser) -> None:
    """Register the options shared by the `run` and `try-repo` commands."""
    parser.add_argument('hook', nargs='?', help='A single hook-id to run')
    parser.add_argument('--verbose', '-v', action='store_true', default=False)
    # `--all-files` and `--files` cannot be combined
    mutex_group = parser.add_mutually_exclusive_group(required=False)
    mutex_group.add_argument(
        '--all-files', '-a', action='store_true', default=False,
        help='Run on all the files in the repo.',
    )
    mutex_group.add_argument(
        '--files', nargs='*', default=[],
        help='Specific filenames to run hooks on.',
    )
    parser.add_argument(
        '--show-diff-on-failure', action='store_true',
        help='When hooks fail, run `git diff` directly afterward.',
    )
    parser.add_argument(
        '--hook-stage',
        choices=clientlib.STAGES,
        type=clientlib.transform_stage,
        default='pre-commit',
        help='The stage during which the hook is fired.  One of %(choices)s',
    )
    parser.add_argument(
        '--remote-branch', help='Remote branch ref used by `git push`.',
    )
    parser.add_argument(
        '--local-branch', help='Local branch ref used by `git push`.',
    )
    # `--source` / `-s` are accepted as alternate spellings of `--from-ref`
    parser.add_argument(
        '--from-ref', '--source', '-s',
        help=(
            '(for usage with `--to-ref`) -- this option represents the '
            'original ref in a `from_ref...to_ref` diff expression.  '
            'For `pre-push` hooks, this represents the branch you are pushing '
            'to.  '
            'For `post-checkout` hooks, this represents the branch that was '
            'previously checked out.'
        ),
    )
    # `--origin` / `-o` are accepted as alternate spellings of `--to-ref`
    parser.add_argument(
        '--to-ref', '--origin', '-o',
        help=(
            '(for usage with `--from-ref`) -- this option represents the '
            'destination ref in a `from_ref...to_ref` diff expression.  '
            'For `pre-push` hooks, this represents the branch being pushed.  '
            'For `post-checkout` hooks, this represents the branch that is '
            'now checked out.'
        ),
    )
    parser.add_argument(
        '--pre-rebase-upstream', help=(
            'The upstream from which the series was forked.'
        ),
    )
    parser.add_argument(
        '--pre-rebase-branch', help=(
            'The branch being rebased, and is not set when '
            'rebasing the current branch.'
        ),
    )
    parser.add_argument(
        '--commit-msg-filename',
        help='Filename to check when running during `commit-msg`',
    )
    parser.add_argument(
        '--prepare-commit-message-source',
        help=(
            'Source of the commit message '
            '(typically the second argument to .git/hooks/prepare-commit-msg)'
        ),
    )
    parser.add_argument(
        '--commit-object-name',
        help=(
            'Commit object name '
            '(typically the third argument to .git/hooks/prepare-commit-msg)'
        ),
    )
    parser.add_argument(
        '--remote-name', help='Remote name used by `git push`.',
    )
    parser.add_argument('--remote-url', help='Remote url used by `git push`.')
    parser.add_argument(
        '--checkout-type',
        help=(
            'Indicates whether the checkout was a branch checkout '
            '(changing branches, flag=1) or a file checkout (retrieving a '
            'file from the index, flag=0).'
        ),
    )
    parser.add_argument(
        '--is-squash-merge',
        help=(
            'During a post-merge hook, indicates whether the merge was a '
            'squash merge'
        ),
    )
    parser.add_argument(
        '--rewrite-command',
        help=(
            'During a post-rewrite hook, specifies the command that invoked '
            'the rewrite'
        ),
    )
+
+
def _adjust_args_and_chdir(args: argparse.Namespace) -> None:
    """Change to the git root and rewrite path arguments to match.

    Path-like arguments are made absolute first, then the working
    directory is changed to `git.get_root()`, and finally the same
    arguments are made relative to the new working directory.
    """
    # `--config` was specified relative to the non-root working directory
    if os.path.exists(args.config):
        args.config = os.path.abspath(args.config)
    if args.command in {'run', 'try-repo'}:
        args.files = [os.path.abspath(filename) for filename in args.files]
        if args.commit_msg_filename is not None:
            args.commit_msg_filename = os.path.abspath(
                args.commit_msg_filename,
            )
    # only an existing path is rewritten; any other `repo` value is kept
    if args.command == 'try-repo' and os.path.exists(args.repo):
        args.repo = os.path.abspath(args.repo)

    toplevel = git.get_root()
    os.chdir(toplevel)

    # `os.path.relpath` is now computed against the git root
    args.config = os.path.relpath(args.config)
    if args.command in {'run', 'try-repo'}:
        args.files = [os.path.relpath(filename) for filename in args.files]
        if args.commit_msg_filename is not None:
            args.commit_msg_filename = os.path.relpath(
                args.commit_msg_filename,
            )
    if args.command == 'try-repo' and os.path.exists(args.repo):
        args.repo = os.path.relpath(args.repo)
+
+
def main(argv: Sequence[str] | None = None) -> int:
    """Parse `argv` and run the selected sub-command.

    `argv` defaults to `sys.argv[1:]`; an empty `argv` runs the `run`
    command. Returns the value returned by the selected command.
    """
    argv = argv if argv is not None else sys.argv[1:]
    parser = argparse.ArgumentParser(prog='pre-commit')

    # https://stackoverflow.com/a/8521644/812183
    parser.add_argument(
        '-V', '--version',
        action='version',
        version=f'%(prog)s {C.VERSION}',
    )

    subparsers = parser.add_subparsers(dest='command')

    # helper: every sub-command created through it also gets the color option
    def _add_cmd(name: str, *, help: str) -> argparse.ArgumentParser:
        parser = subparsers.add_parser(name, help=help)
        add_color_option(parser)
        return parser

    autoupdate_parser = _add_cmd(
        'autoupdate',
        help="Auto-update pre-commit config to the latest repos' versions.",
    )
    _add_config_option(autoupdate_parser)
    autoupdate_parser.add_argument(
        '--bleeding-edge', action='store_true',
        help=(
            'Update to the bleeding edge of `HEAD` instead of the latest '
            'tagged version (the default behavior).'
        ),
    )
    autoupdate_parser.add_argument(
        '--freeze', action='store_true',
        help='Store "frozen" hashes in `rev` instead of tag names',
    )
    autoupdate_parser.add_argument(
        '--repo', dest='repos', action='append', metavar='REPO', default=[],
        help='Only update this repository -- may be specified multiple times.',
    )
    autoupdate_parser.add_argument(
        '-j', '--jobs', type=int, default=1,
        help='Number of threads to use.  (default %(default)s).',
    )

    _add_cmd('clean', help='Clean out pre-commit files.')

    _add_cmd('gc', help='Clean unused cached repos.')

    init_templatedir_parser = _add_cmd(
        'init-templatedir',
        help=(
            'Install hook script in a directory intended for use with '
            '`git config init.templateDir`.'
        ),
    )
    _add_config_option(init_templatedir_parser)
    init_templatedir_parser.add_argument(
        'directory', help='The directory in which to write the hook script.',
    )
    init_templatedir_parser.add_argument(
        '--no-allow-missing-config',
        action='store_false',
        dest='allow_missing_config',
        help='Assume cloned repos should have a `pre-commit` config.',
    )
    _add_hook_type_option(init_templatedir_parser)

    install_parser = _add_cmd('install', help='Install the pre-commit script.')
    _add_config_option(install_parser)
    install_parser.add_argument(
        '-f', '--overwrite', action='store_true',
        help='Overwrite existing hooks / remove migration mode.',
    )
    install_parser.add_argument(
        '--install-hooks', action='store_true',
        help=(
            'Whether to install hook environments for all environments '
            'in the config file.'
        ),
    )
    _add_hook_type_option(install_parser)
    install_parser.add_argument(
        '--allow-missing-config', action='store_true', default=False,
        help=(
            'Whether to allow a missing `pre-commit` configuration file '
            'or exit with a failure code.'
        ),
    )

    install_hooks_parser = _add_cmd(
        'install-hooks',
        help=(
            'Install hook environments for all environments in the config '
            'file.  You may find `pre-commit install --install-hooks` more '
            'useful.'
        ),
    )
    _add_config_option(install_hooks_parser)

    migrate_config_parser = _add_cmd(
        'migrate-config',
        help='Migrate list configuration to new map configuration.',
    )
    _add_config_option(migrate_config_parser)

    run_parser = _add_cmd('run', help='Run hooks.')
    _add_config_option(run_parser)
    _add_run_options(run_parser)

    _add_cmd('sample-config', help=f'Produce a sample {C.CONFIG_FILE} file')

    try_repo_parser = _add_cmd(
        'try-repo',
        help='Try the hooks in a repository, useful for developing new hooks.',
    )
    _add_config_option(try_repo_parser)
    try_repo_parser.add_argument(
        'repo', help='Repository to source hooks from.',
    )
    try_repo_parser.add_argument(
        '--ref', '--rev',
        help=(
            'Manually select a rev to run against, otherwise the `HEAD` '
            'revision will be used.'
        ),
    )
    _add_run_options(try_repo_parser)

    uninstall_parser = _add_cmd(
        'uninstall', help='Uninstall the pre-commit script.',
    )
    _add_config_option(uninstall_parser)
    _add_hook_type_option(uninstall_parser)

    validate_config_parser = _add_cmd(
        'validate-config', help='Validate .pre-commit-config.yaml files',
    )
    validate_config_parser.add_argument('filenames', nargs='*')

    validate_manifest_parser = _add_cmd(
        'validate-manifest', help='Validate .pre-commit-hooks.yaml files',
    )
    validate_manifest_parser.add_argument('filenames', nargs='*')

    # does not use `_add_cmd` because it doesn't use `--color`
    help = subparsers.add_parser(
        'help', help='Show help for a specific command.',
    )
    help.add_argument('help_cmd', nargs='?', help='Command to show help for.')

    # not intended for users to call this directly
    hook_impl_parser = subparsers.add_parser('hook-impl')
    add_color_option(hook_impl_parser)
    _add_config_option(hook_impl_parser)
    hook_impl_parser.add_argument('--hook-type')
    hook_impl_parser.add_argument('--hook-dir')
    hook_impl_parser.add_argument(
        '--skip-on-missing-config', action='store_true',
    )
    hook_impl_parser.add_argument(dest='rest', nargs=argparse.REMAINDER)

    # argparse doesn't really provide a way to use a `default` subparser
    if len(argv) == 0:
        argv = ['run']
    args = parser.parse_args(argv)

    # `help [cmd]` is implemented by re-parsing with `--help`
    if args.command == 'help' and args.help_cmd:
        parser.parse_args([args.help_cmd, '--help'])
    elif args.command == 'help':
        parser.parse_args(['--help'])

    with error_handler(), logging_handler(args.color):
        git.check_for_cygwin_mismatch()

        store = Store()

        if args.command not in COMMANDS_NO_GIT:
            _adjust_args_and_chdir(args)
            store.mark_config_used(args.config)

        if args.command == 'autoupdate':
            return autoupdate(
                args.config,
                tags_only=not args.bleeding_edge,
                freeze=args.freeze,
                repos=args.repos,
                jobs=args.jobs,
            )
        elif args.command == 'clean':
            return clean(store)
        elif args.command == 'gc':
            return gc(store)
        elif args.command == 'hook-impl':
            return hook_impl(
                store,
                config=args.config,
                color=args.color,
                hook_type=args.hook_type,
                hook_dir=args.hook_dir,
                skip_on_missing_config=args.skip_on_missing_config,
                # the first element of `rest` is dropped
                args=args.rest[1:],
            )
        elif args.command == 'install':
            return install(
                args.config, store,
                hook_types=args.hook_types,
                overwrite=args.overwrite,
                hooks=args.install_hooks,
                skip_on_missing_config=args.allow_missing_config,
            )
        elif args.command == 'init-templatedir':
            return init_templatedir(
                args.config, store, args.directory,
                hook_types=args.hook_types,
                skip_on_missing_config=args.allow_missing_config,
            )
        elif args.command == 'install-hooks':
            return install_hooks(args.config, store)
        elif args.command == 'migrate-config':
            return migrate_config(args.config)
        elif args.command == 'run':
            return run(args.config, store, args)
        elif args.command == 'sample-config':
            return sample_config()
        elif args.command == 'try-repo':
            return try_repo(args)
        elif args.command == 'uninstall':
            return uninstall(
                config_file=args.config,
                hook_types=args.hook_types,
            )
        elif args.command == 'validate-config':
            return validate_config(args.filenames)
        elif args.command == 'validate-manifest':
            return validate_manifest(args.filenames)
        else:
            raise NotImplementedError(
                f'Command {args.command} not implemented.',
            )

        # every branch above returns or raises
        raise AssertionError(
            f'Command {args.command} failed to exit with a returncode',
        )
+
+
# when run as a script, exit with the value returned by `main`
if __name__ == '__main__':
    raise SystemExit(main())
diff --git a/pre_commit/meta_hooks/__init__.py b/pre_commit/meta_hooks/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pre_commit/meta_hooks/__init__.py
diff --git a/pre_commit/meta_hooks/check_hooks_apply.py b/pre_commit/meta_hooks/check_hooks_apply.py
new file mode 100644
index 0000000..84c142b
--- /dev/null
+++ b/pre_commit/meta_hooks/check_hooks_apply.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+import argparse
+from collections.abc import Sequence
+
+import pre_commit.constants as C
+from pre_commit import git
+from pre_commit.clientlib import load_config
+from pre_commit.commands.run import Classifier
+from pre_commit.repository import all_hooks
+from pre_commit.store import Store
+
+
def check_all_hooks_match_files(config_file: str) -> int:
    """Report hooks in ``config_file`` that apply to no file in the repo.

    Hooks with ``always_run`` set, or whose language is ``'fail'``, are
    skipped.  For every other hook a message is printed when the classifier
    yields no filenames for it.

    Returns ``1`` if at least one hook was reported, otherwise ``0``.
    """
    cfg = load_config(config_file)
    all_files = git.get_all_files()
    file_classifier = Classifier.from_config(
        all_files, cfg['files'], cfg['exclude'],
    )

    status = 0
    for hook in all_hooks(cfg, Store()):
        exempt = hook.always_run or hook.language == 'fail'
        if not exempt and not any(file_classifier.filenames_for_hook(hook)):
            print(f'{hook.id} does not apply to this repository')
            status = 1
    return status
+
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run ``check_all_hooks_match_files`` on each config file given.

    ``argv`` holds the config filenames; when none are supplied the default
    ``C.CONFIG_FILE`` is checked.  The per-file results are OR-ed together
    so the return value is non-zero if any file reported a problem.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filenames', nargs='*', default=[C.CONFIG_FILE])
    parsed = arg_parser.parse_args(argv)

    exit_code = 0
    # every file is checked, even after an earlier one has failed
    for config_path in parsed.filenames:
        exit_code = exit_code | check_all_hooks_match_files(config_path)
    return exit_code
+
+
+if __name__ == '__main__':
+ raise SystemExit(main())
diff --git a/pre_commit/meta_hooks/check_useless_excludes.py b/pre_commit/meta_hooks/check_useless_excludes.py
new file mode 100644
index 0000000..664251a
--- /dev/null
+++ b/pre_commit/meta_hooks/check_useless_excludes.py
@@ -0,0 +1,83 @@
+from __future__ import annotations
+
+import argparse
+import re
+from collections.abc import Iterable
+from collections.abc import Sequence
+
+from cfgv import apply_defaults
+
+import pre_commit.constants as C
+from pre_commit import git
+from pre_commit.clientlib import load_config
+from pre_commit.clientlib import MANIFEST_HOOK_DICT
+from pre_commit.commands.run import Classifier
+
+
def exclude_matches_any(
        filenames: Iterable[str],
        include: str,
        exclude: str,
) -> bool:
    """Tell whether the ``exclude`` pattern actually excludes something.

    The pattern ``'^$'`` is always accepted.  Otherwise the result is
    ``True`` when at least one filename is matched (via ``re.search``) by
    both ``include`` and ``exclude``.
    """
    if exclude == '^$':
        return True

    include_re = re.compile(include)
    exclude_re = re.compile(exclude)
    return any(
        include_re.search(name) and exclude_re.search(name)
        for name in filenames
    )
+
+
def check_useless_excludes(config_file: str) -> int:
    """Report ``exclude`` patterns in ``config_file`` that match no files.

    The top-level ``exclude`` is checked against every file returned by
    ``git.get_all_files()``; each hook's ``exclude`` is checked against the
    files selected by that hook's type filters and ``files`` pattern.

    Returns ``1`` if any pattern was reported, otherwise ``0``.
    """
    cfg = load_config(config_file)
    all_files = git.get_all_files()
    classifier = Classifier.from_config(
        all_files, cfg['files'], cfg['exclude'],
    )
    status = 0

    global_exclude = cfg['exclude']
    if not exclude_matches_any(all_files, '', global_exclude):
        print(
            f'The global exclude pattern {global_exclude!r} does not match '
            f'any files',
        )
        status = 1

    for repo in cfg['repos']:
        for hook in repo['hooks']:
            # the default of manifest hooks is `types: [file]` but we may
            # be configuring a symlink hook while there's a broken symlink
            hook.setdefault('types', [])
            # Not actually a manifest dict, but this more accurately reflects
            # the defaults applied during runtime
            hook = apply_defaults(hook, MANIFEST_HOOK_DICT)
            candidates = classifier.by_types(
                classifier.filenames,
                hook['types'],
                hook['types_or'],
                hook['exclude_types'],
            )
            hook_include = hook['files']
            hook_exclude = hook['exclude']
            if exclude_matches_any(candidates, hook_include, hook_exclude):
                continue
            print(
                f'The exclude pattern {hook_exclude!r} for {hook["id"]} does '
                f'not match any files',
            )
            status = 1

    return status
+
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run ``check_useless_excludes`` on each config file given.

    ``argv`` holds the config filenames; when none are supplied the default
    ``C.CONFIG_FILE`` is checked.  The per-file results are OR-ed together
    so the return value is non-zero if any file reported a problem.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filenames', nargs='*', default=[C.CONFIG_FILE])
    parsed = arg_parser.parse_args(argv)

    exit_code = 0
    # every file is checked, even after an earlier one has failed
    for config_path in parsed.filenames:
        exit_code = exit_code | check_useless_excludes(config_path)
    return exit_code
+
+
+if __name__ == '__main__':
+ raise SystemExit(main())
diff --git a/pre_commit/meta_hooks/identity.py b/pre_commit/meta_hooks/identity.py
new file mode 100644
index 0000000..3e20bbc
--- /dev/null
+++ b/pre_commit/meta_hooks/identity.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+import sys
+from collections.abc import Sequence
+
+from pre_commit import output
+
+
def main(argv: Sequence[str] | None = None) -> int:
    """Write every argument on its own line and return ``0``.

    When ``argv`` is ``None`` the arguments are taken from ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    for item in argv:
        output.write_line(item)
    return 0
+
+
+if __name__ == '__main__':
+ raise SystemExit(main())
diff --git a/pre_commit/output.py b/pre_commit/output.py
new file mode 100644
index 0000000..4bcf27f
--- /dev/null
+++ b/pre_commit/output.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+import contextlib
+import sys
+from typing import Any
+from typing import IO
+
+
def write(s: str, stream: IO[bytes] = sys.stdout.buffer) -> None:
    """Encode ``s`` and write it to the binary ``stream``, then flush."""
    encoded = s.encode()
    stream.write(encoded)
    stream.flush()
+
+
def write_line_b(
        s: bytes | None = None,
        stream: IO[bytes] = sys.stdout.buffer,
        logfile_name: str | None = None,
) -> None:
    """Write ``s`` (if given) followed by a newline to ``stream``.

    When ``logfile_name`` is truthy the same bytes are also appended to that
    file, which is opened in binary append mode for the duration of the call.
    Each destination is flushed after the newline is written.
    """
    with contextlib.ExitStack() as stack:
        sinks = [stream]
        if logfile_name:
            # the stack closes the log file once all writes are done
            sinks.append(stack.enter_context(open(logfile_name, 'ab')))

        for sink in sinks:
            if s is not None:
                sink.write(s)
            sink.write(b'\n')
            sink.flush()
+
+
def write_line(s: str | None = None, **kwargs: Any) -> None:
    """Text front-end for ``write_line_b``.

    ``s`` is encoded when it is not ``None``; all keyword arguments are
    forwarded unchanged.
    """
    payload = None if s is None else s.encode()
    write_line_b(payload, **kwargs)
diff --git a/pre_commit/parse_shebang.py b/pre_commit/parse_shebang.py
new file mode 100644
index 0000000..043a9b5
--- /dev/null
+++ b/pre_commit/parse_shebang.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import os.path
+from collections.abc import Mapping
+from typing import NoReturn
+
+from identify.identify import parse_shebang_from_file
+
+
class ExecutableNotFoundError(OSError):
    """``OSError`` subclass whose first argument is a human-readable message."""

    def to_output(self) -> tuple[int, bytes, None]:
        """Return ``(1, <encoded first argument>, None)``."""
        message = self.args[0]
        return 1, message.encode(), None
+
+
def parse_filename(filename: str) -> tuple[str, ...]:
    """Return the shebang command of ``filename`` as a tuple.

    A path that does not exist yields an empty tuple; otherwise the result
    of ``parse_shebang_from_file`` is returned as-is.
    """
    if os.path.exists(filename):
        return parse_shebang_from_file(filename)
    return ()
+
+
def find_executable(
        exe: str, *, env: Mapping[str, str] | None = None,
) -> str | None:
    """Look ``exe`` up on ``PATH`` and return the first executable match.

    ``exe`` is normalized first; if it then contains ``os.sep`` it is
    returned without any lookup.  ``env`` replaces ``os.environ`` as the
    source of ``PATH`` / ``PATHEXT`` when provided.  If ``PATHEXT`` is set,
    the name with each listed extension is tried before the bare name.

    Returns the joined path of the first regular, executable file found, or
    ``None`` when there is none.
    """
    exe = os.path.normpath(exe)
    if os.sep in exe:
        return exe

    environ = os.environ if env is None else env

    candidates: tuple[str, ...] = (exe,)
    if 'PATHEXT' in environ:
        extensions = environ['PATHEXT'].split(os.pathsep)
        candidates = tuple(exe + ext for ext in extensions) + candidates

    search_dirs = environ.get('PATH', '').split(os.pathsep)
    for directory in search_dirs:
        for name in candidates:
            candidate = os.path.join(directory, name)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate
    return None
+
+
def normexe(orig: str, *, env: Mapping[str, str] | None = None) -> str:
    """Resolve ``orig`` to an executable path.

    A name without any path separator is looked up with ``find_executable``
    (using ``env`` if given).  Anything else is treated as a path and must
    be an existing, executable regular file; it is then returned unchanged.

    Raises ``ExecutableNotFoundError`` when the lookup fails or the path is
    a directory, is missing, or is not executable.
    """
    def _fail(reason: str) -> NoReturn:
        raise ExecutableNotFoundError(f'Executable `{orig}` {reason}')

    has_separator = os.sep in orig or bool(os.altsep and os.altsep in orig)
    if not has_separator:
        resolved = find_executable(orig, env=env)
        if resolved is None:
            _fail('not found')
        return resolved

    if os.path.isdir(orig):
        _fail('is a directory')
    if not os.path.isfile(orig):
        _fail('not found')
    if not os.access(orig, os.X_OK):  # pragma: win32 no cover
        _fail('is not executable')
    return orig
+
+
def normalize_cmd(
        cmd: tuple[str, ...],
        *,
        env: Mapping[str, str] | None = None,
) -> tuple[str, ...]:
    """Fixes for the following issues on windows
    - https://bugs.python.org/issue8557
    - windows does not parse shebangs

    This function also makes deep-path shebangs work just fine

    The first element of ``cmd`` is resolved with ``normexe``, the shebang
    of the resolved file (if any) is prepended, and the new first element
    is resolved once more.
    """
    program, arguments = cmd[0], cmd[1:]

    # Use PATH to determine the executable
    resolved = normexe(program, env=env)

    # Figure out the shebang from the resulting command
    with_shebang = parse_filename(resolved) + (resolved,) + arguments

    # This could have given us back another bare executable
    interpreter = normexe(with_shebang[0], env=env)

    return (interpreter,) + with_shebang[1:]
diff --git a/pre_commit/prefix.py b/pre_commit/prefix.py
new file mode 100644
index 0000000..f1b28c1
--- /dev/null
+++ b/pre_commit/prefix.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+import os.path
+from typing import NamedTuple
+
+
class Prefix(NamedTuple):
    """A directory together with helpers for paths beneath it."""

    # directory that all relative parts are joined onto
    prefix_dir: str

    def path(self, *parts: str) -> str:
        """Join ``parts`` onto ``prefix_dir`` and normalize the result."""
        joined = os.path.join(self.prefix_dir, *parts)
        return os.path.normpath(joined)

    def exists(self, *parts: str) -> bool:
        """Return whether ``self.path(*parts)`` exists."""
        target = self.path(*parts)
        return os.path.exists(target)

    def star(self, end: str) -> tuple[str, ...]:
        """Return the entries directly in ``prefix_dir`` ending with ``end``."""
        return tuple(
            entry
            for entry in os.listdir(self.prefix_dir)
            if entry.endswith(end)
        )
diff --git a/pre_commit/repository.py b/pre_commit/repository.py
new file mode 100644
index 0000000..aa84185
--- /dev/null
+++ b/pre_commit/repository.py
@@ -0,0 +1,246 @@
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shlex
+from collections.abc import Sequence
+from typing import Any
+
+import pre_commit.constants as C
+from pre_commit.all_languages import languages
+from pre_commit.clientlib import load_manifest
+from pre_commit.clientlib import LOCAL
+from pre_commit.clientlib import META
+from pre_commit.hook import Hook
+from pre_commit.lang_base import environment_dir
+from pre_commit.prefix import Prefix
+from pre_commit.store import Store
+from pre_commit.util import clean_path_on_failure
+from pre_commit.util import rmtree
+
+
+logger = logging.getLogger('pre_commit')
+
+
def _state_filename_v1(venv: str) -> str:
    """Return the path of the ``.install_state_v1`` file inside ``venv``."""
    marker_name = '.install_state_v1'
    return os.path.join(venv, marker_name)
+
+
def _state_filename_v2(venv: str) -> str:
    """Return the path of the ``.install_state_v2`` file inside ``venv``."""
    marker_name = '.install_state_v2'
    return os.path.join(venv, marker_name)
+
+
def _state(additional_deps: Sequence[str]) -> object:
    """Build the state mapping recorded for ``additional_deps``."""
    return dict(additional_dependencies=additional_deps)
+
+
def _read_state(venv: str) -> object | None:
    """Load the JSON stored in the v1 state file of ``venv``.

    Returns ``None`` when that file does not exist.
    """
    state_path = _state_filename_v1(venv)
    if os.path.exists(state_path):
        with open(state_path) as state_file:
            return json.load(state_file)
    return None
+
+
def _hook_installed(hook: Hook) -> bool:
    """Return whether the environment for ``hook`` is installed and healthy.

    Languages whose ``ENVIRONMENT_DIR`` is ``None`` always count as
    installed.  Otherwise the environment needs either a v2 state file or a
    v1 state equal to the hook's ``additional_dependencies``, and the
    language's ``health_check`` must report nothing.
    """
    language = languages[hook.language]
    if language.ENVIRONMENT_DIR is None:
        return True

    env_path = environment_dir(
        hook.prefix,
        language.ENVIRONMENT_DIR,
        hook.language_version,
    )
    state_recorded = (
        os.path.exists(_state_filename_v2(env_path)) or
        _read_state(env_path) == _state(hook.additional_dependencies)
    )
    if not state_recorded:
        return False
    # the health check only runs once the install state has been confirmed
    return not language.health_check(hook.prefix, hook.language_version)
+
+
def _hook_install(hook: Hook) -> None:
    """Create the environment for ``hook`` and record that it is installed.

    Any existing environment directory is removed first.  The language's
    ``install_environment`` and ``health_check`` then run, followed by the
    writing of the v1 and v2 state files, all under
    ``clean_path_on_failure``.

    Raises ``AssertionError`` if the health check reports a problem right
    after installation.
    """
    for message in (
            f'Installing environment for {hook.src}.',
            'Once installed this environment will be reused.',
            'This may take a few minutes...',
    ):
        logger.info(message)

    if hook.language == 'python_venv':
        logger.warning(
            f'`repo: {hook.src}` uses deprecated `language: python_venv`.  '
            f'This is an alias for `language: python`.  '
            f'Often `pre-commit autoupdate --repo {shlex.quote(hook.src)}` '
            f'will fix this.',
        )

    language = languages[hook.language]
    assert language.ENVIRONMENT_DIR is not None

    env_path = environment_dir(
        hook.prefix,
        language.ENVIRONMENT_DIR,
        hook.language_version,
    )

    # There's potentially incomplete cleanup from previous runs
    # Clean it up!
    if os.path.exists(env_path):
        rmtree(env_path)

    with clean_path_on_failure(env_path):
        language.install_environment(
            hook.prefix, hook.language_version, hook.additional_dependencies,
        )
        problem = language.health_check(hook.prefix, hook.language_version)
        if problem:
            raise AssertionError(
                f'BUG: expected environment for {hook.language} to be healthy '
                f'immediately after install, please open an issue describing '
                f'your environment\n\n'
                f'more info:\n\n{problem}',
            )

        # TODO: remove v1 state writing, no longer needed after pre-commit 3.0
        # Write our state to indicate we're installed
        v1_marker = _state_filename_v1(env_path)
        staging_path = v1_marker + 'staging'
        with open(staging_path, 'w') as staging_file:
            staging_file.write(
                json.dumps(_state(hook.additional_dependencies)),
            )
        # Move the file into place atomically to indicate we've installed
        os.replace(staging_path, v1_marker)

        # create the v2 marker if it is missing; its contents are not used
        open(_state_filename_v2(env_path), 'a+').close()
+
+
def _hook(
        *hook_dicts: dict[str, Any],
        root_config: dict[str, Any],
) -> dict[str, Any]:
    """Merge hook dicts and resolve their config-level defaults.

    ``hook_dicts`` are merged left to right into a fresh dict, so later
    dicts override earlier ones.  A ``language_version`` equal to
    ``C.DEFAULT`` is replaced by ``root_config['default_language_version']``
    for the hook's language and, if that is still the default, by the
    language's own ``get_default_version()``.  Empty ``stages`` fall back
    to ``root_config['default_stages']``.

    For a language whose ``ENVIRONMENT_DIR`` is ``None``, an error is logged
    and ``SystemExit(1)`` is raised when the hook sets ``language_version``
    or ``additional_dependencies``.
    """
    ret, rest = dict(hook_dicts[0]), hook_dicts[1:]
    for dct in rest:
        ret.update(dct)

    lang = ret['language']
    if ret['language_version'] == C.DEFAULT:
        ret['language_version'] = root_config['default_language_version'][lang]
    if ret['language_version'] == C.DEFAULT:
        ret['language_version'] = languages[lang].get_default_version()

    if not ret['stages']:
        ret['stages'] = root_config['default_stages']

    if languages[lang].ENVIRONMENT_DIR is None:
        # `raise SystemExit` rather than the `site`-provided `exit()`
        # helper, which only exists when the `site` module was loaded
        if ret['language_version'] != C.DEFAULT:
            logger.error(
                f'The hook `{ret["id"]}` specifies `language_version` but is '
                f'using language `{lang}` which does not install an '
                f'environment.  '
                f'Perhaps you meant to use a specific language?',
            )
            raise SystemExit(1)
        if ret['additional_dependencies']:
            logger.error(
                f'The hook `{ret["id"]}` specifies `additional_dependencies` '
                f'but is using language `{lang}` which does not install an '
                f'environment.  '
                f'Perhaps you meant to use a specific language?',
            )
            raise SystemExit(1)

    return ret
+
+
def _non_cloned_repository_hooks(
        repo_config: dict[str, Any],
        store: Store,
        root_config: dict[str, Any],
) -> tuple[Hook, ...]:
    """Create the ``Hook`` objects for a repo that is not cloned.

    Each hook in ``repo_config['hooks']`` gets a prefix of either the
    current working directory (language without an environment) or a local
    store repository made for its ``additional_dependencies``.
    """
    def _prefix_for(language_name: str, deps: Sequence[str]) -> Prefix:
        # pygrep / script / system / docker_image do not have
        # environments so they work out of the current directory
        if languages[language_name].ENVIRONMENT_DIR is None:
            return Prefix(os.getcwd())
        return Prefix(store.make_local(deps))

    created = []
    for hook_dct in repo_config['hooks']:
        created.append(
            Hook.create(
                repo_config['repo'],
                _prefix_for(
                    hook_dct['language'], hook_dct['additional_dependencies'],
                ),
                _hook(hook_dct, root_config=root_config),
            ),
        )
    return tuple(created)
+
+
def _cloned_repository_hooks(
        repo_config: dict[str, Any],
        store: Store,
        root_config: dict[str, Any],
) -> tuple[Hook, ...]:
    """Create the ``Hook`` objects for a repo cloned at a given ``rev``.

    The manifest of the cloned repository is loaded and each configured
    hook is merged over the manifest entry with the same ``id``.  Every
    hook's prefix is the store clone made for its
    ``additional_dependencies``.

    If a configured hook id is missing from the manifest, an error is
    logged and ``SystemExit(1)`` is raised.
    """
    repo, rev = repo_config['repo'], repo_config['rev']
    manifest_path = os.path.join(store.clone(repo, rev), C.MANIFEST_FILE)
    by_id = {hook['id']: hook for hook in load_manifest(manifest_path)}

    for hook in repo_config['hooks']:
        if hook['id'] not in by_id:
            logger.error(
                f'`{hook["id"]}` is not present in repository {repo}.  '
                f'Typo? Perhaps it is introduced in a newer version?  '
                f'Often `pre-commit autoupdate` fixes this.',
            )
            # `raise SystemExit` rather than the `site`-provided `exit()`
            # helper, which only exists when the `site` module was loaded
            raise SystemExit(1)

    hook_dcts = [
        _hook(by_id[hook['id']], hook, root_config=root_config)
        for hook in repo_config['hooks']
    ]
    return tuple(
        Hook.create(
            repo_config['repo'],
            Prefix(store.clone(repo, rev, hook['additional_dependencies'])),
            hook,
        )
        for hook in hook_dcts
    )
+
+
def _repository_hooks(
        repo_config: dict[str, Any],
        store: Store,
        root_config: dict[str, Any],
) -> tuple[Hook, ...]:
    """Build the hooks of one repo entry.

    Repos named ``LOCAL`` or ``META`` use the non-cloned builder; all other
    repos use the cloned builder.
    """
    if repo_config['repo'] in {LOCAL, META}:
        builder = _non_cloned_repository_hooks
    else:
        builder = _cloned_repository_hooks
    return builder(repo_config, store, root_config)
+
+
def install_hook_envs(hooks: Sequence[Hook], store: Store) -> None:
    """Install the environment of every hook that is not installed yet.

    Hooks sharing an ``install_key`` are considered only once.  Nothing is
    locked when no hook needs installing; otherwise the list is recomputed
    and installed while holding ``store.exclusive_lock()``.
    """
    def _pending() -> list[Hook]:
        visited: set[tuple[Prefix, str, str, tuple[str, ...]]] = set()
        pending = []
        for hook in hooks:
            key = hook.install_key
            if key not in visited and not _hook_installed(hook):
                pending.append(hook)
            visited.add(key)
        return pending

    if _pending():
        with store.exclusive_lock():
            # Another process may have already completed this work
            for hook in _pending():
                _hook_install(hook)
+
+
def all_hooks(root_config: dict[str, Any], store: Store) -> tuple[Hook, ...]:
    """Return the hooks of every repo in ``root_config['repos']``, in order."""
    collected: list[Hook] = []
    for repo in root_config['repos']:
        collected.extend(_repository_hooks(repo, store, root_config))
    return tuple(collected)
diff --git a/pre_commit/resources/__init__.py b/pre_commit/resources/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/pre_commit/resources/__init__.py
diff --git a/pre_commit/resources/empty_template_.npmignore b/pre_commit/resources/empty_template_.npmignore
new file mode 100644
index 0000000..72e8ffc
--- /dev/null
+++ b/pre_commit/resources/empty_template_.npmignore
@@ -0,0 +1 @@
+*
diff --git a/pre_commit/resources/empty_template_Cargo.toml b/pre_commit/resources/empty_template_Cargo.toml
new file mode 100644
index 0000000..3dfeffa
--- /dev/null
+++ b/pre_commit/resources/empty_template_Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "__fake_crate"
+version = "0.0.0"
+
+[[bin]]
+name = "__fake_cmd"
+path = "main.rs"
diff --git a/pre_commit/resources/empty_template_LICENSE.renv b/pre_commit/resources/empty_template_LICENSE.renv
new file mode 100644
index 0000000..253c5d1
--- /dev/null
+++ b/pre_commit/resources/empty_template_LICENSE.renv
@@ -0,0 +1,7 @@
+Copyright 2021 RStudio, PBC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pre_commit/resources/empty_template_Makefile.PL b/pre_commit/resources/empty_template_Makefile.PL
new file mode 100644
index 0000000..45a0ba3
--- /dev/null
+++ b/pre_commit/resources/empty_template_Makefile.PL
@@ -0,0 +1,6 @@
+use ExtUtils::MakeMaker;
+
+WriteMakefile(
+ NAME => "PreCommitPlaceholder",
+ VERSION => "0.0.1",
+);
diff --git a/pre_commit/resources/empty_template_activate.R b/pre_commit/resources/empty_template_activate.R
new file mode 100644
index 0000000..d8d092c
--- /dev/null
+++ b/pre_commit/resources/empty_template_activate.R
@@ -0,0 +1,440 @@
+
+local({
+
+ # the requested version of renv
+ version <- "0.12.5"
+
+ # the project directory
+ project <- getwd()
+
+ # avoid recursion
+ if (!is.na(Sys.getenv("RENV_R_INITIALIZING", unset = NA)))
+ return(invisible(TRUE))
+
+ # signal that we're loading renv during R startup
+ Sys.setenv("RENV_R_INITIALIZING" = "true")
+ on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE)
+
+ # signal that we've consented to use renv
+ options(renv.consent = TRUE)
+
+ # load the 'utils' package eagerly -- this ensures that renv shims, which
+ # mask 'utils' packages, will come first on the search path
+ library(utils, lib.loc = .Library)
+
+ # check to see if renv has already been loaded
+ if ("renv" %in% loadedNamespaces()) {
+
+ # if renv has already been loaded, and it's the requested version of renv,
+ # nothing to do
+ spec <- .getNamespaceInfo(.getNamespace("renv"), "spec")
+ if (identical(spec[["version"]], version))
+ return(invisible(TRUE))
+
+ # otherwise, unload and attempt to load the correct version of renv
+ unloadNamespace("renv")
+
+ }
+
+ # load bootstrap tools
+ bootstrap <- function(version, library) {
+
+ # attempt to download renv
+ tarball <- tryCatch(renv_bootstrap_download(version), error = identity)
+ if (inherits(tarball, "error"))
+ stop("failed to download renv ", version)
+
+ # now attempt to install
+ status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity)
+ if (inherits(status, "error"))
+ stop("failed to install renv ", version)
+
+ }
+
+ renv_bootstrap_tests_running <- function() {
+ getOption("renv.tests.running", default = FALSE)
+ }
+
+ renv_bootstrap_repos <- function() {
+
+ # check for repos override
+ repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA)
+ if (!is.na(repos))
+ return(repos)
+
+ # if we're testing, re-use the test repositories
+ if (renv_bootstrap_tests_running())
+ return(getOption("renv.tests.repos"))
+
+ # retrieve current repos
+ repos <- getOption("repos")
+
+ # ensure @CRAN@ entries are resolved
+ repos[repos == "@CRAN@"] <- "https://cloud.r-project.org"
+
+ # add in renv.bootstrap.repos if set
+ default <- c(CRAN = "https://cloud.r-project.org")
+ extra <- getOption("renv.bootstrap.repos", default = default)
+ repos <- c(repos, extra)
+
+ # remove duplicates that might've snuck in
+ dupes <- duplicated(repos) | duplicated(names(repos))
+ repos[!dupes]
+
+ }
+
+ renv_bootstrap_download <- function(version) {
+
+ # if the renv version number has 4 components, assume it must
+ # be retrieved via github
+ nv <- numeric_version(version)
+ components <- unclass(nv)[[1]]
+
+ methods <- if (length(components) == 4L) {
+ list(
+ renv_bootstrap_download_github
+ )
+ } else {
+ list(
+ renv_bootstrap_download_cran_latest,
+ renv_bootstrap_download_cran_archive
+ )
+ }
+
+ for (method in methods) {
+ path <- tryCatch(method(version), error = identity)
+ if (is.character(path) && file.exists(path))
+ return(path)
+ }
+
+ stop("failed to download renv ", version)
+
+ }
+
+ renv_bootstrap_download_impl <- function(url, destfile) {
+
+ mode <- "wb"
+
+ # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715
+ fixup <-
+ Sys.info()[["sysname"]] == "Windows" &&
+ substring(url, 1L, 5L) == "file:"
+
+ if (fixup)
+ mode <- "w+b"
+
+ utils::download.file(
+ url = url,
+ destfile = destfile,
+ mode = mode,
+ quiet = TRUE
+ )
+
+ }
+
+ renv_bootstrap_download_cran_latest <- function(version) {
+
+ repos <- renv_bootstrap_download_cran_latest_find(version)
+
+ message("* Downloading renv ", version, " from CRAN ... ", appendLF = FALSE)
+
+ info <- tryCatch(
+ utils::download.packages(
+ pkgs = "renv",
+ repos = repos,
+ destdir = tempdir(),
+ quiet = TRUE
+ ),
+ condition = identity
+ )
+
+ if (inherits(info, "condition")) {
+ message("FAILED")
+ return(FALSE)
+ }
+
+ message("OK")
+ info[1, 2]
+
+ }
+
+ renv_bootstrap_download_cran_latest_find <- function(version) {
+
+ all <- renv_bootstrap_repos()
+
+ for (repos in all) {
+
+ db <- tryCatch(
+ as.data.frame(
+ x = utils::available.packages(repos = repos),
+ stringsAsFactors = FALSE
+ ),
+ error = identity
+ )
+
+ if (inherits(db, "error"))
+ next
+
+ entry <- db[db$Package %in% "renv" & db$Version %in% version, ]
+ if (nrow(entry) == 0)
+ next
+
+ return(repos)
+
+ }
+
+ fmt <- "renv %s is not available from your declared package repositories"
+ stop(sprintf(fmt, version))
+
+ }
+
+ renv_bootstrap_download_cran_archive <- function(version) {
+
+ name <- sprintf("renv_%s.tar.gz", version)
+ repos <- renv_bootstrap_repos()
+ urls <- file.path(repos, "src/contrib/Archive/renv", name)
+ destfile <- file.path(tempdir(), name)
+
+ message("* Downloading renv ", version, " from CRAN archive ... ", appendLF = FALSE)
+
+ for (url in urls) {
+
+ status <- tryCatch(
+ renv_bootstrap_download_impl(url, destfile),
+ condition = identity
+ )
+
+ if (identical(status, 0L)) {
+ message("OK")
+ return(destfile)
+ }
+
+ }
+
+ message("FAILED")
+ return(FALSE)
+
+ }
+
+ renv_bootstrap_download_github <- function(version) {
+
+ enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE")
+ if (!identical(enabled, "TRUE"))
+ return(FALSE)
+
+ # prepare download options
+ pat <- Sys.getenv("GITHUB_PAT")
+ if (nzchar(Sys.which("curl")) && nzchar(pat)) {
+ fmt <- "--location --fail --header \"Authorization: token %s\""
+ extra <- sprintf(fmt, pat)
+ saved <- options("download.file.method", "download.file.extra")
+ options(download.file.method = "curl", download.file.extra = extra)
+ on.exit(do.call(base::options, saved), add = TRUE)
+ } else if (nzchar(Sys.which("wget")) && nzchar(pat)) {
+ fmt <- "--header=\"Authorization: token %s\""
+ extra <- sprintf(fmt, pat)
+ saved <- options("download.file.method", "download.file.extra")
+ options(download.file.method = "wget", download.file.extra = extra)
+ on.exit(do.call(base::options, saved), add = TRUE)
+ }
+
+ message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE)
+
+ url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version)
+ name <- sprintf("renv_%s.tar.gz", version)
+ destfile <- file.path(tempdir(), name)
+
+ status <- tryCatch(
+ renv_bootstrap_download_impl(url, destfile),
+ condition = identity
+ )
+
+ if (!identical(status, 0L)) {
+ message("FAILED")
+ return(FALSE)
+ }
+
+ message("OK")
+ return(destfile)
+
+ }
+
+ renv_bootstrap_install <- function(version, tarball, library) {
+
+ # attempt to install it into project library
+ message("* Installing renv ", version, " ... ", appendLF = FALSE)
+ dir.create(library, showWarnings = FALSE, recursive = TRUE)
+
+ # invoke using system2 so we can capture and report output
+ bin <- R.home("bin")
+ exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R"
+ r <- file.path(bin, exe)
+ args <- c("--vanilla", "CMD", "INSTALL", "-l", shQuote(library), shQuote(tarball))
+ output <- system2(r, args, stdout = TRUE, stderr = TRUE)
+ message("Done!")
+
+ # check for successful install
+ status <- attr(output, "status")
+ if (is.numeric(status) && !identical(status, 0L)) {
+ header <- "Error installing renv:"
+ lines <- paste(rep.int("=", nchar(header)), collapse = "")
+ text <- c(header, lines, output)
+ writeLines(text, con = stderr())
+ }
+
+ status
+
+ }
+
+ renv_bootstrap_prefix <- function() {
+
+ # construct version prefix
+ version <- paste(R.version$major, R.version$minor, sep = ".")
+ prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-")
+
+ # include SVN revision for development versions of R
+ # (to avoid sharing platform-specific artefacts with released versions of R)
+ devel <-
+ identical(R.version[["status"]], "Under development (unstable)") ||
+ identical(R.version[["nickname"]], "Unsuffered Consequences")
+
+ if (devel)
+ prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r")
+
+ # build list of path components
+ components <- c(prefix, R.version$platform)
+
+ # include prefix if provided by user
+ prefix <- Sys.getenv("RENV_PATHS_PREFIX")
+ if (nzchar(prefix))
+ components <- c(prefix, components)
+
+ # build prefix
+ paste(components, collapse = "/")
+
+ }
+
+ renv_bootstrap_library_root_name <- function(project) {
+
+ # use project name as-is if requested
+ asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE")
+ if (asis)
+ return(basename(project))
+
+ # otherwise, disambiguate based on project's path
+ id <- substring(renv_bootstrap_hash_text(project), 1L, 8L)
+ paste(basename(project), id, sep = "-")
+
+ }
+
+ renv_bootstrap_library_root <- function(project) {
+
+ path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA)
+ if (!is.na(path))
+ return(path)
+
+ path <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA)
+ if (!is.na(path)) {
+ name <- renv_bootstrap_library_root_name(project)
+ return(file.path(path, name))
+ }
+
+ file.path(project, "renv/library")
+
+ }
+
+ renv_bootstrap_validate_version <- function(version) {
+
+ loadedversion <- utils::packageDescription("renv", fields = "Version")
+ if (version == loadedversion)
+ return(TRUE)
+
+ # assume four-component versions are from GitHub; three-component
+ # versions are from CRAN
+ components <- strsplit(loadedversion, "[.-]")[[1]]
+ remote <- if (length(components) == 4L)
+ paste("rstudio/renv", loadedversion, sep = "@")
+ else
+ paste("renv", loadedversion, sep = "@")
+
+ fmt <- paste(
+ "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.",
+ "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.",
+ "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.",
+ sep = "\n"
+ )
+
+ msg <- sprintf(fmt, loadedversion, version, remote)
+ warning(msg, call. = FALSE)
+
+ FALSE
+
+ }
+
+ renv_bootstrap_hash_text <- function(text) {
+
+ hashfile <- tempfile("renv-hash-")
+ on.exit(unlink(hashfile), add = TRUE)
+
+ writeLines(text, con = hashfile)
+ tools::md5sum(hashfile)
+
+ }
+
+ renv_bootstrap_load <- function(project, libpath, version) {
+
+ # try to load renv from the project library
+ if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE))
+ return(FALSE)
+
+ # warn if the version of renv loaded does not match
+ renv_bootstrap_validate_version(version)
+
+ # load the project
+ renv::load(project)
+
+ TRUE
+
+ }
+
+ # construct path to library root
+ root <- renv_bootstrap_library_root(project)
+
+ # construct library prefix for platform
+ prefix <- renv_bootstrap_prefix()
+
+ # construct full libpath
+ libpath <- file.path(root, prefix)
+
+ # attempt to load
+ if (renv_bootstrap_load(project, libpath, version))
+ return(TRUE)
+
+ # load failed; inform user we're about to bootstrap
+ prefix <- paste("# Bootstrapping renv", version)
+ postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "")
+ header <- paste(prefix, postfix)
+ message(header)
+
+ # perform bootstrap
+ bootstrap(version, libpath)
+
+ # exit early if we're just testing bootstrap
+ if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA)))
+ return(TRUE)
+
+ # try again to load
+ if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) {
+ message("* Successfully installed and loaded renv ", version, ".")
+ return(renv::load())
+ }
+
+ # failed to download or load renv; warn the user
+ msg <- c(
+ "Failed to find an renv installation: the project will not be loaded.",
+ "Use `renv::activate()` to re-initialize the project."
+ )
+
+ warning(paste(msg, collapse = "\n"), call. = FALSE)
+
+})
diff --git a/pre_commit/resources/empty_template_environment.yml b/pre_commit/resources/empty_template_environment.yml
new file mode 100644
index 0000000..0f29f0c
--- /dev/null
+++ b/pre_commit/resources/empty_template_environment.yml
@@ -0,0 +1,9 @@
+channels:
+ - conda-forge
+ - defaults
+dependencies:
+ # This cannot be empty as otherwise no environment will be created.
+ # We're using openssl here as it is available on all systems and will
+ # most likely always be installed anyway.
+ # See https://github.com/conda/conda/issues/9487
+ - openssl
diff --git a/pre_commit/resources/empty_template_go.mod b/pre_commit/resources/empty_template_go.mod
new file mode 100644
index 0000000..892c4e5
--- /dev/null
+++ b/pre_commit/resources/empty_template_go.mod
@@ -0,0 +1 @@
+module pre-commit-placeholder-empty-module
diff --git a/pre_commit/resources/empty_template_main.go b/pre_commit/resources/empty_template_main.go
new file mode 100644
index 0000000..38dd16d
--- /dev/null
+++ b/pre_commit/resources/empty_template_main.go
@@ -0,0 +1,3 @@
+package main
+
+func main() {}
diff --git a/pre_commit/resources/empty_template_main.rs b/pre_commit/resources/empty_template_main.rs
new file mode 100644
index 0000000..f328e4d
--- /dev/null
+++ b/pre_commit/resources/empty_template_main.rs
@@ -0,0 +1 @@
+fn main() {}
diff --git a/pre_commit/resources/empty_template_package.json b/pre_commit/resources/empty_template_package.json
new file mode 100644
index 0000000..042e958
--- /dev/null
+++ b/pre_commit/resources/empty_template_package.json
@@ -0,0 +1,4 @@
+{
+ "name": "pre_commit_placeholder_package",
+ "version": "0.0.0"
+}
diff --git a/pre_commit/resources/empty_template_pre-commit-package-dev-1.rockspec b/pre_commit/resources/empty_template_pre-commit-package-dev-1.rockspec
new file mode 100644
index 0000000..f063c8e
--- /dev/null
+++ b/pre_commit/resources/empty_template_pre-commit-package-dev-1.rockspec
@@ -0,0 +1,12 @@
+package = "pre-commit-package"
+version = "dev-1"
+
+source = {
+ url = "git+ssh://git@github.com/pre-commit/pre-commit.git"
+}
+description = {}
+dependencies = {}
+build = {
+ type = "builtin",
+ modules = {},
+}
diff --git a/pre_commit/resources/empty_template_pre_commit_placeholder_package.gemspec b/pre_commit/resources/empty_template_pre_commit_placeholder_package.gemspec
new file mode 100644
index 0000000..630f0d4
--- /dev/null
+++ b/pre_commit/resources/empty_template_pre_commit_placeholder_package.gemspec
@@ -0,0 +1,6 @@
+Gem::Specification.new do |s|
+ s.name = 'pre_commit_placeholder_package'
+ s.version = '0.0.0'
+ s.summary = 'placeholder gem for pre-commit hooks'
+ s.authors = ['Anthony Sottile']
+end
diff --git a/pre_commit/resources/empty_template_pubspec.yaml b/pre_commit/resources/empty_template_pubspec.yaml
new file mode 100644
index 0000000..3be6ffe
--- /dev/null
+++ b/pre_commit/resources/empty_template_pubspec.yaml
@@ -0,0 +1,4 @@
+name: pre_commit_empty_pubspec
+environment:
+ sdk: '>=2.10.0'
+executables: {}
diff --git a/pre_commit/resources/empty_template_renv.lock b/pre_commit/resources/empty_template_renv.lock
new file mode 100644
index 0000000..d6e31f8
--- /dev/null
+++ b/pre_commit/resources/empty_template_renv.lock
@@ -0,0 +1,20 @@
+{
+ "R": {
+ "Version": "4.0.3",
+ "Repositories": [
+ {
+ "Name": "CRAN",
+ "URL": "https://cran.rstudio.com"
+ }
+ ]
+ },
+ "Packages": {
+ "renv": {
+ "Package": "renv",
+ "Version": "0.12.5",
+ "Source": "Repository",
+ "Repository": "CRAN",
+ "Hash": "5c0cdb37f063c58cdab3c7e9fbb8bd2c"
+ }
+ }
+}
diff --git a/pre_commit/resources/empty_template_setup.py b/pre_commit/resources/empty_template_setup.py
new file mode 100644
index 0000000..ef05eef
--- /dev/null
+++ b/pre_commit/resources/empty_template_setup.py
@@ -0,0 +1,4 @@
+from setuptools import setup
+
+
+setup(name='pre-commit-placeholder-package', version='0.0.0')
diff --git a/pre_commit/resources/hook-tmpl b/pre_commit/resources/hook-tmpl
new file mode 100755
index 0000000..53d29f9
--- /dev/null
+++ b/pre_commit/resources/hook-tmpl
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# File generated by pre-commit: https://pre-commit.com
+# ID: 138fd403232d2ddd5efb44317e38bf03
+
+# start templated
+INSTALL_PYTHON=''
+ARGS=(hook-impl)
+# end templated
+
+HERE="$(cd "$(dirname "$0")" && pwd)"
+ARGS+=(--hook-dir "$HERE" -- "$@")
+
+if [ -x "$INSTALL_PYTHON" ]; then
+ exec "$INSTALL_PYTHON" -mpre_commit "${ARGS[@]}"
+elif command -v pre-commit > /dev/null; then
+ exec pre-commit "${ARGS[@]}"
+else
+ echo '`pre-commit` not found. Did you forget to activate your virtualenv?' 1>&2
+ exit 1
+fi
diff --git a/pre_commit/resources/rbenv.tar.gz b/pre_commit/resources/rbenv.tar.gz
new file mode 100644
index 0000000..da2514e
--- /dev/null
+++ b/pre_commit/resources/rbenv.tar.gz
Binary files differ
diff --git a/pre_commit/resources/ruby-build.tar.gz b/pre_commit/resources/ruby-build.tar.gz
new file mode 100644
index 0000000..19d467f
--- /dev/null
+++ b/pre_commit/resources/ruby-build.tar.gz
Binary files differ
diff --git a/pre_commit/resources/ruby-download.tar.gz b/pre_commit/resources/ruby-download.tar.gz
new file mode 100644
index 0000000..92502a7
--- /dev/null
+++ b/pre_commit/resources/ruby-download.tar.gz
Binary files differ
diff --git a/pre_commit/staged_files_only.py b/pre_commit/staged_files_only.py
new file mode 100644
index 0000000..e1f81ba
--- /dev/null
+++ b/pre_commit/staged_files_only.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+import os.path
+import time
+from collections.abc import Generator
+
+from pre_commit import git
+from pre_commit.errors import FatalError
+from pre_commit.util import CalledProcessError
+from pre_commit.util import cmd_output
+from pre_commit.util import cmd_output_b
+from pre_commit.xargs import xargs
+
+
+logger = logging.getLogger('pre_commit')
+
+# without forcing submodule.recurse=0, changes in nested submodules will be
+# discarded if `submodule.recurse=1` is configured
+# we choose this instead of `--no-recurse-submodules` because it works on
+# versions of git before that option was added to `git checkout`
+_CHECKOUT_CMD = ('git', '-c', 'submodule.recurse=0', 'checkout', '--', '.')
+
+
def _git_apply(patch: str) -> None:
    """Apply the patch file ``patch`` to the working tree via ``git apply``.

    When the first attempt fails the same command is run once more with
    ``core.autocrlf=false``; a failure of that second attempt propagates
    to the caller as ``CalledProcessError``.
    """
    apply_args = ('apply', '--whitespace=nowarn', patch)
    try:
        cmd_output_b('git', *apply_args)
    except CalledProcessError:
        # Retry with autocrlf=false -- see #570
        cmd_output_b('git', '-c', 'core.autocrlf=false', *apply_args)
+
+
@contextlib.contextmanager
def _intent_to_add_cleared() -> Generator[None, None, None]:
    """Remove intent-to-add entries from the index for the context's duration.

    The files reported by ``git.intent_to_add_files()`` are dropped from
    the index with ``git rm --cached`` on entry and re-registered with
    ``git add --intent-to-add`` on exit, even if the body raises.  When
    there are no such files the context does nothing.
    """
    pending = git.intent_to_add_files()
    if not pending:
        yield
        return

    logger.warning('Unstaged intent-to-add files detected.')
    xargs(('git', 'rm', '--cached', '--'), pending)
    try:
        yield
    finally:
        xargs(('git', 'add', '--intent-to-add', '--'), pending)
+
+
@contextlib.contextmanager
def _unstaged_changes_cleared(patch_dir: str) -> Generator[None, None, None]:
    """Hide unstaged working-tree changes for the duration of the context.

    The difference between the index (written out as a tree) and the
    working tree is saved as a patch file inside ``patch_dir``, the working
    tree is checked out, and on exit the patch is applied again.  If the
    patch no longer applies, the working tree is checked out once more
    before re-applying it.

    Raises ``FatalError`` when ``git diff-index`` exits with a code other
    than 0 or 1.
    """
    tree = cmd_output('git', 'write-tree')[1].strip()
    diff_cmd = (
        'git', 'diff-index', '--ignore-submodules', '--binary',
        '--exit-code', '--no-color', '--no-ext-diff', tree, '--',
    )
    retcode, diff_stdout, diff_stderr = cmd_output_b(*diff_cmd, check=False)

    if retcode not in (0, 1):  # pragma: win32 no cover
        # some error occurred while requesting the diff
        e = CalledProcessError(retcode, diff_cmd, b'', diff_stderr)
        raise FatalError(
            f'pre-commit failed to diff -- perhaps due to permissions?\n\n{e}',
        )

    if retcode == 0 or not diff_stdout.strip():
        # Either there are no unstaged changes at all (exit code 0), or git
        # reported a difference but produced no diff text: due to behaviour
        # (probably a bug?) in git with crlf endings and autocrlf set to
        # either `true` or `input` sometimes git will refuse to show a
        # crlf-only diff to us :(
        yield
        return

    patch_filename = os.path.join(
        patch_dir, f'patch{int(time.time())}-{os.getpid()}',
    )
    logger.warning('Unstaged files detected.')
    logger.info(f'Stashing unstaged files to {patch_filename}.')
    # Save the current unstaged changes as a patch
    os.makedirs(patch_dir, exist_ok=True)
    with open(patch_filename, 'wb') as patch_file:
        patch_file.write(diff_stdout)

    # prevent recursive post-checkout hooks (#1418)
    no_checkout_env = dict(os.environ, _PRE_COMMIT_SKIP_POST_CHECKOUT='1')

    try:
        cmd_output_b(*_CHECKOUT_CMD, env=no_checkout_env)
        yield
    finally:
        # Try to apply the patch we saved
        try:
            _git_apply(patch_filename)
        except CalledProcessError:
            logger.warning(
                'Stashed changes conflicted with hook auto-fixes... '
                'Rolling back fixes...',
            )
            # The patch did not apply, presumably because hooks modified
            # the same files: discard the hook changes, then apply again.
            cmd_output_b(*_CHECKOUT_CMD, env=no_checkout_env)
            _git_apply(patch_filename)

        logger.info(f'Restored changes from {patch_filename}.')
+
+
@contextlib.contextmanager
def staged_files_only(patch_dir: str) -> Generator[None, None, None]:
    """Make the working directory reflect only staged content.

    Inside the context, intent-to-add entries and unstaged changes are
    cleared; ``patch_dir`` is where the unstaged changes are saved.
    """
    with _intent_to_add_cleared():
        with _unstaged_changes_cleared(patch_dir):
            yield
diff --git a/pre_commit/store.py b/pre_commit/store.py
new file mode 100644
index 0000000..84bc09a
--- /dev/null
+++ b/pre_commit/store.py
@@ -0,0 +1,254 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+import os.path
+import sqlite3
+import tempfile
+from collections.abc import Generator
+from collections.abc import Sequence
+from typing import Callable
+
+import pre_commit.constants as C
+from pre_commit import file_lock
+from pre_commit import git
+from pre_commit.util import CalledProcessError
+from pre_commit.util import clean_path_on_failure
+from pre_commit.util import cmd_output_b
+from pre_commit.util import resource_text
+from pre_commit.util import rmtree
+
+
+logger = logging.getLogger('pre_commit')
+
+
+def _get_default_directory() -> str:
+ """Returns the default directory for the Store. This is intentionally
+ underscored to indicate that `Store.get_default_directory` is the intended
+ way to get this information. This is also done so
+ `Store.get_default_directory` can be mocked in tests and
+ `_get_default_directory` can be tested.
+ """
+ ret = os.environ.get('PRE_COMMIT_HOME') or os.path.join(
+ os.environ.get('XDG_CACHE_HOME') or os.path.expanduser('~/.cache'),
+ 'pre-commit',
+ )
+ return os.path.realpath(ret)
+
+
+_LOCAL_RESOURCES = (
+ 'Cargo.toml', 'main.go', 'go.mod', 'main.rs', '.npmignore',
+ 'package.json', 'pre-commit-package-dev-1.rockspec',
+ 'pre_commit_placeholder_package.gemspec', 'setup.py',
+ 'environment.yml', 'Makefile.PL', 'pubspec.yaml',
+ 'renv.lock', 'renv/activate.R', 'renv/LICENSE.renv',
+)
+
+
def _make_local_repo(directory: str) -> None:
    """Write every entry of ``_LOCAL_RESOURCES`` below ``directory``.

    Each entry's content comes from the packaged resource named
    ``empty_template_<basename>``; any sub-directory in the entry is
    created under ``directory`` as needed.
    """
    for resource in _LOCAL_RESOURCES:
        subdir, basename = os.path.split(resource)
        # read the template first so nothing is created if it is missing
        text = resource_text(f'empty_template_{basename}')
        destination_dir = os.path.join(directory, subdir)
        os.makedirs(destination_dir, exist_ok=True)
        with open(os.path.join(destination_dir, basename), 'w') as out:
            out.write(text)
+
+
class Store:
    """Directory-backed store of repositories indexed by a sqlite database.

    The store lives in ``directory`` (``Store.get_default_directory()``
    when none is given).  ``db.db`` inside it holds a ``repos`` table that
    maps ``(repo, ref)`` to a path and a ``configs`` table of config file
    paths.  Creating the database or a new repository happens under
    ``exclusive_lock``.
    """

    get_default_directory = staticmethod(_get_default_directory)

    def __init__(self, directory: str | None = None) -> None:
        """Create the store directory and database when they are missing."""
        self.directory = directory or Store.get_default_directory()
        self.db_path = os.path.join(self.directory, 'db.db')
        # an existing directory that is not writable makes the store
        # read-only
        self.readonly = (
            os.path.exists(self.directory) and
            not os.access(self.directory, os.W_OK)
        )

        if not os.path.exists(self.directory):
            os.makedirs(self.directory, exist_ok=True)
            readme = os.path.join(self.directory, 'README')
            with open(readme, 'w') as f:
                f.write(
                    'This directory is maintained by the pre-commit project.\n'
                    'Learn more: https://github.com/pre-commit/pre-commit\n',
                )

        if os.path.exists(self.db_path):
            return
        with self.exclusive_lock():
            # Another process may have already completed this work
            if os.path.exists(self.db_path):  # pragma: no cover (race)
                return
            # Build the schema in a temporary file so that an interrupt
            # (^C) between db creation and the CREATE TABLE statements
            # cannot leave a half-initialized database at `db_path`.
            fd, tmpfile = tempfile.mkstemp(dir=self.directory)
            # We'll be managing this file ourselves
            os.close(fd)
            with self.connect(db_path=tmpfile) as db:
                db.executescript(
                    'CREATE TABLE repos ('
                    ' repo TEXT NOT NULL,'
                    ' ref TEXT NOT NULL,'
                    ' path TEXT NOT NULL,'
                    ' PRIMARY KEY (repo, ref)'
                    ');',
                )
                self._create_config_table(db)

            # Atomic file move
            os.replace(tmpfile, self.db_path)

    @contextlib.contextmanager
    def exclusive_lock(self) -> Generator[None, None, None]:
        """Hold the lock on ``.lock`` in the store directory."""
        def blocked_cb() -> None:  # pragma: no cover (tests are in-process)
            logger.info('Locking pre-commit directory')

        lock_path = os.path.join(self.directory, '.lock')
        with file_lock.lock(lock_path, blocked_cb):
            yield

    @contextlib.contextmanager
    def connect(
            self,
            db_path: str | None = None,
    ) -> Generator[sqlite3.Connection, None, None]:
        """Yield a sqlite connection to ``db_path`` (default: the store db).

        The body runs inside a transaction and the connection is closed
        when the context exits.
        """
        target = db_path or self.db_path
        # sqlite doesn't close its fd with its contextmanager >.<
        # contextlib.closing fixes this.
        # See: https://stackoverflow.com/a/28032829/812183
        # The second context (`db` itself) is what creates the transaction.
        with contextlib.closing(sqlite3.connect(target)) as db, db:
            yield db

    @classmethod
    def db_repo_name(cls, repo: str, deps: Sequence[str]) -> str:
        """Return the database key for ``repo`` with ``deps``.

        With dependencies the key is ``repo:dep1,dep2,...``; without, it is
        ``repo`` unchanged.
        """
        return f'{repo}:{",".join(deps)}' if deps else repo

    def _new_repo(
            self,
            repo: str,
            ref: str,
            deps: Sequence[str],
            make_strategy: Callable[[str], None],
    ) -> str:
        """Return the path recorded for ``(repo, ref)``, creating it if new.

        A missing entry is created under the exclusive lock:
        ``make_strategy`` populates a fresh temporary directory inside the
        store, and the directory is then recorded in the ``repos`` table.
        """
        repo = self.db_repo_name(repo, deps)

        def _lookup() -> str | None:
            # Check if we already exist
            with self.connect() as db:
                row = db.execute(
                    'SELECT path FROM repos WHERE repo = ? AND ref = ?',
                    (repo, ref),
                ).fetchone()
                return row[0] if row else None

        existing = _lookup()
        if existing:
            return existing
        with self.exclusive_lock():
            # Another process may have already completed this work
            existing = _lookup()
            if existing:  # pragma: no cover (race)
                return existing

            logger.info(f'Initializing environment for {repo}.')

            directory = tempfile.mkdtemp(prefix='repo', dir=self.directory)
            with clean_path_on_failure(directory):
                make_strategy(directory)

            # Update our db with the created repo
            with self.connect() as db:
                db.execute(
                    'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)',
                    [repo, ref, directory],
                )
        return directory

    def _complete_clone(self, ref: str, git_cmd: Callable[..., None]) -> None:
        """Perform a complete clone of a repository and its submodules."""
        for args in (
                ('fetch', 'origin', '--tags'),
                ('checkout', ref),
                ('submodule', 'update', '--init', '--recursive'),
        ):
            git_cmd(*args)

    def _shallow_clone(self, ref: str, git_cmd: Callable[..., None]) -> None:
        """Perform a shallow clone of a repository and its submodules."""
        protocol = ('-c', 'protocol.version=2')
        git_cmd(*protocol, 'fetch', 'origin', ref, '--depth=1')
        git_cmd('checkout', 'FETCH_HEAD')
        git_cmd(
            *protocol, 'submodule', 'update', '--init', '--recursive',
            '--depth=1',
        )

    def clone(self, repo: str, ref: str, deps: Sequence[str] = ()) -> str:
        """Clone the given url and checkout the specific ref.

        A shallow clone is attempted first; if any of its git commands
        fails, a complete clone is performed instead.
        """
        def clone_strategy(directory: str) -> None:
            git.init_repo(directory, repo)
            env = git.no_git_env()

            def _git_cmd(*args: str) -> None:
                cmd_output_b('git', *args, cwd=directory, env=env)

            try:
                self._shallow_clone(ref, _git_cmd)
            except CalledProcessError:
                self._complete_clone(ref, _git_cmd)

        return self._new_repo(repo, ref, deps, clone_strategy)

    def make_local(self, deps: Sequence[str]) -> str:
        """Return the path of the ``local`` repository for ``deps``."""
        return self._new_repo(
            'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
        )

    def _create_config_table(self, db: sqlite3.Connection) -> None:
        """Create the ``configs`` table on ``db`` unless it already exists."""
        db.executescript(
            'CREATE TABLE IF NOT EXISTS configs ('
            ' path TEXT NOT NULL,'
            ' PRIMARY KEY (path)'
            ');',
        )

    def mark_config_used(self, path: str) -> None:
        """Record the real path of config file ``path`` in the database.

        Nothing is recorded for a read-only store or a nonexistent file.
        """
        if self.readonly:  # pragma: win32 no cover
            return
        real_path = os.path.realpath(path)
        # don't insert config files that do not exist
        if not os.path.exists(real_path):
            return
        with self.connect() as db:
            # TODO: eventually remove this and only create in _create
            self._create_config_table(db)
            db.execute(
                'INSERT OR IGNORE INTO configs VALUES (?)', (real_path,),
            )

    def select_all_configs(self) -> list[str]:
        """Return every path stored in the ``configs`` table."""
        with self.connect() as db:
            self._create_config_table(db)
            rows = db.execute('SELECT path FROM configs').fetchall()
        return [row[0] for row in rows]

    def delete_configs(self, configs: list[str]) -> None:
        """Delete the given paths from the ``configs`` table."""
        params = [(config,) for config in configs]
        with self.connect() as db:
            db.executemany('DELETE FROM configs WHERE path = ?', params)

    def select_all_repos(self) -> list[tuple[str, str, str]]:
        """Return every ``(repo, ref, path)`` row of the ``repos`` table."""
        with self.connect() as db:
            cursor = db.execute('SELECT repo, ref, path from repos')
            return cursor.fetchall()

    def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None:
        """Delete the ``repos`` row for the repo, then remove ``path``."""
        with self.connect() as db:
            db.execute(
                'DELETE FROM repos WHERE repo = ? and ref = ?',
                (db_repo_name, ref),
            )
        rmtree(path)
diff --git a/pre_commit/util.py b/pre_commit/util.py
new file mode 100644
index 0000000..b3682d4
--- /dev/null
+++ b/pre_commit/util.py
@@ -0,0 +1,238 @@
+from __future__ import annotations
+
+import contextlib
+import errno
+import importlib.resources
+import os.path
+import shutil
+import stat
+import subprocess
+import sys
+from collections.abc import Generator
+from types import TracebackType
+from typing import Any
+from typing import Callable
+
+from pre_commit import parse_shebang
+
+
def force_bytes(exc: Any) -> bytes:
    """Return a bytes rendering of ``exc`` without raising on odd objects.

    ``bytes(exc)`` is tried first (only ``TypeError`` is tolerated), then
    ``str(exc).encode()`` (any ``Exception`` is tolerated); if both fail a
    placeholder naming the object's type is returned.
    """
    try:
        return bytes(exc)
    except TypeError:
        pass

    try:
        return str(exc).encode()
    except Exception:
        pass

    return f'<unprintable {type(exc).__name__} object>'.encode()
+
+
@contextlib.contextmanager
def clean_path_on_failure(path: str) -> Generator[None, None, None]:
    """Remove ``path`` if the body of the context raises.

    Any exception (including ``BaseException`` subclasses) triggers the
    cleanup and is then re-raised; a path that does not exist is left
    alone.
    """
    try:
        yield
    except BaseException:
        path_present = os.path.exists(path)
        if path_present:
            rmtree(path)
        raise
+
+
def resource_text(filename: str) -> str:
    """Return the text of ``filename`` from the ``pre_commit.resources``
    package.
    """
    resource_root = importlib.resources.files('pre_commit.resources')
    resource = resource_root.joinpath(filename)
    return resource.read_text()
+
+
def make_executable(filename: str) -> None:
    """Add the user, group and other execute bits to ``filename``'s mode."""
    execute_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | execute_bits)
+
+
class CalledProcessError(RuntimeError):
    """A command's failure, carrying its return code and captured output.

    ``bytes(err)`` / ``str(err)`` render the command, return code, stdout
    and stderr; an empty or missing stream is shown as ``(none)``.
    """

    def __init__(
            self,
            returncode: int,
            cmd: tuple[str, ...],
            stdout: bytes,
            stderr: bytes | None,
    ) -> None:
        super().__init__(returncode, cmd, stdout, stderr)
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __bytes__(self) -> bytes:
        def _render(stream: bytes | None) -> bytes:
            # empty / missing output gets an inline marker, anything else
            # starts on a new line with every line indented
            if not stream:
                return b' (none)'
            return b'\n ' + stream.replace(b'\n', b'\n ').rstrip()

        header = (
            f'command: {self.cmd!r}\n'
            f'return code: {self.returncode}\n'
        )
        return (
            header.encode() +
            b'stdout:' + _render(self.stdout) + b'\n' +
            b'stderr:' + _render(self.stderr)
        )

    def __str__(self) -> str:
        return bytes(self).decode()
+
+
+def _setdefault_kwargs(kwargs: dict[str, Any]) -> None:
+ for arg in ('stdin', 'stdout', 'stderr'):
+ kwargs.setdefault(arg, subprocess.PIPE)
+
+
def _oserror_to_output(e: OSError) -> tuple[int, bytes, None]:
    """Turn ``e`` into a ``(returncode, stdout, stderr)`` triple.

    The return code is 1, stdout is the error's bytes rendering terminated
    by exactly one newline, and stderr is ``None``.
    """
    message = force_bytes(e).rstrip(b'\n')
    return 1, message + b'\n', None
+
+
def cmd_output_b(
        *cmd: str,
        check: bool = True,
        **kwargs: Any,
) -> tuple[int, bytes, bytes | None]:
    """Run ``cmd`` and return ``(returncode, stdout, stderr)`` as bytes.

    ``kwargs`` are passed to ``subprocess.Popen``; stdin/stdout/stderr
    default to pipes.  A command that cannot be resolved or started is
    reported through the returned triple rather than as an ``OSError``.

    Raises ``CalledProcessError`` when ``check`` is true and the return
    code is non-zero.
    """
    _setdefault_kwargs(kwargs)

    try:
        cmd = parse_shebang.normalize_cmd(cmd, env=kwargs.get('env'))
    except parse_shebang.ExecutableNotFoundError as e:
        outcome = e.to_output()
    else:
        try:
            proc = subprocess.Popen(cmd, **kwargs)
        except OSError as e:
            outcome = _oserror_to_output(e)
        else:
            out, err = proc.communicate()
            outcome = (proc.returncode, out, err)

    returncode, stdout_b, stderr_b = outcome
    if check and returncode:
        raise CalledProcessError(returncode, cmd, stdout_b, stderr_b)

    return returncode, stdout_b, stderr_b
+
+
def cmd_output(*cmd: str, **kwargs: Any) -> tuple[int, str, str | None]:
    """Like ``cmd_output_b`` but with stdout and stderr decoded to ``str``.

    A stream that came back as ``None`` stays ``None``.
    """
    def _decode(stream: bytes | None) -> str | None:
        return None if stream is None else stream.decode()

    returncode, stdout_b, stderr_b = cmd_output_b(*cmd, **kwargs)
    return returncode, _decode(stdout_b), _decode(stderr_b)
+
+
if sys.platform == 'win32':  # pragma: no cover
    # no pty support here: fall back to the plain pipe implementation
    cmd_output_p = cmd_output_b
else:  # pragma: win32 no cover
    from os import openpty
    import termios

    class Pty:
        """Context manager owning the two file descriptors of a pty.

        ``r`` and ``w`` are the descriptors returned by ``openpty()`` (or
        ``None`` while closed).
        """

        def __init__(self) -> None:
            self.r: int | None = None
            self.w: int | None = None

        def __enter__(self) -> Pty:
            self.r, self.w = openpty()

            # tty flags normally change \n to \r\n -- turn that off in the
            # output flags (index 1 of the termios attribute list)
            attrs = termios.tcgetattr(self.w)
            assert isinstance(attrs[1], int)
            attrs[1] &= ~(termios.ONLCR | termios.OPOST)
            termios.tcsetattr(self.w, termios.TCSANOW, attrs)

            return self

        def close_w(self) -> None:
            """Close the ``w`` descriptor; a no-op when already closed."""
            if self.w is None:
                return
            os.close(self.w)
            self.w = None

        def close_r(self) -> None:
            """Close the ``r`` descriptor, which must still be open."""
            assert self.r is not None
            os.close(self.r)
            self.r = None

        def __exit__(
                self,
                exc_type: type[BaseException] | None,
                exc_value: BaseException | None,
                traceback: TracebackType | None,
        ) -> None:
            self.close_w()
            self.close_r()

    def cmd_output_p(
            *cmd: str,
            check: bool = True,
            **kwargs: Any,
    ) -> tuple[int, bytes, bytes | None]:
        """Run ``cmd`` with stdout and stderr attached to a pty.

        Callers must pass ``check=False`` and ``stderr=subprocess.STDOUT``.
        Returns ``(returncode, output, None)`` where ``output`` is
        everything read from the pty.
        """
        assert check is False
        assert kwargs['stderr'] == subprocess.STDOUT, kwargs['stderr']
        _setdefault_kwargs(kwargs)

        try:
            cmd = parse_shebang.normalize_cmd(cmd)
        except parse_shebang.ExecutableNotFoundError as e:
            return e.to_output()

        with open(os.devnull) as devnull, Pty() as pty:
            assert pty.r is not None
            kwargs.update({'stdin': devnull, 'stdout': pty.w, 'stderr': pty.w})
            try:
                proc = subprocess.Popen(cmd, **kwargs)
            except OSError as e:
                return _oserror_to_output(e)

            # the child holds its own copy of the write end now
            pty.close_w()

            chunks = []
            while True:
                try:
                    chunk = os.read(pty.r, 4096)
                except OSError as e:
                    # EIO is treated as end of output
                    if e.errno != errno.EIO:
                        raise
                    break
                if not chunk:
                    break
                chunks.append(chunk)

        return proc.wait(), b''.join(chunks), None
+
+
+def _handle_readonly(
+ func: Callable[[str], object],
+ path: str,
+ exc: OSError,
+) -> None:
+ if (
+ func in (os.rmdir, os.remove, os.unlink) and
+ exc.errno in {errno.EACCES, errno.EPERM}
+ ):
+ for p in (path, os.path.dirname(path)):
+ os.chmod(p, os.stat(p).st_mode | stat.S_IWUSR)
+ func(path)
+ else:
+ raise
+
+
if sys.version_info >= (3, 12):  # pragma: >=3.12 cover
    def rmtree(path: str) -> None:
        """On windows, rmtree fails for readonly dirs."""
        shutil.rmtree(path, ignore_errors=False, onexc=_handle_readonly)
else:  # pragma: <3.12 cover
    def _handle_readonly_old(
            func: Callable[[str], object],
            path: str,
            excinfo: tuple[type[OSError], OSError, TracebackType],
    ) -> None:
        # adapt the `onerror` signature, which passes an exc_info triple,
        # to the handler taking the exception instance
        _, exc, _ = excinfo
        return _handle_readonly(func, path, exc)

    def rmtree(path: str) -> None:
        shutil.rmtree(path, ignore_errors=False, onerror=_handle_readonly_old)
+
+
def win_exe(s: str) -> str:
    """Return ``s`` with ``.exe`` appended on win32, unchanged elsewhere."""
    if sys.platform == 'win32':
        return f'{s}.exe'
    return s
diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py
new file mode 100644
index 0000000..22580f5
--- /dev/null
+++ b/pre_commit/xargs.py
@@ -0,0 +1,185 @@
+from __future__ import annotations
+
+import concurrent.futures
+import contextlib
+import math
+import multiprocessing
+import os
+import subprocess
+import sys
+from collections.abc import Generator
+from collections.abc import Iterable
+from collections.abc import MutableMapping
+from collections.abc import Sequence
+from typing import Any
+from typing import Callable
+from typing import TypeVar
+
+from pre_commit import parse_shebang
+from pre_commit.util import cmd_output_b
+from pre_commit.util import cmd_output_p
+
+TArg = TypeVar('TArg')
+TRet = TypeVar('TRet')
+
+
def cpu_count() -> int:
    """Return the number of CPUs to assume for concurrency.

    Prefers the size of the current process's scheduler affinity set and
    falls back to ``multiprocessing.cpu_count()``, or 1 when even that is
    not implemented.
    """
    try:
        # On systems that support it, this will return a more accurate
        # count of usable CPUs for the current process, which will take
        # into account cgroup limits
        usable = os.sched_getaffinity(0)
    except AttributeError:
        try:
            return multiprocessing.cpu_count()
        except NotImplementedError:
            return 1
    else:
        return len(usable)
+
+
+def _environ_size(_env: MutableMapping[str, str] | None = None) -> int:
+ environ = _env if _env is not None else getattr(os, 'environb', os.environ)
+ size = 8 * len(environ) # number of pointers in `envp`
+ for k, v in environ.items():
+ size += len(k) + len(v) + 2 # c strings in `envp`
+ return size
+
+
def _get_platform_max_length() -> int:  # pragma: no cover (platform specific)
    """Return the maximum command length to target on this platform.

    On posix this is ``SC_ARG_MAX`` minus 2048 and the environment size,
    clamped to the range ``2 ** 12`` .. ``2 ** 17``.
    """
    if os.name == 'nt':
        return 2 ** 15 - 2048  # UNICODE_STRING max - headroom
    if os.name != 'posix':
        # posix minimum
        return 2 ** 12

    available = os.sysconf('SC_ARG_MAX') - 2048 - _environ_size()
    return max(min(available, 2 ** 17), 2 ** 12)
+
+
+def _command_length(*cmd: str) -> int:
+ full_cmd = ' '.join(cmd)
+
+ # win32 uses the amount of characters, more details at:
+ # https://github.com/pre-commit/pre-commit/pull/839
+ if sys.platform == 'win32':
+ return len(full_cmd.encode('utf-16le')) // 2
+ else:
+ return len(full_cmd.encode(sys.getfilesystemencoding()))
+
+
class ArgumentTooLongError(RuntimeError):
    """Raised by ``partition`` for an argument that cannot fit in a command
    on its own.
    """
+
+
def partition(
        cmd: Sequence[str],
        varargs: Sequence[str],
        target_concurrency: int,
        _max_length: int | None = None,
) -> tuple[tuple[str, ...], ...]:
    """Split ``varargs`` into command lines that each start with ``cmd``.

    Arguments keep their order.  A partition is closed when adding the
    next argument would exceed ``_max_length`` (default: the platform
    maximum) or the per-partition argument cap.  At least one partition is
    always returned, even for empty ``varargs``.

    Raises ``ArgumentTooLongError`` when a single argument does not fit
    next to ``cmd`` on its own.
    """
    _max_length = _max_length or _get_platform_max_length()

    # Generally, we try to partition evenly into at least `target_concurrency`
    # partitions, but we don't want a bunch of tiny partitions.
    max_args = max(4, math.ceil(len(varargs) / target_concurrency))

    cmd = tuple(cmd)
    # length of a partition that holds no arguments yet
    base_length = _command_length(*cmd) + 1

    partitions = []
    pending: list[str] = []
    running_length = base_length
    for arg in varargs:
        arg_length = _command_length(arg) + 1
        if pending and (
                running_length + arg_length > _max_length or
                len(pending) >= max_args
        ):
            # the current partition is full: emit it and start a new one
            partitions.append(cmd + tuple(pending))
            pending = []
            running_length = base_length

        if running_length + arg_length > _max_length:
            # only reachable with an empty partition, so `arg` can never fit
            raise ArgumentTooLongError(arg)

        pending.append(arg)
        running_length += arg_length

    partitions.append(cmd + tuple(pending))

    return tuple(partitions)
+
+
+@contextlib.contextmanager
+def _thread_mapper(maxsize: int) -> Generator[
+ Callable[[Callable[[TArg], TRet], Iterable[TArg]], Iterable[TRet]],
+ None, None,
+]:
+ if maxsize == 1:
+ yield map
+ else:
+ with concurrent.futures.ThreadPoolExecutor(maxsize) as ex:
+ yield ex.map
+
+
def xargs(
        cmd: tuple[str, ...],
        varargs: Sequence[str],
        *,
        color: bool = False,
        target_concurrency: int = 1,
        _max_length: int = _get_platform_max_length(),
        **kwargs: Any,
) -> tuple[int, bytes]:
    """A simplified implementation of xargs.

    ``varargs`` are partitioned into command lines starting with ``cmd``
    and each one is run; remaining ``kwargs`` are forwarded to the runner.

    color: Make a pty if on a platform that supports it
    target_concurrency: Target number of partitions to run concurrently

    Returns ``(retcode, output)``: the return code with the largest
    absolute value among the runs, and their combined output (stderr is
    merged into stdout) in partition order.
    """
    runner = cmd_output_p if color else cmd_output_b

    try:
        cmd = parse_shebang.normalize_cmd(cmd)
    except parse_shebang.ExecutableNotFoundError as e:
        return e.to_output()[:2]

    # on windows, batch files have a separate length limit than windows itself
    is_batch_file = (
        sys.platform == 'win32' and
        cmd[0].lower().endswith(('.bat', '.cmd'))
    )
    if is_batch_file:  # pragma: win32 cover
        # this is implementation details but the command gets translated into
        # full/path/to/cmd.exe /c *cmd
        cmd_exe = parse_shebang.find_executable('cmd.exe')
        # 1024 is additionally subtracted to give headroom for further
        # expansion inside the batch file
        _max_length = 8192 - len(cmd_exe) - len(' /c ') - 1024

    partitions = partition(cmd, varargs, target_concurrency, _max_length)

    def run_cmd_partition(
            run_cmd: tuple[str, ...],
    ) -> tuple[int, bytes, bytes | None]:
        return runner(
            *run_cmd, check=False, stderr=subprocess.STDOUT, **kwargs,
        )

    worst_retcode = 0
    output_chunks = []
    threads = min(len(partitions), target_concurrency)
    with _thread_mapper(threads) as thread_map:
        results = thread_map(run_cmd_partition, partitions)

        for proc_retcode, proc_out, _ in results:
            if abs(proc_retcode) > abs(worst_retcode):
                worst_retcode = proc_retcode
            output_chunks.append(proc_out)

    return worst_retcode, b''.join(output_chunks)
diff --git a/pre_commit/yaml.py b/pre_commit/yaml.py
new file mode 100644
index 0000000..bdf4ec4
--- /dev/null
+++ b/pre_commit/yaml.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+import functools
+from typing import Any
+
+import yaml
+
+Loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)
+yaml_load = functools.partial(yaml.load, Loader=Loader)
+Dumper = getattr(yaml, 'CSafeDumper', yaml.SafeDumper)
+
+
def yaml_dump(o: Any, **kwargs: Any) -> str:
    """Serialize ``o`` to YAML text using the module's ``Dumper``.

    Output is block style, indented by 4, with keys in their original
    order; extra ``kwargs`` are forwarded to ``yaml.dump``.
    """
    # when python/mypy#1484 is solved, this can be `functools.partial`
    return yaml.dump(
        o,
        Dumper=Dumper,
        default_flow_style=False,
        indent=4,
        sort_keys=False,
        **kwargs,
    )