diff options
Diffstat (limited to 'pre_commit/git.py')
-rw-r--r-- | pre_commit/git.py | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/pre_commit/git.py b/pre_commit/git.py new file mode 100644 index 0000000..19aac38 --- /dev/null +++ b/pre_commit/git.py @@ -0,0 +1,245 @@ +from __future__ import annotations + +import logging +import os.path +import sys +from collections.abc import Mapping + +from pre_commit.errors import FatalError +from pre_commit.util import CalledProcessError +from pre_commit.util import cmd_output +from pre_commit.util import cmd_output_b + +logger = logging.getLogger(__name__) + +# see #2046 +NO_FS_MONITOR = ('-c', 'core.useBuiltinFSMonitor=false') + + +def zsplit(s: str) -> list[str]: + s = s.strip('\0') + if s: + return s.split('\0') + else: + return [] + + +def no_git_env(_env: Mapping[str, str] | None = None) -> dict[str, str]: + # Too many bugs dealing with environment variables and GIT: + # https://github.com/pre-commit/pre-commit/issues/300 + # In git 2.6.3 (maybe others), git exports GIT_WORK_TREE while running + # pre-commit hooks + # In git 1.9.1 (maybe others), git exports GIT_DIR and GIT_INDEX_FILE + # while running pre-commit hooks in submodules. + # GIT_DIR: Causes git clone to clone wrong thing + # GIT_INDEX_FILE: Causes 'error invalid object ...' during commit + _env = _env if _env is not None else os.environ + return { + k: v for k, v in _env.items() + if not k.startswith('GIT_') or + k.startswith(('GIT_CONFIG_KEY_', 'GIT_CONFIG_VALUE_')) or + k in { + 'GIT_EXEC_PATH', 'GIT_SSH', 'GIT_SSH_COMMAND', 'GIT_SSL_CAINFO', + 'GIT_SSL_NO_VERIFY', 'GIT_CONFIG_COUNT', + 'GIT_HTTP_PROXY_AUTHMETHOD', + 'GIT_ALLOW_PROTOCOL', + 'GIT_ASKPASS', + } + } + + +def get_root() -> str: + # Git 2.25 introduced a change to "rev-parse --show-toplevel" that exposed + # underlying volumes for Windows drives mapped with SUBST. We use + # "rev-parse --show-cdup" to get the appropriate path, but must perform + # an extra check to see if we are in the .git directory. + try: + root = os.path.abspath( + cmd_output('git', 'rev-parse', '--show-cdup')[1].strip(), + ) + inside_git_dir = cmd_output( + 'git', 'rev-parse', '--is-inside-git-dir', + )[1].strip() + except CalledProcessError: + raise FatalError( + 'git failed. Is it installed, and are you in a Git repository ' + 'directory?', + ) + if inside_git_dir != 'false': + raise FatalError( + 'git toplevel unexpectedly empty! make sure you are not ' + 'inside the `.git` directory of your repository.', + ) + return root + + +def get_git_dir(git_root: str = '.') -> str: + opt = '--git-dir' + _, out, _ = cmd_output('git', 'rev-parse', opt, cwd=git_root) + git_dir = out.strip() + if git_dir != opt: + return os.path.normpath(os.path.join(git_root, git_dir)) + else: + raise AssertionError('unreachable: no git dir') + + +def get_git_common_dir(git_root: str = '.') -> str: + opt = '--git-common-dir' + _, out, _ = cmd_output('git', 'rev-parse', opt, cwd=git_root) + git_common_dir = out.strip() + if git_common_dir != opt: + return os.path.normpath(os.path.join(git_root, git_common_dir)) + else: # pragma: no cover (git < 2.5) + return get_git_dir(git_root) + + +def is_in_merge_conflict() -> bool: + git_dir = get_git_dir('.') + return ( + os.path.exists(os.path.join(git_dir, 'MERGE_MSG')) and + os.path.exists(os.path.join(git_dir, 'MERGE_HEAD')) + ) + + +def parse_merge_msg_for_conflicts(merge_msg: bytes) -> list[str]: + # Conflicted files start with tabs + return [ + line.lstrip(b'#').strip().decode() + for line in merge_msg.splitlines() + # '#\t' for git 2.4.1 + if line.startswith((b'\t', b'#\t')) + ] + + +def get_conflicted_files() -> set[str]: + logger.info('Checking merge-conflict files only.') + # Need to get the conflicted files from the MERGE_MSG because they could + # have resolved the conflict by choosing one side or the other + with open(os.path.join(get_git_dir('.'), 'MERGE_MSG'), 'rb') as f: + merge_msg = f.read() + merge_conflict_filenames = parse_merge_msg_for_conflicts(merge_msg) + + # This will get the rest of the changes made after the merge. + # If they resolved the merge conflict by choosing a mesh of both sides + # this will also include the conflicted files + tree_hash = cmd_output('git', 'write-tree')[1].strip() + merge_diff_filenames = zsplit( + cmd_output( + 'git', 'diff', '--name-only', '--no-ext-diff', '-z', + '-m', tree_hash, 'HEAD', 'MERGE_HEAD', + )[1], + ) + return set(merge_conflict_filenames) | set(merge_diff_filenames) + + +def get_staged_files(cwd: str | None = None) -> list[str]: + return zsplit( + cmd_output( + 'git', 'diff', '--staged', '--name-only', '--no-ext-diff', '-z', + # Everything except for D + '--diff-filter=ACMRTUXB', + cwd=cwd, + )[1], + ) + + +def intent_to_add_files() -> list[str]: + _, stdout, _ = cmd_output( + 'git', 'diff', '--no-ext-diff', '--ignore-submodules', + '--diff-filter=A', '--name-only', '-z', + ) + return zsplit(stdout) + + +def get_all_files() -> list[str]: + return zsplit(cmd_output('git', 'ls-files', '-z')[1]) + + +def get_changed_files(old: str, new: str) -> list[str]: + diff_cmd = ('git', 'diff', '--name-only', '--no-ext-diff', '-z') + try: + _, out, _ = cmd_output(*diff_cmd, f'{old}...{new}') + except CalledProcessError: # pragma: no cover (new git) + # on newer git where old and new do not have a merge base git fails + # so we try a full diff (this is what old git did for us!) + _, out, _ = cmd_output(*diff_cmd, f'{old}..{new}') + + return zsplit(out) + + +def head_rev(remote: str) -> str: + _, out, _ = cmd_output('git', 'ls-remote', '--exit-code', remote, 'HEAD') + return out.split()[0] + + +def has_diff(*args: str, repo: str = '.') -> bool: + cmd = ('git', 'diff', '--quiet', '--no-ext-diff', *args) + return cmd_output_b(*cmd, cwd=repo, check=False)[0] == 1 + + +def has_core_hookpaths_set() -> bool: + _, out, _ = cmd_output_b('git', 'config', 'core.hooksPath', check=False) + return bool(out.strip()) + + +def init_repo(path: str, remote: str) -> None: + if os.path.isdir(remote): + remote = os.path.abspath(remote) + + git = ('git', *NO_FS_MONITOR) + env = no_git_env() + # avoid the user's template so that hooks do not recurse + cmd_output_b(*git, 'init', '--template=', path, env=env) + cmd_output_b(*git, 'remote', 'add', 'origin', remote, cwd=path, env=env) + + +def commit(repo: str = '.') -> None: + env = no_git_env() + name, email = 'pre-commit', 'asottile+pre-commit@umich.edu' + env['GIT_AUTHOR_NAME'] = env['GIT_COMMITTER_NAME'] = name + env['GIT_AUTHOR_EMAIL'] = env['GIT_COMMITTER_EMAIL'] = email + cmd = ('git', 'commit', '--no-edit', '--no-gpg-sign', '-n', '-minit') + cmd_output_b(*cmd, cwd=repo, env=env) + + +def git_path(name: str, repo: str = '.') -> str: + _, out, _ = cmd_output('git', 'rev-parse', '--git-path', name, cwd=repo) + return os.path.join(repo, out.strip()) + + +def check_for_cygwin_mismatch() -> None: + """See https://github.com/pre-commit/pre-commit/issues/354""" + if sys.platform in ('cygwin', 'win32'): # pragma: no cover (windows) + is_cygwin_python = sys.platform == 'cygwin' + try: + toplevel = get_root() + except FatalError: # skip the check if we're not in a git repo + return + is_cygwin_git = toplevel.startswith('/') + + if is_cygwin_python ^ is_cygwin_git: + exe_type = {True: '(cygwin)', False: '(windows)'} + logger.warn( + f'pre-commit has detected a mix of cygwin python / git\n' + f'This combination is not supported, it is likely you will ' + f'receive an error later in the program.\n' + f'Make sure to use cygwin git+python while using cygwin\n' + f'These can be installed through the cygwin installer.\n' + f' - python {exe_type[is_cygwin_python]}\n' + f' - git {exe_type[is_cygwin_git]}\n', + ) + + +def get_best_candidate_tag(rev: str, git_repo: str) -> str: + """Get the best tag candidate. + + Multiple tags can exist on a SHA. Sometimes a moving tag is attached + to a version tag. Try to pick the tag that looks like a version. + """ + tags = cmd_output( + 'git', *NO_FS_MONITOR, 'tag', '--points-at', rev, cwd=git_repo, + )[1].splitlines() + for tag in tags: + if '.' in tag: + return tag + return rev |