From a2aa51f5702b18016c25d943499941323952704d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 19 Nov 2022 15:52:46 +0100 Subject: Adding upstream version 0.18.0. Signed-off-by: Daniel Baumann --- gitlint-core/gitlint/git.py | 286 +++++++++++++++++++++++++++++++------------- 1 file changed, 203 insertions(+), 83 deletions(-) (limited to 'gitlint-core/gitlint/git.py') diff --git a/gitlint-core/gitlint/git.py b/gitlint-core/gitlint/git.py index 2ac8b3d..4b292f0 100644 --- a/gitlint-core/gitlint/git.py +++ b/gitlint-core/gitlint/git.py @@ -1,9 +1,11 @@ import logging import os +from pathlib import Path import arrow from gitlint import shell as sh + # import exceptions separately, this makes it a little easier to mock them out in the unit tests from gitlint.shell import CommandNotFound, ErrorReturnCode @@ -18,15 +20,17 @@ LOG = logging.getLogger(__name__) class GitContextError(GitlintError): - """ Exception indicating there is an issue with the git context """ + """Exception indicating there is an issue with the git context""" + pass class GitNotInstalledError(GitContextError): def __init__(self): super().__init__( - "'git' command not found. You need to install git to use gitlint on a local repository. " + - "See https://git-scm.com/book/en/v2/Getting-Started-Installing-Git on how to install git.") + "'git' command not found. You need to install git to use gitlint on a local repository. " + "See https://git-scm.com/book/en/v2/Getting-Started-Installing-Git on how to install git." + ) class GitExitCodeError(GitContextError): @@ -37,8 +41,8 @@ class GitExitCodeError(GitContextError): def _git(*command_parts, **kwargs): - """ Convenience function for running git commands. Automatically deals with exceptions and unicode. """ - git_kwargs = {'_tty_out': False} + """Convenience function for running git commands. Automatically deals with exceptions and unicode.""" + git_kwargs = {"_tty_out": False} git_kwargs.update(kwargs) try: LOG.debug(command_parts) @@ -46,7 +50,7 @@ def _git(*command_parts, **kwargs): # If we reach this point and the result has an exit_code that is larger than 0, this means that we didn't # get an exception (which is the default sh behavior for non-zero exit codes) and so the user is expecting # a non-zero exit code -> just return the entire result - if hasattr(result, 'exit_code') and result.exit_code > 0: + if hasattr(result, "exit_code") and result.exit_code > 0: return result return str(result) except CommandNotFound as e: @@ -54,11 +58,13 @@ def _git(*command_parts, **kwargs): except ErrorReturnCode as e: # Something went wrong while executing the git command error_msg = e.stderr.strip() error_msg_lower = error_msg.lower() - if '_cwd' in git_kwargs and b"not a git repository" in error_msg_lower: + if "_cwd" in git_kwargs and b"not a git repository" in error_msg_lower: raise GitContextError(f"{git_kwargs['_cwd']} is not a git repository.") from e - if (b"does not have any commits yet" in error_msg_lower or - b"ambiguous argument 'head': unknown revision" in error_msg_lower): + if ( + b"does not have any commits yet" in error_msg_lower + or b"ambiguous argument 'head': unknown revision" in error_msg_lower + ): msg = "Current branch has no commits. Gitlint requires at least one commit to function." raise GitContextError(msg) from e @@ -66,34 +72,54 @@ def _git(*command_parts, **kwargs): def git_version(): - """ Determine the git version installed on this host by calling git --version""" + """Determine the git version installed on this host by calling git --version""" return _git("--version").replace("\n", "") def git_commentchar(repository_path=None): - """ Shortcut for retrieving comment char from git config """ + """Shortcut for retrieving comment char from git config""" commentchar = _git("config", "--get", "core.commentchar", _cwd=repository_path, _ok_code=[0, 1]) # git will return an exit code of 1 if it can't find a config value, in this case we fall-back to # as commentchar - if hasattr(commentchar, 'exit_code') and commentchar.exit_code == 1: # pylint: disable=no-member + if hasattr(commentchar, "exit_code") and commentchar.exit_code == 1: # pylint: disable=no-member commentchar = "#" return commentchar.replace("\n", "") def git_hooks_dir(repository_path): - """ Determine hooks directory for a given target dir """ + """Determine hooks directory for a given target dir""" hooks_dir = _git("rev-parse", "--git-path", "hooks", _cwd=repository_path) hooks_dir = hooks_dir.replace("\n", "") return os.path.realpath(os.path.join(repository_path, hooks_dir)) +def _parse_git_changed_file_stats(changed_files_stats_raw): + """Parse the output of git diff --numstat and return a dict of: + dict[filename: GitChangedFileStats(filename, additions, deletions)]""" + changed_files_stats_lines = changed_files_stats_raw.split("\n") + changed_files_stats = {} + for line in changed_files_stats_lines[:-1]: # drop last empty line + line_stats = line.split() + + # If the file is binary, numstat will show "-" + # See https://git-scm.com/docs/git-diff#Documentation/git-diff.txt---numstat + additions = int(line_stats[0]) if line_stats[0] != "-" else None + deletions = int(line_stats[1]) if line_stats[1] != "-" else None + + changed_file_stat = GitChangedFileStats(line_stats[2], additions, deletions) + changed_files_stats[line_stats[2]] = changed_file_stat + + return changed_files_stats + + class GitCommitMessage: - """ Class representing a git commit message. A commit message consists of the following: - - context: The `GitContext` this commit message is part of - - original: The actual commit message as returned by `git log` - - full: original, but stripped of any comments - - title: the first line of full - - body: all lines following the title + """Class representing a git commit message. A commit message consists of the following: + - context: The `GitContext` this commit message is part of + - original: The actual commit message as returned by `git log` + - full: original, but stripped of any comments + - title: the first line of full + - body: all lines following the title """ + def __init__(self, context, original=None, full=None, title=None, body=None): self.context = context self.original = original @@ -103,7 +129,7 @@ class GitCommitMessage: @staticmethod def from_full_message(context, commit_msg_str): - """ Parses a full git commit message by parsing a given string into the different parts of a commit message """ + """Parses a full git commit message by parsing a given string into the different parts of a commit message""" all_lines = commit_msg_str.splitlines() cutline = f"{context.commentchar} ------------------------ >8 ------------------------" try: @@ -120,19 +146,59 @@ class GitCommitMessage: return self.full def __eq__(self, other): - return (isinstance(other, GitCommitMessage) and self.original == other.original - and self.full == other.full and self.title == other.title and self.body == other.body) # noqa + return ( + isinstance(other, GitCommitMessage) + and self.original == other.original + and self.full == other.full + and self.title == other.title + and self.body == other.body + ) + + +class GitChangedFileStats: + """Class representing the stats for a changed file in git""" + + def __init__(self, filepath, additions, deletions): + self.filepath = Path(filepath) + self.additions = additions + self.deletions = deletions + + def __eq__(self, other): + return ( + isinstance(other, GitChangedFileStats) + and self.filepath == other.filepath + and self.additions == other.additions + and self.deletions == other.deletions + ) + + def __str__(self) -> str: + return f"{self.filepath}: {self.additions} additions, {self.deletions} deletions" + + def __repr__(self) -> str: + return ( + f'GitChangedFileStats(filepath="{self.filepath}", additions={self.additions}, deletions={self.deletions})' + ) class GitCommit: - """ Class representing a git commit. - A commit consists of: context, message, author name, author email, date, list of parent commit shas, - list of changed files, list of branch names. - In the context of gitlint, only the git context and commit message are required. + """Class representing a git commit. + A commit consists of: context, message, author name, author email, date, list of parent commit shas, + list of changed files, list of branch names. + In the context of gitlint, only the git context and commit message are required. """ - def __init__(self, context, message, sha=None, date=None, author_name=None, # pylint: disable=too-many-arguments - author_email=None, parents=None, changed_files=None, branches=None): + def __init__( + self, + context, + message, + sha=None, + date=None, + author_name=None, # pylint: disable=too-many-arguments + author_email=None, + parents=None, + changed_files_stats=None, + branches=None, + ): self.context = context self.message = message self.sha = sha @@ -140,7 +206,7 @@ class GitCommit: self.author_name = author_name self.author_email = author_email self.parents = parents or [] # parent commit hashes - self.changed_files = changed_files or [] + self.changed_files_stats = changed_files_stats or {} self.branches = branches or [] @property @@ -155,57 +221,87 @@ class GitCommit: def is_squash_commit(self): return self.message.title.startswith("squash!") + @property + def is_fixup_amend_commit(self): + return self.message.title.startswith("amend!") + @property def is_revert_commit(self): return self.message.title.startswith("Revert") + @property + def changed_files(self): + return list(self.changed_files_stats.keys()) + def __str__(self): date_str = arrow.get(self.date).format(GIT_TIMEFORMAT) if self.date else None - return (f"--- Commit Message ----\n{self.message}\n" - "--- Meta info ---------\n" - f"Author: {self.author_name} <{self.author_email}>\n" - f"Date: {date_str}\n" - f"is-merge-commit: {self.is_merge_commit}\n" - f"is-fixup-commit: {self.is_fixup_commit}\n" - f"is-squash-commit: {self.is_squash_commit}\n" - f"is-revert-commit: {self.is_revert_commit}\n" - f"Branches: {self.branches}\n" - f"Changed Files: {self.changed_files}\n" - "-----------------------") + + if len(self.changed_files_stats) > 0: + changed_files_stats_str = "\n " + "\n ".join([str(stats) for stats in self.changed_files_stats.values()]) + else: + changed_files_stats_str = " {}" + + return ( + f"--- Commit Message ----\n{self.message}\n" + "--- Meta info ---------\n" + f"Author: {self.author_name} <{self.author_email}>\n" + f"Date: {date_str}\n" + f"is-merge-commit: {self.is_merge_commit}\n" + f"is-fixup-commit: {self.is_fixup_commit}\n" + f"is-fixup-amend-commit: {self.is_fixup_amend_commit}\n" + f"is-squash-commit: {self.is_squash_commit}\n" + f"is-revert-commit: {self.is_revert_commit}\n" + f"Parents: {self.parents}\n" + f"Branches: {self.branches}\n" + f"Changed Files: {self.changed_files}\n" + f"Changed Files Stats:{changed_files_stats_str}\n" + "-----------------------" + ) def __eq__(self, other): # skip checking the context as context refers back to this obj, this will trigger a cyclic dependency - return (isinstance(other, GitCommit) and self.message == other.message - and self.sha == other.sha and self.author_name == other.author_name - and self.author_email == other.author_email - and self.date == other.date and self.parents == other.parents - and self.is_merge_commit == other.is_merge_commit and self.is_fixup_commit == other.is_fixup_commit - and self.is_squash_commit == other.is_squash_commit and self.is_revert_commit == other.is_revert_commit - and self.changed_files == other.changed_files and self.branches == other.branches) # noqa + return ( + isinstance(other, GitCommit) + and self.message == other.message + and self.sha == other.sha + and self.author_name == other.author_name + and self.author_email == other.author_email + and self.date == other.date + and self.parents == other.parents + and self.is_merge_commit == other.is_merge_commit + and self.is_fixup_commit == other.is_fixup_commit + and self.is_fixup_amend_commit == other.is_fixup_amend_commit + and self.is_squash_commit == other.is_squash_commit + and self.is_revert_commit == other.is_revert_commit + and self.changed_files == other.changed_files + and self.changed_files_stats == other.changed_files_stats + and self.branches == other.branches + ) class LocalGitCommit(GitCommit, PropertyCache): - """ Class representing a git commit that exists in the local git repository. - This class uses lazy loading: it defers reading information from the local git repository until the associated - property is accessed for the first time. Properties are then cached for subsequent access. - - This approach ensures that we don't do 'expensive' git calls when certain properties are not actually used. - In addition, reading the required info when it's needed rather than up front avoids adding delay during gitlint - startup time and reduces gitlint's memory footprint. - """ + """Class representing a git commit that exists in the local git repository. + This class uses lazy loading: it defers reading information from the local git repository until the associated + property is accessed for the first time. Properties are then cached for subsequent access. + + This approach ensures that we don't do 'expensive' git calls when certain properties are not actually used. + In addition, reading the required info when it's needed rather than up front avoids adding delay during gitlint + startup time and reduces gitlint's memory footprint. + """ + def __init__(self, context, sha): # pylint: disable=super-init-not-called PropertyCache.__init__(self) self.context = context self.sha = sha def _log(self): - """ Does a call to `git log` to determine a bunch of information about the commit. """ + """Does a call to `git log` to determine a bunch of information about the commit.""" long_format = "--pretty=%aN%x00%aE%x00%ai%x00%P%n%B" raw_commit = _git("log", self.sha, "-1", long_format, _cwd=self.context.repository_path).split("\n") - (name, email, date, parents), commit_msg = raw_commit[0].split('\x00'), "\n".join(raw_commit[1:]) + (name, email, date, parents), commit_msg = raw_commit[0].split("\x00"), "\n".join(raw_commit[1:]) - commit_parents = parents.split(" ") + commit_parents = [] if parents == "" else parents.split(" ") commit_is_merge_commit = len(commit_parents) > 1 # "YYYY-MM-DD HH:mm:ss Z" -> ISO 8601-like format @@ -216,8 +312,16 @@ class LocalGitCommit(GitCommit, PropertyCache): # Create Git commit object with the retrieved info commit_msg_obj = GitCommitMessage.from_full_message(self.context, commit_msg) - self._cache.update({'message': commit_msg_obj, 'author_name': name, 'author_email': email, 'date': commit_date, - 'parents': commit_parents, 'is_merge_commit': commit_is_merge_commit}) + self._cache.update( + { + "message": commit_msg_obj, + "author_name": name, + "author_email": email, + "date": commit_date, + "parents": commit_parents, + "is_merge_commit": commit_is_merge_commit, + } + ) @property def message(self): @@ -251,7 +355,7 @@ class LocalGitCommit(GitCommit, PropertyCache): # safely do this since git branches cannot contain '*' anywhere, so if we find an '*' we know it's output # from the git CLI and not part of the branch name. See https://git-scm.com/docs/git-check-ref-format # We also drop the last empty line from the output. - self._cache['branches'] = [branch.replace("*", "").strip() for branch in branches[:-1]] + self._cache["branches"] = [branch.replace("*", "").strip() for branch in branches[:-1]] return self._try_cache("branches", cache_branches) @@ -260,20 +364,22 @@ class LocalGitCommit(GitCommit, PropertyCache): return self._try_cache("is_merge_commit", self._log) @property - def changed_files(self): - def cache_changed_files(): - self._cache['changed_files'] = _git("diff-tree", "--no-commit-id", "--name-only", "-r", "--root", - self.sha, _cwd=self.context.repository_path).split() + def changed_files_stats(self): + def cache_changed_files_stats(): + changed_files_stats_raw = _git( + "diff-tree", "--no-commit-id", "--numstat", "-r", "--root", self.sha, _cwd=self.context.repository_path + ) + self._cache["changed_files_stats"] = _parse_git_changed_file_stats(changed_files_stats_raw) - return self._try_cache("changed_files", cache_changed_files) + return self._try_cache("changed_files_stats", cache_changed_files_stats) class StagedLocalGitCommit(GitCommit, PropertyCache): - """ Class representing a git commit that has been staged, but not committed. + """Class representing a git commit that has been staged, but not committed. - Other than the commit message itself (and changed files), a lot of information is actually not known at staging - time, since the commit hasn't happened yet. However, we can make educated guesses based on existing repository - information. + Other than the commit message itself (and changed files), a lot of information is actually not known at staging + time, since the commit hasn't happened yet. However, we can make educated guesses based on existing repository + information. """ def __init__(self, context, commit_message): # pylint: disable=super-init-not-called @@ -315,12 +421,16 @@ class StagedLocalGitCommit(GitCommit, PropertyCache): return [self.context.current_branch] @property - def changed_files(self): - return _git("diff", "--staged", "--name-only", "-r", _cwd=self.context.repository_path).split() + def changed_files_stats(self): + def cache_changed_files_stats(): + changed_files_stats_raw = _git("diff", "--staged", "--numstat", "-r", _cwd=self.context.repository_path) + self._cache["changed_files_stats"] = _parse_git_changed_file_stats(changed_files_stats_raw) + + return self._try_cache("changed_files_stats", cache_changed_files_stats) class GitContext(PropertyCache): - """ Class representing the git context in which gitlint is operating: a data object storing information about + """Class representing the git context in which gitlint is operating: a data object storing information about the git repository that gitlint is linting. """ @@ -337,12 +447,16 @@ class GitContext(PropertyCache): @property @cache def current_branch(self): - current_branch = _git("rev-parse", "--abbrev-ref", "HEAD", _cwd=self.repository_path).strip() + try: + current_branch = _git("rev-parse", "--abbrev-ref", "HEAD", _cwd=self.repository_path).strip() + except GitContextError: + # Maybe there is no commit. Try another way to get current branch (need Git 2.22+) + current_branch = _git("branch", "--show-current", _cwd=self.repository_path).strip() return current_branch @staticmethod def from_commit_msg(commit_msg_str): - """ Determines git context based on a commit message. + """Determines git context based on a commit message. :param commit_msg_str: Full git commit message. """ context = GitContext() @@ -353,7 +467,7 @@ class GitContext(PropertyCache): @staticmethod def from_staged_commit(commit_msg_str, repository_path): - """ Determines git context based on a commit message that is a staged commit for a local git repository. + """Determines git context based on a commit message that is a staged commit for a local git repository. :param commit_msg_str: Full git commit message. :param repository_path: Path to the git repository to retrieve the context from """ @@ -364,8 +478,8 @@ class GitContext(PropertyCache): return context @staticmethod - def from_local_repository(repository_path, refspec=None, commit_hash=None): - """ Retrieves the git context from a local git repository. + def from_local_repository(repository_path, refspec=None, commit_hashes=None): + """Retrieves the git context from a local git repository. :param repository_path: Path to the git repository to retrieve the context from :param refspec: The commit(s) to retrieve (mutually exclusive with `commit_hash`) :param commit_hash: Hash of the commit to retrieve (mutually exclusive with `refspec`) @@ -375,11 +489,13 @@ class GitContext(PropertyCache): if refspec: sha_list = _git("rev-list", refspec, _cwd=repository_path).split() - elif commit_hash: # Single commit, just pass it to `git log -1` + elif commit_hashes: # One or more commit hashes, just pass it to `git log -1` # Even though we have already been passed the commit hash, we ask git to retrieve this hash and # return it to us. This way we verify that the passed hash is a valid hash for the target repo and we # also convert it to the full hash format (we might have been passed a short hash). - sha_list = [_git("log", "-1", commit_hash, "--pretty=%H", _cwd=repository_path).replace("\n", "")] + sha_list = [] + for commit_hash in commit_hashes: + sha_list.append(_git("log", "-1", commit_hash, "--pretty=%H", _cwd=repository_path).replace("\n", "")) else: # If no refspec is defined, fallback to the last commit on the current branch # We tried many things here e.g.: defaulting to e.g. HEAD or HEAD^... (incl. dealing with # repos that only have a single commit - HEAD^... doesn't work there), but then we still get into @@ -393,6 +509,10 @@ class GitContext(PropertyCache): return context def __eq__(self, other): - return (isinstance(other, GitContext) and self.commits == other.commits - and self.repository_path == other.repository_path - and self.commentchar == other.commentchar and self.current_branch == other.current_branch) # noqa + return ( + isinstance(other, GitContext) + and self.commits == other.commits + and self.repository_path == other.repository_path + and self.commentchar == other.commentchar + and self.current_branch == other.current_branch + ) -- cgit v1.2.3