diff options
Diffstat (limited to 'gitlint/git.py')
-rw-r--r-- | gitlint/git.py | 395 |
1 files changed, 0 insertions, 395 deletions
diff --git a/gitlint/git.py b/gitlint/git.py deleted file mode 100644 index ca7ad92..0000000 --- a/gitlint/git.py +++ /dev/null @@ -1,395 +0,0 @@ -import os -import arrow - -from gitlint import shell as sh -# import exceptions separately, this makes it a little easier to mock them out in the unit tests -from gitlint.shell import CommandNotFound, ErrorReturnCode - -from gitlint.cache import PropertyCache, cache -from gitlint.utils import ustr, sstr - -# For now, the git date format we use is fixed, but technically this format is determined by `git config log.date` -# We should fix this at some point :-) -GIT_TIMEFORMAT = "YYYY-MM-DD HH:mm:ss Z" - - -class GitContextError(Exception): - """ Exception indicating there is an issue with the git context """ - pass - - -class GitNotInstalledError(GitContextError): - def __init__(self): - super(GitNotInstalledError, self).__init__( - u"'git' command not found. You need to install git to use gitlint on a local repository. " + - u"See https://git-scm.com/book/en/v2/Getting-Started-Installing-Git on how to install git.") - - -def _git(*command_parts, **kwargs): - """ Convenience function for running git commands. Automatically deals with exceptions and unicode. """ - git_kwargs = {'_tty_out': False} - git_kwargs.update(kwargs) - try: - result = sh.git(*command_parts, **git_kwargs) # pylint: disable=unexpected-keyword-arg - # If we reach this point and the result has an exit_code that is larger than 0, this means that we didn't - # get an exception (which is the default sh behavior for non-zero exit codes) and so the user is expecting - # a non-zero exit code -> just return the entire result - if hasattr(result, 'exit_code') and result.exit_code > 0: - return result - return ustr(result) - except CommandNotFound: - raise GitNotInstalledError() - except ErrorReturnCode as e: # Something went wrong while executing the git command - error_msg = e.stderr.strip() - error_msg_lower = error_msg.lower() - if '_cwd' in git_kwargs and b"not a git repository" in error_msg_lower: - error_msg = u"{0} is not a git repository.".format(git_kwargs['_cwd']) - elif (b"does not have any commits yet" in error_msg_lower or - b"ambiguous argument 'head': unknown revision" in error_msg_lower): - raise GitContextError(u"Current branch has no commits. Gitlint requires at least one commit to function.") - else: - error_msg = u"An error occurred while executing '{0}': {1}".format(e.full_cmd, error_msg) - raise GitContextError(error_msg) - - -def git_version(): - """ Determine the git version installed on this host by calling git --version""" - return _git("--version").replace(u"\n", u"") - - -def git_commentchar(repository_path=None): - """ Shortcut for retrieving comment char from git config """ - commentchar = _git("config", "--get", "core.commentchar", _cwd=repository_path, _ok_code=[0, 1]) - # git will return an exit code of 1 if it can't find a config value, in this case we fall-back to # as commentchar - if hasattr(commentchar, 'exit_code') and commentchar.exit_code == 1: # pylint: disable=no-member - commentchar = "#" - return ustr(commentchar).replace(u"\n", u"") - - -def git_hooks_dir(repository_path): - """ Determine hooks directory for a given target dir """ - hooks_dir = _git("rev-parse", "--git-path", "hooks", _cwd=repository_path) - hooks_dir = ustr(hooks_dir).replace(u"\n", u"") - return os.path.realpath(os.path.join(repository_path, hooks_dir)) - - -class GitCommitMessage(object): - """ Class representing a git commit message. A commit message consists of the following: - - context: The `GitContext` this commit message is part of - - original: The actual commit message as returned by `git log` - - full: original, but stripped of any comments - - title: the first line of full - - body: all lines following the title - """ - def __init__(self, context, original=None, full=None, title=None, body=None): - self.context = context - self.original = original - self.full = full - self.title = title - self.body = body - - @staticmethod - def from_full_message(context, commit_msg_str): - """ Parses a full git commit message by parsing a given string into the different parts of a commit message """ - all_lines = commit_msg_str.splitlines() - cutline = u"{0} ------------------------ >8 ------------------------".format(context.commentchar) - try: - cutline_index = all_lines.index(cutline) - except ValueError: - cutline_index = None - lines = [ustr(line) for line in all_lines[:cutline_index] if not line.startswith(context.commentchar)] - full = "\n".join(lines) - title = lines[0] if lines else "" - body = lines[1:] if len(lines) > 1 else [] - return GitCommitMessage(context=context, original=commit_msg_str, full=full, title=title, body=body) - - def __unicode__(self): - return self.full # pragma: no cover - - def __str__(self): - return sstr(self.__unicode__()) # pragma: no cover - - def __repr__(self): - return self.__str__() # pragma: no cover - - def __eq__(self, other): - return (isinstance(other, GitCommitMessage) and self.original == other.original - and self.full == other.full and self.title == other.title and self.body == other.body) # noqa - - def __ne__(self, other): - return not self.__eq__(other) # required for py2 - - -class GitCommit(object): - """ Class representing a git commit. - A commit consists of: context, message, author name, author email, date, list of parent commit shas, - list of changed files, list of branch names. - In the context of gitlint, only the git context and commit message are required. - """ - - def __init__(self, context, message, sha=None, date=None, author_name=None, # pylint: disable=too-many-arguments - author_email=None, parents=None, changed_files=None, branches=None): - self.context = context - self.message = message - self.sha = sha - self.date = date - self.author_name = author_name - self.author_email = author_email - self.parents = parents or [] # parent commit hashes - self.changed_files = changed_files or [] - self.branches = branches or [] - - @property - def is_merge_commit(self): - return self.message.title.startswith(u"Merge") - - @property - def is_fixup_commit(self): - return self.message.title.startswith(u"fixup!") - - @property - def is_squash_commit(self): - return self.message.title.startswith(u"squash!") - - @property - def is_revert_commit(self): - return self.message.title.startswith(u"Revert") - - def __unicode__(self): - format_str = (u"--- Commit Message ----\n%s\n" - u"--- Meta info ---------\n" - u"Author: %s <%s>\nDate: %s\n" - u"is-merge-commit: %s\nis-fixup-commit: %s\n" - u"is-squash-commit: %s\nis-revert-commit: %s\n" - u"Branches: %s\n" - u"Changed Files: %s\n" - u"-----------------------") # pragma: no cover - date_str = arrow.get(self.date).format(GIT_TIMEFORMAT) if self.date else None - return format_str % (ustr(self.message), self.author_name, self.author_email, date_str, - self.is_merge_commit, self.is_fixup_commit, self.is_squash_commit, - self.is_revert_commit, sstr(self.branches), sstr(self.changed_files)) # pragma: no cover - - def __str__(self): - return sstr(self.__unicode__()) # pragma: no cover - - def __repr__(self): - return self.__str__() # pragma: no cover - - def __eq__(self, other): - # skip checking the context as context refers back to this obj, this will trigger a cyclic dependency - return (isinstance(other, GitCommit) and self.message == other.message - and self.sha == other.sha and self.author_name == other.author_name - and self.author_email == other.author_email - and self.date == other.date and self.parents == other.parents - and self.is_merge_commit == other.is_merge_commit and self.is_fixup_commit == other.is_fixup_commit - and self.is_squash_commit == other.is_squash_commit and self.is_revert_commit == other.is_revert_commit - and self.changed_files == other.changed_files and self.branches == other.branches) # noqa - - def __ne__(self, other): - return not self.__eq__(other) # required for py2 - - -class LocalGitCommit(GitCommit, PropertyCache): - """ Class representing a git commit that exists in the local git repository. - This class uses lazy loading: it defers reading information from the local git repository until the associated - property is accessed for the first time. Properties are then cached for subsequent access. - - This approach ensures that we don't do 'expensive' git calls when certain properties are not actually used. - In addition, reading the required info when it's needed rather than up front avoids adding delay during gitlint - startup time and reduces gitlint's memory footprint. - """ - def __init__(self, context, sha): # pylint: disable=super-init-not-called - PropertyCache.__init__(self) - self.context = context - self.sha = sha - - def _log(self): - """ Does a call to `git log` to determine a bunch of information about the commit. """ - long_format = "--pretty=%aN%x00%aE%x00%ai%x00%P%n%B" - raw_commit = _git("log", self.sha, "-1", long_format, _cwd=self.context.repository_path).split("\n") - - (name, email, date, parents), commit_msg = raw_commit[0].split('\x00'), "\n".join(raw_commit[1:]) - - commit_parents = parents.split(" ") - commit_is_merge_commit = len(commit_parents) > 1 - - # "YYYY-MM-DD HH:mm:ss Z" -> ISO 8601-like format - # Use arrow for datetime parsing, because apparently python is quirky around ISO-8601 dates: - # http://stackoverflow.com/a/30696682/381010 - commit_date = arrow.get(ustr(date), GIT_TIMEFORMAT).datetime - - # Create Git commit object with the retrieved info - commit_msg_obj = GitCommitMessage.from_full_message(self.context, commit_msg) - - self._cache.update({'message': commit_msg_obj, 'author_name': name, 'author_email': email, 'date': commit_date, - 'parents': commit_parents, 'is_merge_commit': commit_is_merge_commit}) - - @property - def message(self): - return self._try_cache("message", self._log) - - @property - def author_name(self): - return self._try_cache("author_name", self._log) - - @property - def author_email(self): - return self._try_cache("author_email", self._log) - - @property - def date(self): - return self._try_cache("date", self._log) - - @property - def parents(self): - return self._try_cache("parents", self._log) - - @property - def branches(self): - def cache_branches(): - # We have to parse 'git branch --contains <sha>' instead of 'git for-each-ref' to be compatible with - # git versions < 2.7.0 - # https://stackoverflow.com/questions/45173979/can-i-force-git-branch-contains-tag-to-not-print-the-asterisk - branches = _git("branch", "--contains", self.sha, _cwd=self.context.repository_path).split("\n") - - # This means that we need to remove any leading * that indicates the current branch. Note that we can - # safely do this since git branches cannot contain '*' anywhere, so if we find an '*' we know it's output - # from the git CLI and not part of the branch name. See https://git-scm.com/docs/git-check-ref-format - # We also drop the last empty line from the output. - self._cache['branches'] = [ustr(branch.replace("*", "").strip()) for branch in branches[:-1]] - - return self._try_cache("branches", cache_branches) - - @property - def is_merge_commit(self): - return self._try_cache("is_merge_commit", self._log) - - @property - def changed_files(self): - def cache_changed_files(): - self._cache['changed_files'] = _git("diff-tree", "--no-commit-id", "--name-only", "-r", "--root", - self.sha, _cwd=self.context.repository_path).split() - - return self._try_cache("changed_files", cache_changed_files) - - -class StagedLocalGitCommit(GitCommit, PropertyCache): - """ Class representing a git commit that has been staged, but not committed. - - Other than the commit message itself (and changed files), a lot of information is actually not known at staging - time, since the commit hasn't happened yet. However, we can make educated guesses based on existing repository - information. - """ - - def __init__(self, context, commit_message): # pylint: disable=super-init-not-called - PropertyCache.__init__(self) - self.context = context - self.message = commit_message - self.sha = None - self.parents = [] # Not really possible to determine before a commit - - @property - @cache - def author_name(self): - return ustr(_git("config", "--get", "user.name", _cwd=self.context.repository_path)).strip() - - @property - @cache - def author_email(self): - return ustr(_git("config", "--get", "user.email", _cwd=self.context.repository_path)).strip() - - @property - @cache - def date(self): - # We don't know the actual commit date yet, but we make a pragmatic trade-off here by providing the current date - # We get current date from arrow, reformat in git date format, then re-interpret it as a date. - # This ensure we capture the same precision and timezone information that git does. - return arrow.get(arrow.now().format(GIT_TIMEFORMAT), GIT_TIMEFORMAT).datetime - - @property - @cache - def branches(self): - # We don't know the branch this commit will be part of yet, but we're pragmatic here and just return the - # current branch, as for all intents and purposes, this will be what the user is looking for. - return [self.context.current_branch] - - @property - def changed_files(self): - return _git("diff", "--staged", "--name-only", "-r", _cwd=self.context.repository_path).split() - - -class GitContext(PropertyCache): - """ Class representing the git context in which gitlint is operating: a data object storing information about - the git repository that gitlint is linting. - """ - - def __init__(self, repository_path=None): - PropertyCache.__init__(self) - self.commits = [] - self.repository_path = repository_path - - @property - @cache - def commentchar(self): - return git_commentchar(self.repository_path) - - @property - @cache - def current_branch(self): - current_branch = ustr(_git("rev-parse", "--abbrev-ref", "HEAD", _cwd=self.repository_path)).strip() - return current_branch - - @staticmethod - def from_commit_msg(commit_msg_str): - """ Determines git context based on a commit message. - :param commit_msg_str: Full git commit message. - """ - context = GitContext() - commit_msg_obj = GitCommitMessage.from_full_message(context, commit_msg_str) - commit = GitCommit(context, commit_msg_obj) - context.commits.append(commit) - return context - - @staticmethod - def from_staged_commit(commit_msg_str, repository_path): - """ Determines git context based on a commit message that is a staged commit for a local git repository. - :param commit_msg_str: Full git commit message. - :param repository_path: Path to the git repository to retrieve the context from - """ - context = GitContext(repository_path=repository_path) - commit_msg_obj = GitCommitMessage.from_full_message(context, commit_msg_str) - commit = StagedLocalGitCommit(context, commit_msg_obj) - context.commits.append(commit) - return context - - @staticmethod - def from_local_repository(repository_path, refspec=None): - """ Retrieves the git context from a local git repository. - :param repository_path: Path to the git repository to retrieve the context from - :param refspec: The commit(s) to retrieve - """ - - context = GitContext(repository_path=repository_path) - - # If no refspec is defined, fallback to the last commit on the current branch - if refspec is None: - # We tried many things here e.g.: defaulting to e.g. HEAD or HEAD^... (incl. dealing with - # repos that only have a single commit - HEAD^... doesn't work there), but then we still get into - # problems with e.g. merge commits. Easiest solution is just taking the SHA from `git log -1`. - sha_list = [_git("log", "-1", "--pretty=%H", _cwd=repository_path).replace(u"\n", u"")] - else: - sha_list = _git("rev-list", refspec, _cwd=repository_path).split() - - for sha in sha_list: - commit = LocalGitCommit(context, sha) - context.commits.append(commit) - - return context - - def __eq__(self, other): - return (isinstance(other, GitContext) and self.commits == other.commits - and self.repository_path == other.repository_path - and self.commentchar == other.commentchar and self.current_branch == other.current_branch) # noqa - - def __ne__(self, other): - return not self.__eq__(other) # required for py2 |