diff options
Diffstat (limited to 'src/script/ceph-release-notes')
-rwxr-xr-x | src/script/ceph-release-notes | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/src/script/ceph-release-notes b/src/script/ceph-release-notes new file mode 100755 index 000000000..b4500137a --- /dev/null +++ b/src/script/ceph-release-notes @@ -0,0 +1,310 @@ +#!/usr/bin/env python +# Originally modified from A. Israel's script seen at +# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9 +"""To run this script first install the dependencies + + + python3 -m venv v + source v/bin/activate + pip install githubpy GitPython requests + +Generate a github access token; this is needed as the anonymous access +to Github's API will easily hit the limit even with a single invocation. +For details see: +https://help.github.com/articles/creating-an-access-token-for-command-line-use/ + +Next either set the github token as an env variable +`GITHUB_ACCESS_TOKEN` or alternatively invoke the script with +`--token` switch. + +Example: + + ceph-release-notes -r tags/v0.87..origin/giant \ + $(git rev-parse --show-toplevel) + +""" + +from __future__ import print_function +import argparse +import github +import os +import re +import sys +import requests + +from git import Repo + + +fixes_re = re.compile(r"Fixes\:? #(\d+)") +reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE) +# labels is the list of relevant labels defined for github.com/ceph/ceph +labels = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr', + 'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests', + 'tools'} +merge_re = re.compile("Merge (pull request|PR) #(\d+).*") +# prefixes is the list of commit description prefixes we recognize +prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake', + 'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd', + 'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind', + 'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools'] +signed_off_re = re.compile("Signed-off-by: (.+) <") +tracker_re = re.compile("http://tracker.ceph.com/issues/(\d+)") +rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)") +tracker_uri = "http://tracker.ceph.com/issues/{0}.json" + + +def get_original_issue(issue, verbose): + r = requests.get(tracker_uri.format(issue), + params={"include": "relations"}).json() + + # looking up for the original issue only makes sense + # when dealing with an issue in the Backport tracker + if r["issue"]["tracker"]["name"] != "Backport": + if verbose: + print ("http://tracker.ceph.com/issues/" + issue + + " is from the tracker " + r["issue"]["tracker"]["name"] + + ", do not look for the original issue") + return issue + + # if a Backport issue does not have a relation, keep it + if "relations" not in r["issue"]: + if verbose: + print ("http://tracker.ceph.com/issues/" + issue + + " has no relations, do not look for the original issue") + return issue + + copied_to = [ + str(i['issue_id']) for i in r["issue"]["relations"] + if i["relation_type"] == "copied_to" + ] + if copied_to: + if len(copied_to) > 1: + if verbose: + print ("ERROR: http://tracker.ceph.com/issues/" + issue + + " has more than one Copied To relation") + return issue + if verbose: + print ("http://tracker.ceph.com/issues/" + issue + + " is the backport of http://tracker.ceph.com/issues/" + + copied_to[0]) + return copied_to[0] + else: + if verbose: + print ("http://tracker.ceph.com/issues/" + issue + + " has no copied_to relations; do not look for the" + + " original issue") + return issue + + +def split_component(title, gh, number): + title_re = '(' + '|'.join(prefixes) + ')(:.*)' + match = re.match(title_re, title) + if match: + return match.group(1)+match.group(2) + else: + issue = gh.repos("ceph")("ceph").issues(number).get() + issue_labels = {it['name'] for it in issue['labels']} + if 'documentation' in issue_labels: + return 'doc: ' + title + item = set(prefixes).intersection(issue_labels) + if item: + return ",".join(sorted(item)) + ': ' + title + else: + return 'UNKNOWN: ' + title + +def _title_message(commit, pr, strict): + title = pr['title'] + message_lines = commit.message.split('\n') + if strict or len(message_lines) < 1: + return (title, None) + lines = [] + for line in message_lines[1:]: + if reviewed_by_re.match(line): + continue + line = line.strip() + if line: + lines.append(line) + if len(lines) == 0: + return (title, None) + duplicates_pr_title = lines[0] == pr['title'].strip() + if duplicates_pr_title: + return (title, None) + assert len(lines) > 0, "missing message content" + if len(lines) == 1: + # assume that a single line means the intention is to + # re-write the PR title + return (lines[0], None) + message = " " + "\n ".join(lines) + return (title, message) + +def make_release_notes(gh, repo, ref, plaintext, verbose, strict, use_tags): + + issue2prs = {} + pr2issues = {} + pr2info = {} + + for commit in repo.iter_commits(ref, merges=True): + merge = merge_re.match(commit.summary) + if not merge: + continue + number = merge.group(2) + print ("Considering PR#" + number) + # do not pick up ceph/ceph-qa-suite.git PRs + if int(number) < 1311: + print ("Ignoring low-numbered PR, probably picked up from" + " ceph/ceph-qa-suite.git") + continue + pr = gh.repos("ceph")("ceph").pulls(number).get() + (title, message) = _title_message(commit, pr, strict) + issues = [] + if pr['body']: + issues = fixes_re.findall(pr['body']) + tracker_re.findall( + pr['body'] + ) + + authors = {} + for c in repo.iter_commits( + "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha) + ): + for author in re.findall( + "Signed-off-by:\s*(.*?)\s*<", c.message + ): + authors[author] = 1 + issues.extend(fixes_re.findall(c.message) + + tracker_re.findall(c.message)) + if authors: + author = ", ".join(authors.keys()) + else: + author = commit.parents[-1].author.name + + if strict and not issues: + print ("ERROR: https://github.com/ceph/ceph/pull/" + + str(number) + " has no associated issue") + continue + + if strict: + title_re = ( + '^(?:hammer|infernalis|jewel|kraken|luminous|mimic|nautilus|octopus):\s+(' + + '|'.join(prefixes) + + ')(:.*)' + ) + match = re.match(title_re, title) + if not match: + print ("ERROR: https://github.com/ceph/ceph/pull/" + + str(number) + " title " + title.encode("utf-8") + + " does not match " + title_re) + else: + title = match.group(1) + match.group(2) + if use_tags: + title = split_component(title, gh, number) + + title = title.strip(' \t\n\r\f\v\.\,\;\:\-\=') + # escape asterisks, which is used by reStructuredTextrst for inline + # emphasis + title = title.replace('*', '\*') + # and escape the underscores for noting a link + title = rst_link_re.sub(r'\1\_\2', title) + pr2info[number] = (author, title, message) + + for issue in set(issues): + if strict: + issue = get_original_issue(issue, verbose) + issue2prs.setdefault(issue, set([])).add(number) + pr2issues.setdefault(number, set([])).add(issue) + sys.stdout.write('.') + + print (" done collecting merges.") + + if strict: + for (issue, prs) in issue2prs.items(): + if len(prs) > 1: + print (">>>>>>> " + str(len(prs)) + " pr for issue " + + issue + " " + str(prs)) + + for (pr, (author, title, message)) in sorted( + pr2info.items(), key=lambda title: title[1][1] + ): + if pr in pr2issues: + if plaintext: + issues = map(lambda issue: '#' + str(issue), pr2issues[pr]) + else: + issues = map(lambda issue: ( + '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_' + ).format(issue=issue), pr2issues[pr] + ) + issues = ", ".join(issues) + ", " + else: + issues = '' + if plaintext: + print ("* {title} ({issues}{author})".format( + title=title.encode("utf-8"), + issues=issues, + author=author.encode("utf-8") + ) + ) + else: + print ( + ( + "* {title} ({issues}`pr#{pr} <" + "https://github.com/ceph/ceph/pull/{pr}" + ">`_, {author})" + ).format( + title=title.encode("utf-8"), + issues=issues, + author=author.encode("utf-8"), pr=pr + ) + ) + if message: + print (message) + + +if __name__ == "__main__": + desc = ''' + Make ceph release notes for a given revision. Eg usage: + + $ ceph-release-notes -r tags/v0.87..origin/giant \ + $(git rev-parse --show-toplevel) + + It is recommended to set the github env. token in order to avoid + hitting the api rate limits. + ''' + + parser = argparse.ArgumentParser( + description=desc, + formatter_class=argparse.RawTextHelpFormatter + ) + + parser.add_argument("--rev", "-r", + help="git revision range for creating release notes") + parser.add_argument("--text", "-t", + action='store_true', default=None, + help="output plain text only, no links") + parser.add_argument("--verbose", "-v", + action='store_true', default=None, + help="verbose") + parser.add_argument("--strict", + action='store_true', default=None, + help="strict, recommended only for backport releases") + parser.add_argument("repo", metavar="repo", + help="path to ceph git repo") + parser.add_argument( + "--token", + default=os.getenv("GITHUB_ACCESS_TOKEN"), + help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)", + ) + parser.add_argument("--use-tags", default=False, + help="Use github tags to guess the component") + + args = parser.parse_args() + gh = github.GitHub( + access_token=args.token) + + make_release_notes( + gh, + Repo(args.repo), + args.rev, + args.text, + args.verbose, + args.strict, + args.use_tags + ) |