summaryrefslogtreecommitdiffstats
path: root/tools/generate_authors.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 20:34:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 20:34:10 +0000
commite4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch)
tree68cb5ef9081156392f1dd62a00c6ccc1451b93df /tools/generate_authors.py
parentInitial commit. (diff)
downloadwireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz
wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip
Adding upstream version 4.2.2.upstream/4.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/generate_authors.py')
-rwxr-xr-xtools/generate_authors.py144
1 files changed, 144 insertions, 0 deletions
diff --git a/tools/generate_authors.py b/tools/generate_authors.py
new file mode 100755
index 0000000..a74ef1c
--- /dev/null
+++ b/tools/generate_authors.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+
+#
+# Generate the AUTHORS file combining existing AUTHORS file with
+# git commit log.
+#
+# Usage: generate_authors.py AUTHORS.src
+
+# Copyright 2022 Moshe Kaplan
+# Based on generate_authors.pl by Michael Mann
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 1998 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import io
+import re
+import subprocess
+import sys
+
+
def get_git_authors(shortlog_output=None):
    '''
    Return the (name, email) pairs of the authors listed by git shortlog.

    Sample line of `git shortlog --email --summary` output:
    # 4321 Navin R. Johnson <nrjohnson@example.com>

    Args:
        shortlog_output: optional, already captured shortlog text to parse.
            When None (the default), git is run against HEAD in the current
            working directory and its output is parsed.

    Returns:
        A list of (name, email) tuples in the order git printed them, with
        every '@' in the address replaced by '[AT]'.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
        OSError: if the git executable cannot be started.
    '''
    GIT_LINE_REGEX = re.compile(r"^\s*\d+\s+([^<]*)\s*<([^>]*)>")

    if shortlog_output is None:
        cmd = ["git", "--no-pager", "shortlog", "--email", "--summary", "HEAD"]
        # The encoding keyword requires Python 3.6 or newer.
        shortlog_output = subprocess.check_output(cmd, universal_newlines=True, encoding='utf-8')

    git_authors = []
    for line in shortlog_output.splitlines():
        match = GIT_LINE_REGEX.match(line)
        if match is None:
            # Blank or unexpected line: skip it instead of failing on
            # match.group() with an AttributeError.
            continue
        name = match.group(1).strip()
        # Try to lower how much spam people get:
        email = match.group(2).strip().replace('@', '[AT]')
        git_authors.append((name, email))
    return git_authors
+
+
def extract_contributors(authors_content):
    '''
    Extract names and email addresses from the AUTHORS file Contributors section.

    Only the text after the first "= Contributors =" marker is examined.
    A contributor line starts with a name followed by whitespace and
    "<email>". A line containing "{" opens a block; lines inside the block
    are ignored until a line containing "}" is seen. The contributor on the
    line that opens the block is still recorded.

    Lines without "<email>" (for example entries whose address was removed
    at the contributor's request) never match and are not returned.

    Args:
        authors_content: text of the AUTHORS file.

    Returns:
        A list of (name, email) tuples in file order.

    Raises:
        IndexError: if authors_content has no "= Contributors =" marker.
    '''
    CONTRIBUTORS_MARKER = "= Contributors ="
    CONTRIBUTOR_LINE_REGEX = re.compile(r"^([\w\.\-\'\x80-\xff]+(\s*[\w+\.\-\'\x80-\xff])*)\s+<([^>]*)>")

    _, marker, contributors_content = authors_content.partition(CONTRIBUTORS_MARKER)
    if not marker:
        # Same exception type as indexing the result of split(), but with a
        # message that says what is actually wrong.
        raise IndexError("AUTHORS content has no '%s' section" % CONTRIBUTORS_MARKER)

    contributors = []
    in_bracket = False
    for line in contributors_content.splitlines():
        opens_block = '{' in line
        if in_bracket and not opens_block:
            # Inside a { ... } block: only look for the closing brace.
            if '}' in line:
                in_bracket = False
            continue

        contributor_match = CONTRIBUTOR_LINE_REGEX.match(line)
        if contributor_match:
            contributors.append((contributor_match.group(1), contributor_match.group(3)))
        if opens_block:
            # The block stays open even if "}" is on the same line.
            in_bracket = True
    return contributors
+
+
def generate_git_contributors_text(contributors_emails, git_authors_emails):
    '''
    Build the text listing git authors that are not already contributors.

    Addresses are compared case-insensitively. A git author is left out when
    the address already appears in contributors_emails, when it was already
    emitted for an earlier git author, or when it is Gerald's address (he is
    part of the AUTHORS header).

    Names shorter than 24 characters are followed by enough tabs to put the
    "<email>" at column 24, assuming 8-column tab stops. Longer names are
    followed by a single space.

    Args:
        contributors_emails: iterable of (name, email) tuples already listed.
        git_authors_emails: iterable of (name, email) tuples from git.

    Returns:
        The generated lines joined with newlines, without a trailing newline;
        an empty string when nothing is to be listed.
    '''
    TAB_WIDTH = 8
    EMAIL_COLUMN_TABS = 3
    # Lowercased so that the check agrees with the case-insensitive
    # duplicate detection below.
    GERALD_EMAIL = "gerald[AT]wireshark.org".lower()

    # Track the email addresses seen to avoid including the same email address twice
    emails_addresses_seen = {email.lower() for _, email in contributors_emails}

    output_lines = []
    for name, email in git_authors_emails:
        email_key = email.lower()
        if email_key in emails_addresses_seen:
            continue

        # Skip Gerald, since he's part of the header:
        if email_key == GERALD_EMAIL:
            continue

        if len(name) >= TAB_WIDTH * EMAIL_COLUMN_TABS:
            line = "{name} <{email}>".format(name=name, email=email)
        else:
            # Each full tab stop the name already covers needs one tab less.
            tabs = '\t' * (EMAIL_COLUMN_TABS - len(name) // TAB_WIDTH)
            line = "{name}{tabs}<{email}>".format(name=name, tabs=tabs, email=email)

        emails_addresses_seen.add(email_key)
        output_lines.append(line)
    return "\n".join(output_lines)
+
+
def read_authors(parsed_args):
    '''
    Return the start of the AUTHORS file, up to the git log section.

    The file named by parsed_args.authors[0] is read as UTF-8. Reading stops
    at the first line containing "= From git log ="; that line and everything
    after it are left out. If no line contains the marker, the whole file is
    returned.

    Args:
        parsed_args: parsed command line arguments; authors[0] is the path.

    Returns:
        The retained lines concatenated into one string.
    '''
    GIT_LOG_MARKER = '= From git log ='
    kept = []
    with open(parsed_args.authors[0], 'r', encoding='utf-8') as authors_file:
        for text_line in authors_file:
            if GIT_LOG_MARKER in text_line:
                break
            kept.append(text_line)
    return ''.join(kept)
+
+
def main():
    '''
    Regenerate the "From git log" section of the AUTHORS file.

    The path of the AUTHORS file is taken from the command line. The text
    before the "= From git log =" line is kept, a fresh list of git authors
    that are not already listed as contributors is appended, and the result
    is written back to the same file.
    '''
    arg_parser = argparse.ArgumentParser(description="Generate the AUTHORS file combining existing AUTHORS file with git commit log.")
    arg_parser.add_argument("authors", metavar='authors', nargs=1, help="path to AUTHORS file")
    options = arg_parser.parse_args()
    authors_path = options.authors[0]

    preserved_text = read_authors(options)

    # Addresses already listed by hand must not be repeated in the
    # generated section, so collect them before looking at git.
    listed_contributors = extract_contributors(preserved_text)
    authors_from_git = get_git_authors()
    git_section = generate_git_contributors_text(listed_contributors, authors_from_git)

    # The file is only opened for writing once everything above succeeded.
    pieces = [preserved_text, '= From git log =\n\n', git_section, '\n']
    with open(authors_path, 'w', encoding='utf-8') as authors_file:
        authors_file.write(''.join(pieces))


if __name__ == '__main__':
    main()