diff options
Diffstat (limited to 'debian/update-copyright')
-rwxr-xr-x | debian/update-copyright | 291 |
1 files changed, 291 insertions, 0 deletions
#!/usr/bin/python3
#
# Copyright © 2016 Dr. Tobias Quathamer <toddy@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Regenerate the automatically generated part of the "copyright" file.

The script must be run from the directory that contains "copyright" and
the "licenses/" directory.  It scans every file matching "../man*/*",
reads the header comments (everything before the ".TH" line) for
"Copyright" and "SPDX-License-Identifier:" lines, and writes one "Files"
stanza per license combination followed by the stand-alone "License"
paragraphs.  The manually maintained head of "copyright" (everything up
to and including the third "License:" line) is preserved.
"""

import re
import sys
import textwrap
from pathlib import Path

# The standard short names in Debian are defined here:
# https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/

license_information = [
    {
        "shortname": "BSD-2-clause",
        "filename": "BSD-2-clause",
        "upstream_names": ["BSD_2_CLAUSE_UCB", "BSD-2-Clause"],
    },
    {
        "shortname": "BSD-3-clause",
        "filename": "BSD-3-clause-UCB",
        "upstream_names": ["BSD_3_CLAUSE_UCB", "BSD-3-Clause"],
    },
    {
        "shortname": "BSD-4-clause",
        "filename": "BSD-4-clause-UCB",
        "upstream_names": ["BSD_4_CLAUSE_UCB", "BSD_ONELINE_CDROM", "BSD-4-Clause-UC"],
    },
    {
        "shortname": "Expat",
        "filename": "Expat",
        "upstream_names": ["PERMISSIVE_MISC", "MIT"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "freely-redistributable",
        "filename": "freely-redistributable",
        "upstream_names": ["FREELY_REDISTRIBUTABLE"],
    },
    {
        "shortname": "GPL-1+",
        "filename": "GPL-1+",
        "upstream_names": ["gpl-1-or-later", "GPL-1.0-or-later"],
    },
    {
        "shortname": "GPL-2",
        "filename": "GPL-2",
        "upstream_names": ["GPLv2_MISC", "GPLv2_ONELINE", "GPL-2.0-only"],
    },
    {
        "shortname": "GPL-2+",
        "filename": "GPL-2+",
        "upstream_names": [
            "GPL_NOVERSION_ONELINE",
            "GPLv2+",
            "GPLv2+_DOC_FULL",
            "GPLv2+_DOC_MISC",
            "GPLv2+_DOC_ONEPARA",
            "GPLv2+_SW_3_PARA",
            "GPLv2+_SW_ONEPARA",
            "GPL-2.0-or-later",
        ],
    },
    {
        "shortname": "Linux-man-pages-1-para",
        "filename": "Linux-man-pages-1-para",
        "upstream_names": ["Linux-man-pages-1-para"],
    },
    {
        "shortname": "Linux-man-pages-copyleft-2-para",
        "filename": "Linux-man-pages-copyleft-2-para",
        "upstream_names": ["Linux-man-pages-copyleft-2-para"],
    },
    {
        "shortname": "Linux-man-pages-copyleft",
        "filename": "Linux-man-pages-copyleft",
        "upstream_names": ["Linux-man-pages-copyleft", "Copyleft"],
    },
    {
        "shortname": "Linux-man-pages-copyleft-var",
        "filename": "Linux-man-pages-copyleft-var",
        "upstream_names": ["Linux-man-pages-copyleft-var"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "henry-spencer-regex",
        "filename": "henry-spencer-regex",
        "upstream_names": ["MISC"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "LDPv1",
        "filename": "LDPv1",
        "upstream_names": ["LDPv1"],
    },
    {
        "shortname": "public-domain",
        "filename": "public-domain",
        "upstream_names": ["PUBLIC_DOMAIN"],
    },
    {
        # This shortname is not defined by the standard.
        "shortname": "verbatim",
        "filename": "verbatim",
        "upstream_names": [
            "VERBATIM",
            "VERBATIM_ONE_PARA",
            "VERBATIM_TWO_PARA",
            "VERBATIM_PROF",
        ],
    },
]

# Maps a license expression (e.g. "GPL-2+" or "Expat and GPL-2+") to a dict
# with the keys "files" (list of page paths) and "copyright" (list of holders).
licenses_with_manpages = {}
# Maps the target of a ".so" request to the list of pages that include it.
symlinks = {}

# Patterns applied to every header line; compiled once instead of per line.
SO_LINK_RE = re.compile(r"^\.so (.*)")
TH_RE = re.compile(r"^\.TH")
COPYRIGHT_RE = re.compile(r"^\.\\\".*?Copyright (.*)")
SPDX_RE = re.compile(r"^\.\\\" SPDX-License-Identifier: (.+)")
LICENSE_FIELD_RE = re.compile(r"^License:")


def _strip_parent_prefix(path):
    """Return *path* as a string without a leading "../", if it has one."""
    text = str(path)
    if text.startswith("../"):
        return text[3:]
    return text


def get_license_shortname(name):
    """Gets the Debian shortname for the name supplied by upstream.

    Returns the empty string when the name is not known.  Exits with a
    fatal error when the upstream name is assigned to more than one
    entry of ``license_information``.
    """
    matches = [
        info["shortname"]
        for info in license_information
        if name in info["upstream_names"]
    ]
    if len(matches) > 1:
        sys.exit("Fatal error: Upstream license name defined multiple times: " + name)
    return matches[0] if matches else ""


def add_manpage_to_shortname(manpage, copyright_holders, licenses):
    """Record *manpage* under the license expression built from *licenses*.

    manpage: path of the page (a leading "../" is stripped).
    copyright_holders: list of copyright lines found in the page header.
    licenses: list of Debian shortnames found in the page header.

    The result is stored in the module-level ``licenses_with_manpages``.
    """
    filename = _strip_parent_prefix(manpage)
    # Common case: only one license for the manpage, so the shortname is
    # just e.g. "GPL-2+".  The set() keeps a license that is named twice
    # in one page from producing "GPL-2+ and GPL-2+".
    shortname = " and ".join(sorted(set(licenses)))
    entry = licenses_with_manpages.setdefault(
        shortname, {"files": [], "copyright": []}
    )
    entry["files"].append(filename)
    # Do not add same lines twice (this also covers the first page).
    entry["copyright"] = sorted(set(entry["copyright"]) | set(copyright_holders))


def get_copyright_stanza(shortname, file_info):
    """Return the "Files/Copyright/License" stanza for one license expression.

    shortname: value for the "License" field.
    file_info: dict with the keys "files" and "copyright", as stored in
        ``licenses_with_manpages``.

    Pages listed in ``symlinks`` for any of the files are added to the
    "Files" field.  The stanza ends with an empty line.
    """
    # Collect files and symlinks into a common list
    all_files = list(file_info["files"])
    for name in file_info["files"]:
        all_files.extend(symlinks.get(name, []))
    # Join the files into a whitespace separated list, at most 76
    # characters long.  The wrap is 69 + 7 (length of "Files: ") = 76
    wrapped = textwrap.wrap(
        " ".join(sorted(set(all_files))),
        width=69,
        break_long_words=False,
        break_on_hyphens=False,
    )
    files = "\n ".join(wrapped)
    # Now format the copyright holders
    holders = "\n ".join(sorted(file_info["copyright"]))
    # An empty field is an error, so ensure a value
    if not holders:
        holders = "(could not be detected automatically)"
    # Finally, create the stanza
    return (
        "Files: " + files
        + "\nCopyright: " + holders
        + "\nLicense: " + shortname + "\n\n"
    )


def get_license_text(shortname):
    """Gets the text for the Debian license shortname.

    The text is read from "licenses/<filename>" and every line is
    prefixed with one space.  Returns the empty string when *shortname*
    has no entry in ``license_information``.
    """
    parts = []
    for info in license_information:
        if info["shortname"] == shortname:
            parts.append("License: " + shortname + "\n")
            with open("licenses/" + info["filename"], encoding="utf-8") as licensefile:
                parts.extend(" " + line for line in licensefile)
    return "".join(parts)


def _parse_manpage_header(manpage):
    """Return (so_target, copyright_holders, licenses) for one page.

    so_target is the argument of a ".so" request, or None when the page
    has none before its ".TH" line.
    """
    licenses = []
    copyright_holders = []
    with manpage.open(encoding="utf-8") as page:
        for line in page:
            # Do not create copyright stanzas for symlink files
            so_link = SO_LINK_RE.search(line)
            if so_link:
                return so_link.group(1).strip(), copyright_holders, licenses
            # Only parse the header, so stop after seeing ".TH"
            if TH_RE.search(line):
                break
            # Extract all copyright holders
            holder = COPYRIGHT_RE.search(line)
            if holder:
                copyright_holders.append(holder.group(1))
            # Match the beginning of the license
            spdx = SPDX_RE.search(line)
            if spdx:
                license_name = spdx.group(1).strip()
                license_short_name = get_license_shortname(license_name)
                if not license_short_name:
                    sys.exit(
                        "Fatal error: Upstream license name not known: " + license_name
                    )
                licenses.append(license_short_name)
    return None, copyright_holders, licenses


def _flatten_symlinks():
    """Merge the links of a link page into the entry of the page it points to."""
    for link_to_test in symlinks:
        # Now cycle through all symlink entries
        for link in symlinks:
            if link_to_test in symlinks[link]:
                symlinks[link] = symlinks[link] + symlinks[link_to_test]


def _read_manual_lines():
    """Return the head of "copyright" up to the third "License:" line."""
    kept = []
    license_line_count = 0
    with open("copyright", encoding="utf-8") as copyright_file:
        for line in copyright_file:
            kept.append(line)
            if LICENSE_FIELD_RE.search(line):
                license_line_count += 1
                if license_line_count == 3:
                    # Add a final newline for separation
                    kept.append("\n")
                    break
    return "".join(kept)


def main():
    """Scan the manual pages and rewrite the "copyright" file in place."""
    # Sorted so that the result does not depend on directory order.
    for manpage in sorted(Path("..").glob("man*/*")):
        if not manpage.is_file():
            continue
        target, copyright_holders, licenses = _parse_manpage_header(manpage)
        if target is not None:
            symlinks.setdefault(target, []).append(_strip_parent_prefix(manpage))
        else:
            add_manpage_to_shortname(manpage, copyright_holders, licenses)

    _flatten_symlinks()

    # Make sorting of licenses deterministic
    stanzas = []
    license_texts = []
    emitted = set()
    for shortname in sorted(licenses_with_manpages):
        stanzas.append(
            get_copyright_stanza(shortname, licenses_with_manpages[shortname])
        )
        # A combined expression ("A and B") needs a stand-alone License
        # paragraph for every component, each written only once.
        for component in shortname.split(" and "):
            if component in emitted:
                continue
            emitted.add(component)
            text = get_license_text(component)
            if text:
                license_texts.append(text)

    # Read in the first lines of copyright, without the automatically
    # generated parts. Stop after the third occurrence of "License".
    manual_lines = _read_manual_lines()

    # Open the file for output
    with open("copyright", "w", encoding="utf-8") as copyright_file:
        copyright_file.write(manual_lines)
        copyright_file.write("".join(stanzas))
        copyright_file.write("\n".join(license_texts))


if __name__ == "__main__":
    main()