diff options
Diffstat (limited to 'scripts/reproducible-check')
-rwxr-xr-x | scripts/reproducible-check | 266 |
1 files changed, 266 insertions, 0 deletions
#!/usr/bin/env python3
#
# Copyright © 2017, 2020 Chris Lamb <lamby@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# pylint: disable=invalid-name
# pylint: enable=invalid-name

import argparse
import bz2
import collections
import json
import logging
import os
import re
import subprocess
import sys
import time

import apt
import requests

try:
    from xdg.BaseDirectory import xdg_cache_home
except ImportError:
    print("This script requires the xdg python3 module.", file=sys.stderr)
    print(
        "Please install the python3-xdg Debian package in order to use this utility.",
        file=sys.stderr,
    )
    sys.exit(1)


class ReproducibleCheck:
    """Report which installed packages are not marked reproducible.

    The installed binary packages (read through python-apt) are compared
    against the status file downloaded from STATUS_URL, which is cached
    locally at CACHE for CACHE_AGE_SECONDS.
    """

    HELP = """
        Reports on the reproducible status of installed packages.
        For more details please see <https://reproducible-builds.org>.
    """

    NAME = os.path.basename(__file__)
    VERSION = 1

    STATUS_URL = "https://tests.reproducible-builds.org/debian/reproducible.json.bz2"

    CACHE = os.path.join(xdg_cache_home, NAME, os.path.basename(STATUS_URL))
    CACHE_AGE_SECONDS = 86400

    @classmethod
    def parse(cls):
        """Parse the command line and return a configured instance."""
        parser = argparse.ArgumentParser(description=cls.HELP)

        parser.add_argument(
            "-d",
            "--debug",
            help="show debugging messages",
            default=False,
            action="store_true",
        )

        parser.add_argument(
            "-r",
            "--raw",
            help="print unreproducible binary packages only (for dd-list -i)",
            default=False,
            action="store_true",
        )

        parser.add_argument(
            "--version",
            help="print version and exit",
            default=False,
            action="store_true",
        )

        return cls(parser.parse_args())

    def __init__(self, args):
        """Store the parsed arguments and configure the root logger."""
        self.args = args

        logging.basicConfig(
            format="%(asctime).19s %(levelname).1s: %(message)s",
            level=logging.DEBUG if args.debug else logging.INFO,
        )

        self.log = logging.getLogger()

    def main(self):
        """Run the check and return the process exit status.

        Returns 0 on success (including ``--version``) and 2 when the
        detected distribution is not Debian.
        """
        if self.args.version:
            print(f"{self.NAME} version {self.VERSION}")
            return 0

        if self.get_distributor_id() != "Debian":
            self.log.error("Refusing to return results for non-Debian distributions")
            return 2

        self.update_cache()

        installed = self.get_installed_packages()
        reproducible = self.get_reproducible_packages()

        if self.args.raw:
            self.output_raw(installed, reproducible)
        else:
            self.output_by_source(installed, reproducible)

        self.log.info(
            "These results are based on data from the Reproducible Builds "
            "CI framework, showing only the theoretical (and "
            "unofficial) reproducibility of these Debian packages."
        )

        return 0

    def get_distributor_id(self):
        """Return the output of ``lsb_release -is``, or "" if unavailable.

        A missing or failing ``lsb_release`` is treated as an unknown
        distribution instead of crashing with a traceback.
        """
        try:
            distribution_id = (
                subprocess.check_output(("lsb_release", "-is")).decode("utf-8").strip()
            )
        except (OSError, subprocess.CalledProcessError):
            # OSError covers the lsb_release binary not being installed.
            distribution_id = ""

        self.log.debug("Detected distribution %s", distribution_id or "(unknown)")

        return distribution_id

    def update_cache(self):
        """Download STATUS_URL into CACHE unless the cache is fresh enough.

        The download is written to a ``.new`` sibling and renamed into
        place, so an interrupted write never replaces an existing cache.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        self.log.debug("Checking cache file %s ...", self.CACHE)

        try:
            if os.path.getmtime(self.CACHE) >= time.time() - self.CACHE_AGE_SECONDS:
                self.log.debug("Cache is up to date")
                return
        except OSError:
            # No cache file yet (or it is unreadable): fall through and fetch.
            pass

        new_cache = f"{self.CACHE}.new"
        self.log.info("Updating cache to %s...", new_cache)

        response = requests.get(self.STATUS_URL, timeout=60)
        # Never store an HTTP error page as if it were the status data; it
        # would otherwise be trusted until the cache expires.
        response.raise_for_status()

        os.makedirs(os.path.dirname(self.CACHE), exist_ok=True)

        with open(new_cache, "wb") as f:
            for content in response.iter_content(chunk_size=2**16):
                f.write(content)

        os.rename(new_cache, self.CACHE)

    def get_reproducible_packages(self):
        """
        Return (source, architecture, version) triplets for reproducible source
        packages.
        """

        self.log.debug("Loading data from cache %s", self.CACHE)

        data = set()
        source_packages = set()

        with bz2.open(self.CACHE, "rt", encoding="utf-8") as f:
            all_packages = json.load(f)

        for pkg in all_packages:
            if pkg["status"] != "reproducible":
                continue

            data.add((pkg["package"], pkg["architecture"], pkg["version"]))

            source_packages.add(pkg["package"])

        self.log.debug("Parsed data about %d source packages", len(source_packages))

        return data

    def get_installed_packages(self):
        """
        Return (binary_package, architecture, version) triplets, mapped to
        their corresponding source package.
        """

        result = {}
        for pkg in apt.Cache():
            for pkg_ver in pkg.versions:
                if not pkg_ver.is_installed:
                    continue

                # We may have installed a binNMU version locally so we need to
                # strip these off when looking up against the JSON of results.
                version = re.sub(r"\+b\d+$", "", pkg_ver.version)

                result[
                    (pkg.shortname, pkg_ver.architecture, version)
                ] = pkg_ver.source_name

        self.log.debug("Parsed %d installed binary packages", len(result))

        return result

    def iter_installed_unreproducible(self, installed, reproducible):
        """Yield (binary, source, version) for each unreproducible package.

        ``installed`` is the mapping from get_installed_packages() and
        ``reproducible`` the set from get_reproducible_packages(). Results
        are yielded in sorted order of the installed keys.
        """
        # "Architecture: all" binary packages should pretend to the system's
        # default architecture for lookup purposes.
        default_architecture = apt.apt_pkg.config.find("APT::Architecture")
        self.log.debug(
            "Using %s as our 'Architecture: all' lookup", default_architecture
        )

        for (binary, architecture, version), source in sorted(installed.items()):
            if architecture == "all":
                architecture = default_architecture

            lookup_key = (source, architecture, version)

            if lookup_key not in reproducible:
                yield binary, source, version

    def output_by_source(self, installed, reproducible):
        """Print unreproducible packages grouped by source, then a summary."""
        by_source = collections.defaultdict(set)

        num_unreproducible = 0
        for binary, source, version in self.iter_installed_unreproducible(
            installed, reproducible
        ):
            by_source[(source, version)].add(binary)
            num_unreproducible += 1

        for (source, version), binaries in sorted(by_source.items()):
            # Calculate some clarifying suffixes/prefixes
            src = ""
            pkgs = ""
            if binaries != {source}:
                src = "src:"
                # Sorted so the output is stable between runs.
                pkgs = f" ({', '.join(sorted(binaries))})"

            print(
                f"{src}{source} ({version}){pkgs} is not reproducible "
                f"<https://tests.reproducible-builds.org/debian/{source}>"
            )

        num_installed = len(installed)
        num_reproducible = num_installed - num_unreproducible
        # Guard against an empty package list to avoid dividing by zero.
        percent = 100.0 * num_reproducible / num_installed if num_installed else 0.0
        print(
            f"{num_reproducible}/{num_installed} ({percent:.2f}%) of "
            f"installed binary packages are reproducible."
        )

    def output_raw(self, installed, reproducible):
        """Print only the names of unreproducible binary packages."""
        for binary, _, _ in self.iter_installed_unreproducible(installed, reproducible):
            print(binary)


if __name__ == "__main__":
    try:
        sys.exit(ReproducibleCheck.parse().main())
    except (KeyboardInterrupt, BrokenPipeError):
        sys.exit(1)