diff options
Diffstat (limited to 'testing/mozharness/scripts/release/generate-checksums.py')
-rw-r--r-- | testing/mozharness/scripts/release/generate-checksums.py | 263 |
1 file changed, 263 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Generate the aggregate checksums artifacts for a release.

Downloads every per-upload ``.checksums`` (or ``.beet``) file from the
release candidates directory on the file server, merges them, and writes:

* one big ``<FORMAT>SUMS`` file per configured digest format, and
* one ``<FORMAT>SUMMARY`` file containing a Merkle tree head plus an
  RFC 6962-bis inclusion proof for every listed file.
"""

import binascii
import hashlib
import os
import re
import sys
from multiprocessing.pool import ThreadPool

# Make the mozharness package importable when run as a standalone script
# from testing/mozharness/scripts/release/.
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.script import BaseScript
from mozharness.mozilla.checksums import parse_checksums_file
from mozharness.mozilla.merkle import MerkleTree


class ChecksumsGenerator(BaseScript, VirtualenvMixin):
    """Mozharness script that builds the big SUMS and SUMMARY files."""

    config_options = [
        [
            ["--stage-product"],
            {
                "dest": "stage_product",
                "help": "Name of product used in file server's directory structure, "
                "e.g.: firefox, mobile",
            },
        ],
        [
            ["--version"],
            {
                "dest": "version",
                "help": "Version of release, e.g.: 59.0b5",
            },
        ],
        [
            ["--build-number"],
            {
                "dest": "build_number",
                "help": "Build number of release, e.g.: 2",
            },
        ],
        [
            ["--bucket-name"],
            {
                "dest": "bucket_name",
                "help": "Full bucket name e.g.: moz-fx-productdelivery-pr-38b5-productdelivery.",
            },
        ],
        [
            ["-j", "--parallelization"],
            {
                "dest": "parallelization",
                "default": 20,
                "type": int,
                "help": "Number of checksums file to download concurrently",
            },
        ],
        [
            ["--branch"],
            {
                "dest": "branch",
                "help": "dummy option",
            },
        ],
        [
            ["--build-pool"],
            {
                "dest": "build_pool",
                "help": "dummy option",
            },
        ],
    ] + virtualenv_config_options

    def __init__(self):
        BaseScript.__init__(
            self,
            config_options=self.config_options,
            require_config_file=False,
            config={
                "virtualenv_modules": [
                    "boto",
                ],
                "virtualenv_path": "venv",
            },
            all_actions=[
                "create-virtualenv",
                "collect-individual-checksums",
                "create-big-checksums",
                "create-summary",
            ],
            default_actions=[
                "create-virtualenv",
                "collect-individual-checksums",
                "create-big-checksums",
                "create-summary",
            ],
        )

        # Maps file path -> parsed checksum info (hashes per format, size).
        self.checksums = {}
        self.file_prefix = self._get_file_prefix()

    def _pre_config_lock(self, rw_config):
        super(ChecksumsGenerator, self)._pre_config_lock(rw_config)

        # These defaults are set here rather in the config because default
        # lists cannot be completely overridden, only appended to.
        if not self.config.get("formats"):
            self.config["formats"] = ["sha512", "sha256"]

        if not self.config.get("includes"):
            self.config["includes"] = [
                r"^.*\.tar\.bz2$",
                r"^.*\.tar\.xz$",
                r"^.*\.snap$",
                r"^.*\.dmg$",
                r"^.*\.pkg$",
                r"^.*\.bundle$",
                r"^.*\.mar$",
                r"^.*Setup.*\.exe$",
                r"^.*Installer\.exe$",
                r"^.*\.msi$",
                r"^.*\.xpi$",
                r"^.*fennec.*\.apk$",
                r"^.*/jsshell.*$",
            ]

    def _get_file_prefix(self):
        """Return the bucket key prefix of this release's candidates dir."""
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config["stage_product"],
            self.config["version"],
            self.config["build_number"],
        )

    def _get_sums_filename(self, format_):
        """Return the big checksums filename for a format, e.g. SHA512SUMS."""
        return "{}SUMS".format(format_.upper())

    def _get_summary_filename(self, format_):
        """Return the summary filename for a format, e.g. SHA512SUMMARY."""
        return "{}SUMMARY".format(format_.upper())

    def _get_hash_function(self, format_):
        """Return the hashlib constructor for a supported digest format.

        Calls self.fatal (which exits) for any unsupported format.
        """
        if format_ in ("sha256", "sha384", "sha512"):
            return getattr(hashlib, format_)
        else:
            self.fatal("Unsupported format {}".format(format_))

    def _get_bucket(self):
        """Connect anonymously to the product-delivery bucket on GCS."""
        self.activate_virtualenv()
        from boto import connect_s3

        self.info("Connecting to S3")
        conn = connect_s3(anon=True, host="storage.googleapis.com")
        self.info("Connecting to bucket {}".format(self.config["bucket_name"]))
        self.bucket = conn.get_bucket(self.config["bucket_name"])
        return self.bucket

    def collect_individual_checksums(self):
        """This step grabs all of the small checksums files for the release,
        filters out any unwanted files from within them, and adds the remainder
        to self.checksums for subsequent steps to use."""
        bucket = self._get_bucket()
        self.info("File prefix is: {}".format(self.file_prefix))

        # temporary holding place for checksums
        raw_checksums = []

        def worker(item):
            self.debug("Downloading {}".format(item))
            sums = bucket.get_key(item).get_contents_as_string()
            raw_checksums.append(sums)

        def find_checksums_files():
            self.info("Getting key names from bucket")
            checksum_files = {"beets": [], "checksums": []}
            for key in bucket.list(prefix=self.file_prefix):
                if key.key.endswith(".checksums"):
                    self.debug("Found checksums file: {}".format(key.key))
                    checksum_files["checksums"].append(key.key)
                elif key.key.endswith(".beet"):
                    self.debug("Found beet file: {}".format(key.key))
                    checksum_files["beets"].append(key.key)
                else:
                    self.debug("Ignoring non-checksums file: {}".format(key.key))
            # Prefer the newer beet format when any beet files exist.
            if checksum_files["beets"]:
                self.log("Using beet format")
                return checksum_files["beets"]
            else:
                self.log("Using checksums format")
                return checksum_files["checksums"]

        pool = ThreadPool(self.config["parallelization"])
        pool.map(worker, find_checksums_files())

        for c in raw_checksums:
            for f, info in parse_checksums_file(c).items():
                for pattern in self.config["includes"]:
                    if re.search(pattern, f):
                        if f in self.checksums:
                            if info == self.checksums[f]:
                                self.debug(
                                    "Duplicate checksum for file {}"
                                    " but the data matches;"
                                    " continuing...".format(f)
                                )
                                continue
                            self.fatal(
                                "Found duplicate checksum entry for {}, "
                                "don't know which one to pick.".format(f)
                            )
                        # Every requested format must be present for the file.
                        if not set(self.config["formats"]) <= set(info["hashes"]):
                            self.fatal("Missing necessary format for file {}".format(f))
                        self.debug("Adding checksums for file: {}".format(f))
                        self.checksums[f] = info
                        break
                else:
                    self.debug("Ignoring checksums for file: {}".format(f))

    def create_summary(self):
        """
        This step computes a Merkle tree over the checksums for each format
        and writes a file containing the head of the tree and inclusion proofs
        for each file.
        """
        for fmt in self.config["formats"]:
            hash_fn = self._get_hash_function(fmt)
            # Sort for a deterministic leaf order; proofs are per-file.
            files = sorted(self.checksums)
            data = [self.checksums[fn]["hashes"][fmt] for fn in files]

            tree = MerkleTree(hash_fn, data)
            head = binascii.hexlify(tree.head())
            proofs = [
                binascii.hexlify(tree.inclusion_proof(i).to_rfc6962_bis())
                for i in range(len(files))
            ]

            summary = self._get_summary_filename(fmt)
            self.info("Creating summary file: {}".format(summary))

            content = "{} TREE_HEAD\n".format(head.decode("ascii"))
            for i in range(len(files)):
                content += "{} {}\n".format(proofs[i].decode("ascii"), files[i])

            self.write_to_file(summary, content)

    def create_big_checksums(self):
        """Write one <FORMAT>SUMS file per format: '<hexdigest> <path>' lines."""
        for fmt in self.config["formats"]:
            sums = self._get_sums_filename(fmt)
            self.info("Creating big checksums file: {}".format(sums))
            with open(sums, "w+") as output_file:
                for fn in sorted(self.checksums):
                    output_file.write(
                        "{} {}\n".format(
                            self.checksums[fn]["hashes"][fmt].decode("ascii"), fn
                        )
                    )


if __name__ == "__main__":
    script = ChecksumsGenerator()
    script.run_and_exit()