summaryrefslogtreecommitdiffstats
path: root/testing/mozharness/scripts/release/generate-checksums.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/mozharness/scripts/release/generate-checksums.py')
-rw-r--r--testing/mozharness/scripts/release/generate-checksums.py263
1 files changed, 263 insertions, 0 deletions
diff --git a/testing/mozharness/scripts/release/generate-checksums.py b/testing/mozharness/scripts/release/generate-checksums.py
new file mode 100644
index 0000000000..6d01923e44
--- /dev/null
+++ b/testing/mozharness/scripts/release/generate-checksums.py
@@ -0,0 +1,263 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import binascii
+import hashlib
+import os
+import re
+import sys
+from multiprocessing.pool import ThreadPool
+
+import six
+
+sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
+
+from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
+from mozharness.base.script import BaseScript
+from mozharness.mozilla.checksums import parse_checksums_file
+from mozharness.mozilla.merkle import MerkleTree
+
+
+class ChecksumsGenerator(BaseScript, VirtualenvMixin):
+ config_options = [
+ [
+ ["--stage-product"],
+ {
+ "dest": "stage_product",
+ "help": "Name of product used in file server's directory structure, "
+ "e.g.: firefox, mobile",
+ },
+ ],
+ [
+ ["--version"],
+ {
+ "dest": "version",
+ "help": "Version of release, e.g.: 59.0b5",
+ },
+ ],
+ [
+ ["--build-number"],
+ {
+ "dest": "build_number",
+ "help": "Build number of release, e.g.: 2",
+ },
+ ],
+ [
+ ["--bucket-name"],
+ {
+ "dest": "bucket_name",
+ "help": "Full bucket name e.g.: net-mozaws-prod-delivery-{firefox,archive}.",
+ },
+ ],
+ [
+ ["-j", "--parallelization"],
+ {
+ "dest": "parallelization",
+ "default": 20,
+ "type": int,
+ "help": "Number of checksums file to download concurrently",
+ },
+ ],
+ [
+ ["--branch"],
+ {
+ "dest": "branch",
+ "help": "dummy option",
+ },
+ ],
+ [
+ ["--build-pool"],
+ {
+ "dest": "build_pool",
+ "help": "dummy option",
+ },
+ ],
+ ] + virtualenv_config_options
+
+ def __init__(self):
+ BaseScript.__init__(
+ self,
+ config_options=self.config_options,
+ require_config_file=False,
+ config={
+ "virtualenv_modules": [
+ "boto",
+ ],
+ "virtualenv_path": "venv",
+ },
+ all_actions=[
+ "create-virtualenv",
+ "collect-individual-checksums",
+ "create-big-checksums",
+ "create-summary",
+ ],
+ default_actions=[
+ "create-virtualenv",
+ "collect-individual-checksums",
+ "create-big-checksums",
+ "create-summary",
+ ],
+ )
+
+ self.checksums = {}
+ self.file_prefix = self._get_file_prefix()
+
+ def _pre_config_lock(self, rw_config):
+ super(ChecksumsGenerator, self)._pre_config_lock(rw_config)
+
+ # These defaults are set here rather in the config because default
+ # lists cannot be completely overidden, only appended to.
+ if not self.config.get("formats"):
+ self.config["formats"] = ["sha512", "sha256"]
+
+ if not self.config.get("includes"):
+ self.config["includes"] = [
+ r"^.*\.tar\.bz2$",
+ r"^.*\.tar\.xz$",
+ r"^.*\.snap$",
+ r"^.*\.dmg$",
+ r"^.*\.pkg$",
+ r"^.*\.bundle$",
+ r"^.*\.mar$",
+ r"^.*Setup.*\.exe$",
+ r"^.*Installer\.exe$",
+ r"^.*\.msi$",
+ r"^.*\.xpi$",
+ r"^.*fennec.*\.apk$",
+ r"^.*/jsshell.*$",
+ ]
+
+ def _get_file_prefix(self):
+ return "pub/{}/candidates/{}-candidates/build{}/".format(
+ self.config["stage_product"],
+ self.config["version"],
+ self.config["build_number"],
+ )
+
+ def _get_sums_filename(self, format_):
+ return "{}SUMS".format(format_.upper())
+
+ def _get_summary_filename(self, format_):
+ return "{}SUMMARY".format(format_.upper())
+
+ def _get_hash_function(self, format_):
+ if format_ in ("sha256", "sha384", "sha512"):
+ return getattr(hashlib, format_)
+ else:
+ self.fatal("Unsupported format {}".format(format_))
+
+ def _get_bucket(self):
+ self.activate_virtualenv()
+ from boto import connect_s3
+
+ self.info("Connecting to S3")
+ conn = connect_s3(anon=True)
+ self.info("Connecting to bucket {}".format(self.config["bucket_name"]))
+ self.bucket = conn.get_bucket(self.config["bucket_name"])
+ return self.bucket
+
+ def collect_individual_checksums(self):
+ """This step grabs all of the small checksums files for the release,
+ filters out any unwanted files from within them, and adds the remainder
+ to self.checksums for subsequent steps to use."""
+ bucket = self._get_bucket()
+ self.info("File prefix is: {}".format(self.file_prefix))
+
+ # temporary holding place for checksums
+ raw_checksums = []
+
+ def worker(item):
+ self.debug("Downloading {}".format(item))
+ sums = bucket.get_key(item).get_contents_as_string()
+ raw_checksums.append(sums)
+
+ def find_checksums_files():
+ self.info("Getting key names from bucket")
+ checksum_files = {"beets": [], "checksums": []}
+ for key in bucket.list(prefix=self.file_prefix):
+ if key.key.endswith(".checksums"):
+ self.debug("Found checksums file: {}".format(key.key))
+ checksum_files["checksums"].append(key.key)
+ elif key.key.endswith(".beet"):
+ self.debug("Found beet file: {}".format(key.key))
+ checksum_files["beets"].append(key.key)
+ else:
+ self.debug("Ignoring non-checksums file: {}".format(key.key))
+ if checksum_files["beets"]:
+ self.log("Using beet format")
+ return checksum_files["beets"]
+ else:
+ self.log("Using checksums format")
+ return checksum_files["checksums"]
+
+ pool = ThreadPool(self.config["parallelization"])
+ pool.map(worker, find_checksums_files())
+
+ for c in raw_checksums:
+ for f, info in six.iteritems(parse_checksums_file(c)):
+ for pattern in self.config["includes"]:
+ if re.search(pattern, f):
+ if f in self.checksums:
+ if info == self.checksums[f]:
+ self.debug(
+ "Duplicate checksum for file {}"
+ " but the data matches;"
+ " continuing...".format(f)
+ )
+ continue
+ self.fatal(
+ "Found duplicate checksum entry for {}, "
+ "don't know which one to pick.".format(f)
+ )
+ if not set(self.config["formats"]) <= set(info["hashes"]):
+ self.fatal("Missing necessary format for file {}".format(f))
+ self.debug("Adding checksums for file: {}".format(f))
+ self.checksums[f] = info
+ break
+ else:
+ self.debug("Ignoring checksums for file: {}".format(f))
+
+ def create_summary(self):
+ """
+ This step computes a Merkle tree over the checksums for each format
+ and writes a file containing the head of the tree and inclusion proofs
+ for each file.
+ """
+ for fmt in self.config["formats"]:
+ hash_fn = self._get_hash_function(fmt)
+ files = [fn for fn in sorted(self.checksums)]
+ data = [self.checksums[fn]["hashes"][fmt] for fn in files]
+
+ tree = MerkleTree(hash_fn, data)
+ head = binascii.hexlify(tree.head())
+ proofs = [
+ binascii.hexlify(tree.inclusion_proof(i).to_rfc6962_bis())
+ for i in range(len(files))
+ ]
+
+ summary = self._get_summary_filename(fmt)
+ self.info("Creating summary file: {}".format(summary))
+
+ content = "{} TREE_HEAD\n".format(head.decode("ascii"))
+ for i in range(len(files)):
+ content += "{} {}\n".format(proofs[i].decode("ascii"), files[i])
+
+ self.write_to_file(summary, content)
+
+ def create_big_checksums(self):
+ for fmt in self.config["formats"]:
+ sums = self._get_sums_filename(fmt)
+ self.info("Creating big checksums file: {}".format(sums))
+ with open(sums, "w+") as output_file:
+ for fn in sorted(self.checksums):
+ output_file.write(
+ "{} {}\n".format(
+ self.checksums[fn]["hashes"][fmt].decode("ascii"), fn
+ )
+ )
+
+
+if __name__ == "__main__":
+ myScript = ChecksumsGenerator()
+ myScript.run_and_exit()