diff options
Diffstat (limited to 'tools/moztreedocs/upload.py')
-rw-r--r-- | tools/moztreedocs/upload.py | 175 |
1 files changed, 175 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Recovered from the diff that adds tools/moztreedocs/upload.py
# (new file, mode 100644, blob ebc8715cda).

from __future__ import absolute_import, unicode_literals, print_function

import io
import mimetypes
import os
import sys

import botocore
import boto3
import concurrent.futures as futures
import requests
from pprint import pprint

from mozbuild.util import memoize


@memoize
def create_aws_session():
    """Create the S3 client and pick the bucket used for the docs.

    The result is memoized, so upload, delete and redirect handling all
    share the same client.

    The bucket is selected from the ``MOZ_SCM_LEVEL`` environment variable
    (default ``"1"``). When ``TASK_ID`` is set in the environment, the AWS
    credentials are fetched from the Taskcluster secrets service; otherwise
    boto3 falls back to its own credential lookup.

    Returns a ``(s3_client, bucket_name)`` tuple.

    Raises ``KeyError`` if ``MOZ_SCM_LEVEL`` is not ``"1"``, ``"2"`` or
    ``"3"``, and ``requests.HTTPError`` if the secret cannot be fetched.
    """
    region = "us-west-2"
    level = os.environ.get("MOZ_SCM_LEVEL", "1")
    bucket = {
        "1": "gecko-docs.mozilla.org-l1",
        "2": "gecko-docs.mozilla.org-l2",
        "3": "gecko-docs.mozilla.org",
    }[level]
    secrets_url = "http://taskcluster/secrets/v1/secret/"
    secrets_url += "project/releng/gecko/build/level-{}/gecko-docs-upload".format(level)

    # Get the credentials from the TC secrets service. Note that these
    # differ per SCM level.
    if "TASK_ID" in os.environ:
        print("Using AWS credentials from the secrets service")
        # Use a dedicated name and a context manager so the HTTP session is
        # closed and is not confused with the boto3 session created below.
        with requests.Session() as http_session:
            res = http_session.get(secrets_url)
            res.raise_for_status()
            secret = res.json()["secret"]
        session = boto3.session.Session(
            aws_access_key_id=secret["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=secret["AWS_SECRET_ACCESS_KEY"],
            region_name=region,
        )
    else:
        print("Trying to use your AWS credentials..")
        session = boto3.session.Session(region_name=region)

    # The pool size matches the number of upload worker threads in s3_upload.
    s3 = session.client("s3", config=botocore.client.Config(max_pool_connections=20))

    return s3, bucket


@memoize
def get_s3_keys(s3, bucket):
    """Return the list of all object keys currently stored in ``bucket``.

    The listing is paginated: requests are repeated with the continuation
    token until the response no longer carries one. The result is memoized
    per ``(s3, bucket)`` pair, so callers must not mutate the returned list.
    """
    kwargs = {"Bucket": bucket}
    all_keys = []
    while True:
        response = s3.list_objects_v2(**kwargs)
        # "Contents" is omitted from the response when a page holds no
        # objects (e.g. an empty bucket), so do not index it directly.
        all_keys.extend(obj["Key"] for obj in response.get("Contents", []))

        try:
            kwargs["ContinuationToken"] = response["NextContinuationToken"]
        except KeyError:
            break

    return all_keys


def s3_set_redirects(redirects):
    """Install the website configuration (index document + redirects).

    ``redirects`` is a mapping of ``key prefix -> replacement key prefix``.
    Each entry becomes one S3 routing rule. On SCM level 3 the redirect
    additionally points at the ``firefox-source-docs.mozilla.org`` host.
    """
    s3, bucket = create_aws_session()

    configuration = {"IndexDocument": {"Suffix": "index.html"}, "RoutingRules": []}

    # The environment does not change while building the rules.
    is_level_3 = os.environ.get("MOZ_SCM_LEVEL") == "3"

    for path, redirect in redirects.items():
        rule = {
            "Condition": {"KeyPrefixEquals": path},
            "Redirect": {"ReplaceKeyPrefixWith": redirect},
        }
        if is_level_3:
            rule["Redirect"]["HostName"] = "firefox-source-docs.mozilla.org"

        configuration["RoutingRules"].append(rule)

    s3.put_bucket_website(
        Bucket=bucket,
        WebsiteConfiguration=configuration,
    )


def s3_delete_missing(files, key_prefix=None):
    """Delete files in the S3 bucket that are no longer part of ``files``.

    ``files`` is an iterable of ``(path, BaseFile)`` pairs; only the paths
    are used.

    If ``key_prefix`` is given, only keys stored under ``<key_prefix>/`` are
    considered, and those not matching ``<key_prefix>/<path>`` for some
    given path are removed. If ``key_prefix`` is not given, every key that
    does NOT live under ``main/`` is considered, and those not matching a
    given path are removed.
    """
    s3, bucket = create_aws_session()
    files_on_server = get_s3_keys(s3, bucket)
    if key_prefix:
        # Match on "<key_prefix>/" rather than the bare prefix so that
        # sibling prefixes (e.g. "mainline/" for prefix "main") are never
        # mistaken for stale files and deleted.
        prefix = key_prefix + "/"
        files_on_server = [path for path in files_on_server if path.startswith(prefix)]
        expected = {prefix + path for path, _ in files}
    else:
        files_on_server = [
            path for path in files_on_server if not path.startswith("main/")
        ]
        expected = {path for path, _ in files}

    # Set membership keeps this linear in the number of keys on the server.
    files_to_delete = [path for path in files_on_server if path not in expected]

    # delete_objects accepts at most 1000 keys per request.
    query_size = 1000
    for start in range(0, len(files_to_delete), query_size):
        keys_to_remove = [
            {"Key": key} for key in files_to_delete[start : start + query_size]
        ]
        response = s3.delete_objects(
            Bucket=bucket,
            Delete={"Objects": keys_to_remove},
        )
        pprint(response, indent=2)


def s3_upload(files, key_prefix=None):
    """Upload files to an S3 bucket.

    ``files`` is an iterable of ``(path, BaseFile)`` (typically from a
    mozpack Finder).

    Keys in the bucket correspond to source filenames. If ``key_prefix`` is
    defined, key names will be ``<key_prefix>/<path>``.

    After the uploads have been submitted, keys on the server that are not
    part of ``files`` are removed (see ``s3_delete_missing``). Any exception
    raised by an individual upload is re-raised at the end.
    """
    s3, bucket = create_aws_session()

    # ``files`` is walked twice (upload, then stale-key detection). If it
    # were a one-shot iterator, the second pass would see nothing and every
    # key under the prefix would be treated as stale and deleted.
    files = list(files)

    def upload(f, path, bucket, key, extra_args):
        # Need to flush to avoid buffering/interleaving from multiple threads.
        sys.stdout.write("uploading %s to %s\n" % (path, key))
        sys.stdout.flush()
        s3.upload_fileobj(f, bucket, key, ExtraArgs=extra_args)

    fs = []
    with futures.ThreadPoolExecutor(20) as e:
        for path, f in files:
            content_type, content_encoding = mimetypes.guess_type(path)
            extra_args = {}
            if content_type:
                if content_type.startswith("text/"):
                    content_type += '; charset="utf-8"'
                extra_args["ContentType"] = content_type
            if content_encoding:
                extra_args["ContentEncoding"] = content_encoding

            if key_prefix:
                key = "%s/%s" % (key_prefix, path)
            else:
                key = path

            # The file types returned by mozpack behave like file objects. But
            # they don't accept an argument to read(). So we wrap in a BytesIO.
            fs.append(
                e.submit(upload, io.BytesIO(f.read()), path, bucket, key, extra_args)
            )

    s3_delete_missing(files, key_prefix)
    # Need to do this to catch any exceptions.
    for future in fs:
        future.result()