summaryrefslogtreecommitdiffstats
path: root/tools/moztreedocs/upload.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/moztreedocs/upload.py175
1 files changed, 175 insertions, 0 deletions
diff --git a/tools/moztreedocs/upload.py b/tools/moztreedocs/upload.py
new file mode 100644
index 0000000000..ebc8715cda
--- /dev/null
+++ b/tools/moztreedocs/upload.py
@@ -0,0 +1,175 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, unicode_literals, print_function
+
+import io
+import mimetypes
+import os
+import sys
+
+import botocore
+import boto3
+import concurrent.futures as futures
+import requests
+from pprint import pprint
+
+from mozbuild.util import memoize
+
+
@memoize
def create_aws_session():
    """Create the S3 client and select the bucket used for docs uploads.

    The result is memoized, so every caller in this module shares the same
    client and bucket name.

    The bucket is chosen from the ``MOZ_SCM_LEVEL`` environment variable
    (``"1"`` when unset). When ``TASK_ID`` is present in the environment the
    AWS credentials are fetched from the Taskcluster secrets service for that
    level; otherwise boto3 is left to find credentials on its own.

    Returns a ``(s3_client, bucket_name)`` tuple.

    Raises ``KeyError`` when ``MOZ_SCM_LEVEL`` is not ``"1"``, ``"2"`` or
    ``"3"``, and whatever ``raise_for_status()`` raises when the secrets
    request fails.
    """
    region = "us-west-2"
    level = os.environ.get("MOZ_SCM_LEVEL", "1")
    bucket = {
        "1": "gecko-docs.mozilla.org-l1",
        "2": "gecko-docs.mozilla.org-l2",
        "3": "gecko-docs.mozilla.org",
    }[level]
    secrets_url = "http://taskcluster/secrets/v1/secret/"
    secrets_url += "project/releng/gecko/build/level-{}/gecko-docs-upload".format(level)

    # Get the credentials from the TC secrets service. Note that these
    # differ per SCM level
    if "TASK_ID" in os.environ:
        print("Using AWS credentials from the secrets service")
        # The HTTP session is only needed for this one request; close it
        # as soon as the secret has been read instead of leaking its
        # connection pool.
        with requests.Session() as http_session:
            res = http_session.get(secrets_url)
            res.raise_for_status()
            secret = res.json()["secret"]
        aws_session = boto3.session.Session(
            aws_access_key_id=secret["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=secret["AWS_SECRET_ACCESS_KEY"],
            region_name=region,
        )
    else:
        print("Trying to use your AWS credentials..")
        aws_session = boto3.session.Session(region_name=region)

    s3 = aws_session.client(
        "s3", config=botocore.client.Config(max_pool_connections=20)
    )

    return s3, bucket
+
+
@memoize
def get_s3_keys(s3, bucket):
    """Return a list with the key of every object stored in ``bucket``.

    ``s3`` is an S3 client exposing ``list_objects_v2``. The listing is
    paginated: each response's ``NextContinuationToken`` is passed back as
    ``ContinuationToken`` until a response no longer carries one.

    The result is memoized per ``(s3, bucket)`` pair.
    """
    kwargs = {"Bucket": bucket}
    all_keys = []
    while True:
        response = s3.list_objects_v2(**kwargs)
        # "Contents" is left out of the response when the listing is empty
        # (e.g. a brand new bucket), so it must not be indexed directly.
        all_keys.extend(obj["Key"] for obj in response.get("Contents", []))

        if "NextContinuationToken" not in response:
            break
        kwargs["ContinuationToken"] = response["NextContinuationToken"]

    return all_keys
+
+
def s3_set_redirects(redirects):
    """Publish ``redirects`` as the website routing rules of the docs bucket.

    ``redirects`` maps a key prefix to the prefix that should replace it.
    Each entry becomes one routing rule. When ``MOZ_SCM_LEVEL`` is ``"3"``
    every rule additionally redirects to the
    ``firefox-source-docs.mozilla.org`` host. The bucket's index document is
    set to ``index.html`` in the same call.
    """
    s3, bucket = create_aws_session()

    # The SCM level cannot change while the rules are being built, so it is
    # looked up once rather than per rule.
    on_level_3 = os.environ.get("MOZ_SCM_LEVEL") == "3"

    routing_rules = []
    for source_prefix, target_prefix in redirects.items():
        redirect = {"ReplaceKeyPrefixWith": target_prefix}
        if on_level_3:
            redirect["HostName"] = "firefox-source-docs.mozilla.org"
        routing_rules.append(
            {
                "Condition": {"KeyPrefixEquals": source_prefix},
                "Redirect": redirect,
            }
        )

    s3.put_bucket_website(
        Bucket=bucket,
        WebsiteConfiguration={
            "IndexDocument": {"Suffix": "index.html"},
            "RoutingRules": routing_rules,
        },
    )
+
+
def s3_delete_missing(files, key_prefix=None):
    """Delete objects in the S3 bucket that are not part of ``files``.

    ``files`` is an iterable of ``(path, BaseFile)`` pairs; only the paths
    are used.

    If ``key_prefix`` is given, only keys under ``<key_prefix>/`` are
    considered, and those that do not correspond to
    ``<key_prefix>/<path>`` for one of the given paths are deleted.

    If ``key_prefix`` is not given, keys under ``main/`` are left untouched
    and every other key that is not one of the given paths is deleted.

    The response of each delete request is pretty-printed to stdout.
    """
    s3, bucket = create_aws_session()
    files_on_server = get_s3_keys(s3, bucket)
    if key_prefix:
        # Match on "<prefix>/" rather than the bare prefix, otherwise a
        # prefix such as "main" would also claim (and delete) unrelated
        # root keys like "maintenance.html".
        prefix = key_prefix + "/"
        files_on_server = [
            path for path in files_on_server if path.startswith(prefix)
        ]
        expected_keys = {prefix + path for path, _ in files}
    else:
        files_on_server = [
            path for path in files_on_server if not path.startswith("main/")
        ]
        expected_keys = {path for path, _ in files}

    # A set keeps the membership test cheap when both sides are large.
    files_to_delete = [
        path for path in files_on_server if path not in expected_keys
    ]

    # Keys are removed in batches of at most ``query_size`` per request.
    query_size = 1000
    for start in range(0, len(files_to_delete), query_size):
        batch = files_to_delete[start : start + query_size]
        response = s3.delete_objects(
            Bucket=bucket,
            Delete={
                "Objects": [{"Key": key} for key in batch],
            },
        )
        pprint(response, indent=2)
+
+
def s3_upload(files, key_prefix=None):
    """Upload files to an S3 bucket and remove keys that are no longer needed.

    ``files`` is an iterable of ``(path, BaseFile)`` (typically from a
    mozpack Finder).

    Keys in the bucket correspond to source filenames. If ``key_prefix`` is
    defined, key names will be ``<key_prefix>/<path>``.

    Uploads run on a pool of 20 threads. Once they have all finished,
    ``s3_delete_missing`` is called with the same ``files`` and
    ``key_prefix``, and finally the first exception raised by any upload (if
    there was one) is re-raised.
    """
    s3, bucket = create_aws_session()

    # ``files`` is walked twice: once to upload and once more by
    # s3_delete_missing. Materialize it so that a one-shot iterable (e.g. a
    # generator) is not already exhausted on the second pass, which would
    # make every key on the server look stale and get deleted.
    files = list(files)

    def upload(f, path, bucket, key, extra_args):
        # Need to flush to avoid buffering/interleaving from multiple threads.
        sys.stdout.write("uploading %s to %s\n" % (path, key))
        sys.stdout.flush()
        s3.upload_fileobj(f, bucket, key, ExtraArgs=extra_args)

    pending = []
    with futures.ThreadPoolExecutor(20) as e:
        for path, f in files:
            content_type, content_encoding = mimetypes.guess_type(path)
            extra_args = {}
            if content_type:
                if content_type.startswith("text/"):
                    content_type += '; charset="utf-8"'
                extra_args["ContentType"] = content_type
            if content_encoding:
                extra_args["ContentEncoding"] = content_encoding

            if key_prefix:
                key = "%s/%s" % (key_prefix, path)
            else:
                key = path

            # The file types returned by mozpack behave like file objects. But
            # they don't accept an argument to read(). So we wrap in a BytesIO.
            pending.append(
                e.submit(upload, io.BytesIO(f.read()), path, bucket, key, extra_args)
            )

    # Stale keys are removed before the upload results are inspected, so a
    # failed upload is only reported after this cleanup has run.
    s3_delete_missing(files, key_prefix)

    # Need to do this to catch any exceptions.
    for future in pending:
        future.result()