tools/moztreedocs/upload.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, # You can obtain one at http://mozilla.org/MPL/2.0/.

import io
import mimetypes
import os
import sys
from concurrent import futures
from pprint import pprint

import boto3
import botocore
import requests
from mozbuild.util import memoize


@memoize
def create_aws_session():
    """
    This function creates an aws session that is
    shared between upload and delete both.
    """
    region = "us-west-2"
    level = os.environ.get("MOZ_SCM_LEVEL", "1")
    bucket = {
        "1": "gecko-docs.mozilla.org-l1",
        "2": "gecko-docs.mozilla.org-l2",
        "3": "gecko-docs.mozilla.org",
    }[level]
    secrets_url = "http://taskcluster/secrets/v1/secret/"
    secrets_url += "project/releng/gecko/build/level-{}/gecko-docs-upload".format(level)

    # Get the credentials from the TC secrets service.  Note that these
    # differ per SCM level
    if "TASK_ID" in os.environ:
        print("Using AWS credentials from the secrets service")
        session = requests.Session()
        res = session.get(secrets_url)
        res.raise_for_status()
        secret = res.json()["secret"]
        session = boto3.session.Session(
            aws_access_key_id=secret["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=secret["AWS_SECRET_ACCESS_KEY"],
            region_name=region,
        )
    else:
        print("Trying to use your AWS credentials..")
        session = boto3.session.Session(region_name=region)

    s3 = session.client("s3", config=botocore.client.Config(max_pool_connections=20))

    return s3, bucket


@memoize
def get_s3_keys(s3, bucket):
    kwargs = {"Bucket": bucket}
    all_keys = []
    while True:
        response = s3.list_objects_v2(**kwargs)
        for obj in response["Contents"]:
            all_keys.append(obj["Key"])

        try:
            kwargs["ContinuationToken"] = response["NextContinuationToken"]
        except KeyError:
            break

    return all_keys


def s3_set_redirects(redirects):

    s3, bucket = create_aws_session()

    configuration = {"IndexDocument": {"Suffix": "index.html"}, "RoutingRules": []}

    for path, redirect in redirects.items():
        rule = {
            "Condition": {"KeyPrefixEquals": path},
            "Redirect": {"ReplaceKeyPrefixWith": redirect},
        }
        if os.environ.get("MOZ_SCM_LEVEL") == "3":
            rule["Redirect"]["HostName"] = "firefox-source-docs.mozilla.org"

        configuration["RoutingRules"].append(rule)

    s3.put_bucket_website(
        Bucket=bucket,
        WebsiteConfiguration=configuration,
    )


def s3_delete_missing(files, key_prefix=None):
    """Delete files in the S3 bucket.

    Delete files on the S3 bucket that doesn't match the files
    given as the param. If the key_prefix is not specified, missing
    files that has main/ as a prefix will be removed. Otherwise, it
    will remove files with the same prefix as key_prefix.
    """
    s3, bucket = create_aws_session()
    files_on_server = get_s3_keys(s3, bucket)
    if key_prefix:
        files_on_server = [
            path for path in files_on_server if path.startswith(key_prefix)
        ]
    else:
        files_on_server = [
            path for path in files_on_server if not path.startswith("main/")
        ]
    files = [key_prefix + "/" + path if key_prefix else path for path, f in files]
    files_to_delete = [path for path in files_on_server if path not in files]

    query_size = 1000
    while files_to_delete:
        keys_to_remove = [{"Key": key} for key in files_to_delete[:query_size]]
        response = s3.delete_objects(
            Bucket=bucket,
            Delete={
                "Objects": keys_to_remove,
            },  # NOQA
        )
        pprint(response, indent=2)
        files_to_delete = files_to_delete[query_size:]


def s3_upload(files, key_prefix=None):
    """Upload files to an S3 bucket.

    ``files`` is an iterable of ``(path, BaseFile)`` (typically from a
    mozpack Finder).

    Keys in the bucket correspond to source filenames. If ``key_prefix`` is
    defined, key names will be ``<key_prefix>/<path>``.
    """
    s3, bucket = create_aws_session()

    def upload(f, path, bucket, key, extra_args):
        # Need to flush to avoid buffering/interleaving from multiple threads.
        sys.stdout.write("uploading %s to %s\n" % (path, key))
        sys.stdout.flush()
        s3.upload_fileobj(f, bucket, key, ExtraArgs=extra_args)

    fs = []
    with futures.ThreadPoolExecutor(20) as e:
        for path, f in files:
            content_type, content_encoding = mimetypes.guess_type(path)
            extra_args = {}
            if content_type:
                if content_type.startswith("text/"):
                    content_type += '; charset="utf-8"'
                extra_args["ContentType"] = content_type
            if content_encoding:
                extra_args["ContentEncoding"] = content_encoding

            if key_prefix:
                key = "%s/%s" % (key_prefix, path)
            else:
                key = path

            # The file types returned by mozpack behave like file objects. But
            # they don't accept an argument to read(). So we wrap in a BytesIO.
            fs.append(
                e.submit(upload, io.BytesIO(f.read()), path, bucket, key, extra_args)
            )

    s3_delete_missing(files, key_prefix)
    # Need to do this to catch any exceptions.
    for f in fs:
        f.result()