summaryrefslogtreecommitdiffstats
path: root/taskcluster/gecko_taskgraph/util/partners.py
blob: 2546e1ae88b27a6353bd6a47cb1407b4c98ca05b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import json
import logging
import os
import xml.etree.ElementTree as ET
from urllib.parse import urlencode

import requests
import yaml
from redo import retry
from taskgraph.util.schema import resolve_keyed_by

from gecko_taskgraph.util.attributes import release_level
from gecko_taskgraph.util.copy_task import copy_task

# Suppress chatty requests logging
logging.getLogger("requests").setLevel(logging.WARNING)

log = logging.getLogger(__name__)

GITHUB_API_ENDPOINT = "https://api.github.com/graphql"

"""
LOGIN_QUERY, MANIFEST_QUERY, and REPACK_CFG_QUERY are all written to the Github v4 API,
which users GraphQL. See https://developer.github.com/v4/
"""

LOGIN_QUERY = """query {
  viewer {
    login
    name
  }
}
"""

# Returns the contents of default.xml from a manifest repository
MANIFEST_QUERY = """query {
  repository(owner:"%(owner)s", name:"%(repo)s") {
    object(expression: "master:%(file)s") {
      ... on Blob {
        text
      }
    }
  }
}
"""
# Example response:
# {
#   "data": {
#     "repository": {
#       "object": {
#         "text": "<?xml version=\"1.0\" ?>\n<manifest>\n  " +
#           "<remote fetch=\"git@github.com:mozilla-partners/\" name=\"mozilla-partners\"/>\n  " +
#           "<remote fetch=\"git@github.com:mozilla/\" name=\"mozilla\"/>\n\n  " +
#           "<project name=\"repack-scripts\" path=\"scripts\" remote=\"mozilla-partners\" " +
#           "revision=\"master\"/>\n  <project name=\"build-tools\" path=\"scripts/tools\" " +
#           "remote=\"mozilla\" revision=\"master\"/>\n  <project name=\"mozilla-EME-free\" " +
#           "path=\"partners/mozilla-EME-free\" remote=\"mozilla-partners\" " +
#           "revision=\"master\"/>\n</manifest>\n"
#       }
#     }
#   }
# }

# Returns the contents of desktop/*/repack.cfg for a partner repository
REPACK_CFG_QUERY = """query{
  repository(owner:"%(owner)s", name:"%(repo)s") {
    object(expression: "%(revision)s:desktop/"){
      ... on Tree {
        entries {
          name
          object {
            ... on Tree {
              entries {
                name
                object {
                  ... on Blob {
                    text
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
"""
# Example response:
# {
#   "data": {
#     "repository": {
#       "object": {
#         "entries": [
#           {
#             "name": "mozilla-EME-free",
#             "object": {
#               "entries": [
#                 {
#                   "name": "distribution",
#                   "object": {}
#                 },
#                 {
#                   "name": "repack.cfg",
#                   "object": {
#                     "text": "aus=\"mozilla-EMEfree\"\ndist_id=\"mozilla-EMEfree\"\n" +
#                             "dist_version=\"1.0\"\nlinux-i686=true\nlinux-x86_64=true\n" +
#                             " locales=\"ach af de en-US\"\nmac=true\nwin32=true\nwin64=true\n" +
#                             "output_dir=\"%(platform)s-EME-free/%(locale)s\"\n\n" +
#                             "# Upload params\nbucket=\"net-mozaws-prod-delivery-firefox\"\n" +
#                             "upload_to_candidates=true\n"
#                   }
#                 }
#               ]
#             }
#           }
#         ]
#       }
#     }
#   }
# }

# Map platforms in repack.cfg into their equivalents in taskcluster
TC_PLATFORM_PER_FTP = {
    "linux-i686": "linux-shippable",
    "linux-x86_64": "linux64-shippable",
    "mac": "macosx64-shippable",
    "win32": "win32-shippable",
    "win64": "win64-shippable",
    "win64-aarch64": "win64-aarch64-shippable",
}

TASKCLUSTER_PROXY_SECRET_ROOT = "http://taskcluster/secrets/v1/secret"

LOCALES_FILE = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
    "browser",
    "locales",
    "l10n-changesets.json",
)

# cache data at the module level
partner_configs = {}


def get_token(params):
    """We use a Personal Access Token from Github to lookup partner config. No extra scopes are
    needed on the token to read public repositories, but need the 'repo' scope to see private
    repositories. This is not fine grained and also grants r/w access, but is revoked at the repo
    level.
    """

    # Allow for local taskgraph debugging
    if os.environ.get("GITHUB_API_TOKEN"):
        return os.environ["GITHUB_API_TOKEN"]

    # The 'usual' method - via taskClusterProxy for decision tasks
    url = "{secret_root}/project/releng/gecko/build/level-{level}/partner-github-api".format(
        secret_root=TASKCLUSTER_PROXY_SECRET_ROOT, **params
    )
    try:
        resp = retry(
            requests.get,
            attempts=2,
            sleeptime=10,
            args=(url,),
            kwargs={"timeout": 60, "headers": ""},
        )
        j = resp.json()
        return j["secret"]["key"]
    except (requests.ConnectionError, ValueError, KeyError):
        raise RuntimeError("Could not get Github API token to lookup partner data")


def query_api(query, token):
    """Make a query with a Github auth header, returning the json"""
    headers = {"Authorization": "bearer %s" % token}
    r = requests.post(GITHUB_API_ENDPOINT, json={"query": query}, headers=headers)
    r.raise_for_status()

    j = r.json()
    if "errors" in j:
        raise RuntimeError("Github query error - %s", j["errors"])
    return j


def check_login(token):
    log.debug("Checking we have a valid login")
    query_api(LOGIN_QUERY, token)


def get_repo_params(repo):
    """Parse the organisation and repo name from an https or git url for a repo"""
    if repo.startswith("https"):
        # eg https://github.com/mozilla-partners/mozilla-EME-free
        return repo.rsplit("/", 2)[-2:]
    if repo.startswith("git@"):
        # eg git@github.com:mozilla-partners/mailru.git
        repo = repo.replace(".git", "")
        return repo.split(":")[-1].split("/")


def get_partners(manifestRepo, token):
    """Given the url to a manifest repository, retrieve the default.xml and parse it into a
    list of partner repos.
    """
    log.debug("Querying for manifest default.xml in %s", manifestRepo)
    owner, repo = get_repo_params(manifestRepo)
    query = MANIFEST_QUERY % {"owner": owner, "repo": repo, "file": "default.xml"}
    raw_manifest = query_api(query, token)
    log.debug("Raw manifest: %s", raw_manifest)
    if not raw_manifest["data"]["repository"]:
        raise RuntimeError(
            "Couldn't load partner manifest at %s, insufficient permissions ?"
            % manifestRepo
        )
    e = ET.fromstring(raw_manifest["data"]["repository"]["object"]["text"])

    remotes = {}
    partners = {}
    for child in e:
        if child.tag == "remote":
            name = child.attrib["name"]
            url = child.attrib["fetch"]
            remotes[name] = url
            log.debug("Added remote %s at %s", name, url)
        elif child.tag == "project":
            # we don't need to check any code repos
            if "scripts" in child.attrib["path"]:
                continue
            owner, _ = get_repo_params(remotes[child.attrib["remote"]] + "_")
            partner_url = {
                "owner": owner,
                "repo": child.attrib["name"],
                "revision": child.attrib["revision"],
            }
            partners[child.attrib["name"]] = partner_url
            log.debug(
                "Added partner %s at revision %s"
                % (partner_url["repo"], partner_url["revision"])
            )
    return partners


def parse_config(data):
    """Parse a single repack.cfg file into a python dictionary.
    data is contents of the file, in "foo=bar\nbaz=buzz" style. We do some translation on
    locales and platforms data, otherwise passthrough
    """
    ALLOWED_KEYS = (
        "locales",
        "platforms",
        "upload_to_candidates",
        "repack_stub_installer",
        "publish_to_releases",
    )
    config = {"platforms": []}
    for l in data.splitlines():
        if "=" in l:
            l = str(l)
            key, value = l.split("=", 1)
            value = value.strip("'\"").rstrip("'\"")
            if key in TC_PLATFORM_PER_FTP.keys():
                if value.lower() == "true":
                    config["platforms"].append(TC_PLATFORM_PER_FTP[key])
                continue
            if key not in ALLOWED_KEYS:
                continue
            if key == "locales":
                # a list please
                value = value.split(" ")
            config[key] = value
    return config


def get_repack_configs(repackRepo, token):
    """For a partner repository, retrieve all the repack.cfg files and parse them into a dict"""
    log.debug("Querying for configs in %s", repackRepo)
    query = REPACK_CFG_QUERY % repackRepo
    raw_configs = query_api(query, token)
    raw_configs = raw_configs["data"]["repository"]["object"]["entries"]

    configs = {}
    for sub_config in raw_configs:
        name = sub_config["name"]
        for file in sub_config["object"].get("entries", []):
            if file["name"] != "repack.cfg":
                continue
            configs[name] = parse_config(file["object"]["text"])
    return configs


def get_attribution_config(manifestRepo, token):
    log.debug("Querying for manifest attribution_config.yml in %s", manifestRepo)
    owner, repo = get_repo_params(manifestRepo)
    query = MANIFEST_QUERY % {
        "owner": owner,
        "repo": repo,
        "file": "attribution_config.yml",
    }
    raw_manifest = query_api(query, token)
    if not raw_manifest["data"]["repository"]:
        raise RuntimeError(
            "Couldn't load partner manifest at %s, insufficient permissions ?"
            % manifestRepo
        )
    # no file has been set up, gracefully continue
    if raw_manifest["data"]["repository"]["object"] is None:
        log.debug("No attribution_config.yml file found")
        return {}

    return yaml.safe_load(raw_manifest["data"]["repository"]["object"]["text"])


def get_partner_config_by_url(manifest_url, kind, token, partner_subset=None):
    """Retrieve partner data starting from the manifest url, which points to a repository
    containing a default.xml that is intended to be drive the Google tool 'repo'. It
    descends into each partner repo to lookup and parse the repack.cfg file(s).

    If partner_subset is a list of sub_config names only return data for those.

    Supports caching data by kind to avoid repeated requests, relying on the related kinds for
    partner repacking, signing, repackage, repackage signing all having the same kind prefix.
    """
    if not manifest_url:
        raise RuntimeError(f"Manifest url for {kind} not defined")
    if kind not in partner_configs:
        log.info("Looking up data for %s from %s", kind, manifest_url)
        check_login(token)
        if kind == "release-partner-attribution":
            partner_configs[kind] = get_attribution_config(manifest_url, token)
        else:
            partners = get_partners(manifest_url, token)

            partner_configs[kind] = {}
            for partner, partner_url in partners.items():
                if partner_subset and partner not in partner_subset:
                    continue
                partner_configs[kind][partner] = get_repack_configs(partner_url, token)

    return partner_configs[kind]


def check_if_partners_enabled(config, tasks):
    if (
        (
            config.params["release_enable_partner_repack"]
            and config.kind.startswith("release-partner-repack")
        )
        or (
            config.params["release_enable_partner_attribution"]
            and config.kind.startswith("release-partner-attribution")
        )
        or (
            config.params["release_enable_emefree"]
            and config.kind.startswith("release-eme-free-")
        )
    ):
        yield from tasks


def get_partner_config_by_kind(config, kind):
    """Retrieve partner data starting from the manifest url, which points to a repository
    containing a default.xml that is intended to be drive the Google tool 'repo'. It
    descends into each partner repo to lookup and parse the repack.cfg file(s).

    Supports caching data by kind to avoid repeated requests, relying on the related kinds for
    partner repacking, signing, repackage, repackage signing all having the same kind prefix.
    """
    partner_subset = config.params["release_partners"]
    partner_configs = config.params["release_partner_config"] or {}

    # TODO eme-free should be a partner; we shouldn't care about per-kind
    for k in partner_configs:
        if kind.startswith(k):
            kind_config = partner_configs[k]
            break
    else:
        return {}
    # if we're only interested in a subset of partners we remove the rest
    if partner_subset:
        if kind.startswith("release-partner-repack"):
            # TODO - should be fatal to have an unknown partner in partner_subset
            for partner in [p for p in kind_config.keys() if p not in partner_subset]:
                del kind_config[partner]
        elif kind.startswith("release-partner-attribution") and isinstance(
            kind_config, dict
        ):
            all_configs = copy_task(kind_config.get("configs", []))
            kind_config["configs"] = []
            for this_config in all_configs:
                if this_config["campaign"] in partner_subset:
                    kind_config["configs"].append(this_config)
    return kind_config


def _fix_subpartner_locales(orig_config, all_locales):
    subpartner_config = copy_task(orig_config)
    # Get an ordered list of subpartner locales that is a subset of all_locales
    subpartner_config["locales"] = sorted(
        list(set(orig_config["locales"]) & set(all_locales))
    )
    return subpartner_config


def fix_partner_config(orig_config):
    pc = {}
    with open(LOCALES_FILE) as fh:
        all_locales = list(json.load(fh).keys())
    # l10n-changesets.json doesn't include en-US, but the repack list does
    if "en-US" not in all_locales:
        all_locales.append("en-US")
    for kind, kind_config in orig_config.items():
        if kind == "release-partner-attribution":
            pc[kind] = {}
            if kind_config:
                pc[kind] = {"defaults": kind_config["defaults"]}
                for config in kind_config["configs"]:
                    # Make sure our locale list is a subset of all_locales
                    pc[kind].setdefault("configs", []).append(
                        _fix_subpartner_locales(config, all_locales)
                    )
        else:
            for partner, partner_config in kind_config.items():
                for subpartner, subpartner_config in partner_config.items():
                    # get rid of empty subpartner configs
                    if not subpartner_config:
                        continue
                    # Make sure our locale list is a subset of all_locales
                    pc.setdefault(kind, {}).setdefault(partner, {})[
                        subpartner
                    ] = _fix_subpartner_locales(subpartner_config, all_locales)
    return pc


# seems likely this exists elsewhere already
def get_ftp_platform(platform):
    if platform.startswith("win32"):
        return "win32"
    if platform.startswith("win64-aarch64"):
        return "win64-aarch64"
    if platform.startswith("win64"):
        return "win64"
    if platform.startswith("macosx"):
        return "mac"
    if platform.startswith("linux-"):
        return "linux-i686"
    if platform.startswith("linux64"):
        return "linux-x86_64"
    raise ValueError(f"Unimplemented platform {platform}")


# Ugh
def locales_per_build_platform(build_platform, locales):
    if build_platform.startswith("mac"):
        exclude = ["ja"]
    else:
        exclude = ["ja-JP-mac"]
    return [locale for locale in locales if locale not in exclude]


def get_partner_url_config(parameters, graph_config):
    partner_url_config = copy_task(graph_config["partner-urls"])
    substitutions = {
        "release-product": parameters["release_product"],
        "release-level": release_level(parameters["project"]),
        "release-type": parameters["release_type"],
    }
    resolve_keyed_by(
        partner_url_config,
        "release-eme-free-repack",
        "eme-free manifest_url",
        **substitutions,
    )
    resolve_keyed_by(
        partner_url_config,
        "release-partner-repack",
        "partner manifest url",
        **substitutions,
    )
    resolve_keyed_by(
        partner_url_config,
        "release-partner-attribution",
        "partner attribution url",
        **substitutions,
    )
    return partner_url_config


def get_repack_ids_by_platform(config, build_platform):
    partner_config = get_partner_config_by_kind(config, config.kind)
    combinations = []
    for partner, subconfigs in partner_config.items():
        for sub_config_name, sub_config in subconfigs.items():
            if build_platform not in sub_config.get("platforms", []):
                continue
            locales = locales_per_build_platform(
                build_platform, sub_config.get("locales", [])
            )
            for locale in locales:
                combinations.append(f"{partner}/{sub_config_name}/{locale}")
    return sorted(combinations)


def get_partners_to_be_published(config):
    # hardcoded kind because release-bouncer-aliases doesn't match otherwise
    partner_config = get_partner_config_by_kind(config, "release-partner-repack")
    partners = []
    for partner, subconfigs in partner_config.items():
        for sub_config_name, sub_config in subconfigs.items():
            if sub_config.get("publish_to_releases"):
                partners.append((partner, sub_config_name, sub_config["platforms"]))
    return partners


def apply_partner_priority(config, jobs):
    priority = None
    # Reduce the priority of the partner repack jobs because they don't block QE. Meanwhile
    # leave EME-free jobs alone because they do, and they'll get the branch priority like the rest
    # of the release. Only bother with this in production, not on staging releases on try.
    # medium is the same as mozilla-central, see taskcluster/ci/config.yml. ie higher than
    # integration branches because we don't want to wait a lot for the graph to be done, but
    # for multiple releases the partner tasks always wait for non-partner.
    if (
        config.kind.startswith(
            ("release-partner-repack", "release-partner-attribution")
        )
        and release_level(config.params["project"]) == "production"
    ):
        priority = "medium"
    for job in jobs:
        if priority:
            job["priority"] = priority
        yield job


def generate_attribution_code(defaults, partner):
    params = {
        "medium": defaults["medium"],
        "source": defaults["source"],
        "campaign": partner["campaign"],
        "content": partner["content"],
    }
    if partner.get("variation"):
        params["variation"] = partner["variation"]
    if partner.get("experiment"):
        params["experiment"] = partner["experiment"]

    code = urlencode(params)
    return code