summaryrefslogtreecommitdiffstats
path: root/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py
blob: 6be6e5eeee57facaca7c9da17787a3d88287c972 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Support for optimizing tasks based on the set of files that have changed.
"""


import logging
import os

import requests
from redo import retry

from .util.memoize import memoize
from .util.path import match as match_path
from .util.vcs import get_repository

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


@memoize
def get_changed_files(head_repository_url, head_rev, base_rev=None):
    """
    Return the files changed between ``base_rev`` and ``head_rev``.

    The repository is looked up from the current working directory. For
    Mercurial checkouts the answer is fetched from the remote
    ``json-automationrelevance`` endpoint of ``head_repository_url`` and
    ``base_rev`` is not used; for any other VCS the local repository object
    is asked directly.

    The function is wrapped in ``@memoize``, so calling it repeatedly with
    the same arguments is OK.
    """
    vcs = get_repository(os.getcwd())

    if vcs.tool != "hg":
        return vcs.get_changed_files(rev=head_rev, base_rev=base_rev)

    # TODO Use VCS version once tested enough
    return _get_changed_files_json_automationrelevance(head_repository_url, head_rev)


def _get_changed_files_json_automationrelevance(head_repository_url, head_rev):
    """
    Get the set of files changed in the push headed by the given revision.

    Queries ``<head_repository_url>/json-automationrelevance/<head_rev>`` and
    returns the union of the ``files`` entries of every changeset listed in
    the JSON response. The HTTP request (30 second timeout) is issued through
    ``redo.retry`` with ``attempts=10`` and ``sleeptime=10``.

    Each changeset is also logged at debug level as its short node id
    followed by the first line of its description.
    """
    url = "{}/json-automationrelevance/{}".format(
        head_repository_url.rstrip("/"), head_rev
    )
    logger.debug("Querying version control for metadata: %s", url)

    def get_automationrelevance():
        response = requests.get(url, timeout=30)
        return response.json()

    contents = retry(get_automationrelevance, attempts=10, sleeptime=10)
    changesets = contents["changesets"]

    logger.debug("%d commits influencing task scheduling:", len(changesets))
    changed_files = set()
    for changeset in changesets:
        desc = ""  # Support empty desc
        if changeset["desc"]:
            first_line = changeset["desc"].splitlines()[0]
            # Drop non-ASCII characters so the summary is safe for any log
            # stream, then decode back to ``str``: logging the raw ``bytes``
            # would print a ``b'...'`` repr instead of the text.
            desc = first_line.encode("ascii", "ignore").decode("ascii")
        logger.debug(" %s %s", changeset["node"][:12], desc)
        changed_files.update(changeset["files"])

    return changed_files


def check(params, file_patterns):
    """
    Report whether any changed file matches any of ``file_patterns``.

    ``params`` is read for ``head_repository``, ``head_rev`` and (optionally)
    ``base_rev``. When either of the first two is missing or empty, a warning
    is logged and ``True`` is returned, i.e. every file is assumed to have
    changed. Otherwise the changed files are obtained from
    ``get_changed_files`` and tested against each pattern with
    ``match_path``.
    """
    repo_url = params.get("head_repository")
    revision = params.get("head_rev")
    if not (repo_url and revision):
        logger.warning(
            "Missing `head_repository` or `head_rev` parameters; "
            "assuming all files have changed"
        )
        return True

    touched = get_changed_files(repo_url, revision, params.get("base_rev"))

    # Same iteration order as a pattern-outer / path-inner nested loop.
    return any(
        match_path(candidate, pattern)
        for pattern in file_patterns
        for candidate in touched
    )