summaryrefslogtreecommitdiffstats
path: root/tools/tryselect/selectors/coverage.py
blob: a871f35a11162de2a70144600866a06af01259d0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import collections
import datetime
import hashlib
import json
import os
import shutil
import sqlite3
import subprocess

import requests
import six
from mach.util import get_state_dir
from mozbuild.base import MozbuildObject
from mozpack.files import FileFinder
from moztest.resolve import TestResolver
from mozversioncontrol import get_repository_object

from ..cli import BaseTryParser
from ..push import generate_try_task_config, push_to_try
from ..tasks import filter_tasks_by_paths, generate_tasks, resolve_tests_by_suite

# Directory containing this file; passed as ``cwd`` when locating the build
# environment and the test resolver.
here = os.path.abspath(os.path.dirname(__file__))
# The names below are placeholders: setup_globals() assigns their real values.
build = None
vcs = None
CHUNK_MAPPING_FILE = None
CHUNK_MAPPING_TAG_FILE = None


def setup_globals():
    """Populate the module-level ``build``, ``vcs`` and chunk-mapping path globals.

    The chunk mapping cache lives under the mach state directory, in a
    sub-directory keyed by the SHA-256 of the absolute source checkout path.
    The cache directory is created if it does not exist yet.
    """
    # Kept out of module scope to avoid incurring expensive computation on import.
    global build, vcs, CHUNK_MAPPING_TAG_FILE, CHUNK_MAPPING_FILE
    build = MozbuildObject.from_environment(cwd=here)
    vcs = get_repository_object(build.topsrcdir)

    root_hash = hashlib.sha256(
        six.ensure_binary(os.path.abspath(build.topsrcdir))
    ).hexdigest()
    cache_dir = os.path.join(get_state_dir(), "cache", root_hash, "chunk_mapping")
    # exist_ok avoids the check-then-create race when two invocations start
    # at the same time; it still raises if the path exists but is not a directory.
    os.makedirs(cache_dir, exist_ok=True)
    CHUNK_MAPPING_FILE = os.path.join(cache_dir, "chunk_mapping.sqlite")
    CHUNK_MAPPING_TAG_FILE = os.path.join(cache_dir, "chunk_mapping_tag.json")


# Maps from platform names in the chunk_mapping sqlite database to the
# respective task name prefixes (filter_tasks_by_chunks() matches them with
# str.startswith()).
PLATFORM_MAP = {
    "linux": "test-linux64/opt",
    "windows": "test-windows10-64/opt",
}

# List of platform/build type combinations that are included in pushes by |mach try coverage|.
# is_opt_task() keeps a task only if one of these strings occurs in its name.
OPT_TASK_PATTERNS = [
    "macosx64/opt",
    "windows10-64/opt",
    "windows7-32/opt",
    "linux64/opt",
]


class CoverageParser(BaseTryParser):
    # Argument parser declaration for the "coverage" selector. How each
    # attribute is interpreted is defined by BaseTryParser (not visible here).
    name = "coverage"
    # No selector-specific command line arguments are declared.
    arguments = []
    # presumably names of argument groups shared between selectors -- confirm
    # against BaseTryParser.
    common_groups = ["push", "task"]
    # presumably names of the try task configuration options exposed by this
    # selector -- confirm against BaseTryParser.
    task_configs = [
        "artifact",
        "env",
        "rebuild",
        "chemspill-prio",
        "disable-pgo",
        "worker-overrides",
    ]


def read_test_manifests():
    """Uses TestResolver to read all test manifests in the tree.

    Returns a (tests, support_files_map) tuple that describes the tests in the tree:
    tests - a set of test file paths
    support_files_map - a dict that maps from each support file to a list with
                        the test files that require it
    """
    # This function is called at module import time, before run() has called
    # setup_globals(); make sure `build` is initialized instead of failing
    # with an AttributeError on None.
    if build is None:
        setup_globals()

    test_resolver = TestResolver.from_environment(cwd=here)
    file_finder = FileFinder(build.topsrcdir)
    support_files_map = collections.defaultdict(list)
    tests = set()

    for test in test_resolver.resolve_tests(build.topsrcdir):
        tests.add(test["srcdir_relpath"])
        if "support-files" not in test:
            continue

        for support_file_pattern in test["support-files"].split():
            # Get the pattern relative to topsrcdir.
            if support_file_pattern.startswith("!/"):
                support_file_pattern = support_file_pattern[2:]
            elif support_file_pattern.startswith("/"):
                support_file_pattern = support_file_pattern[1:]
            else:
                support_file_pattern = os.path.normpath(
                    os.path.join(test["dir_relpath"], support_file_pattern)
                )

            # If it doesn't have a glob, then it's a single file.
            if "*" not in support_file_pattern:
                # Simple case: single support file, just add it here.
                support_files_map[support_file_pattern].append(test["srcdir_relpath"])
                continue

            # Glob pattern: expand it against the source tree.
            for support_file, _ in file_finder.find(support_file_pattern):
                support_files_map[support_file].append(test["srcdir_relpath"])

    return tests, support_files_map


# TODO cache the output of this function
# NOTE(review): this call executes when the module is imported, i.e. before
# run() has had a chance to call setup_globals().
all_tests, all_support_files = read_test_manifests()


def download_coverage_mapping(base_revision):
    """Make sure a chunk mapping database for ``base_revision`` is cached locally.

    Nothing is downloaded when the tag file already records ``base_revision``
    as its target revision. Otherwise the mozilla-central pushes of the last
    30 days (up to ``base_revision``, or the most recent ones when the server
    does not know that revision) are walked from newest to oldest until one of
    them has a chunk_mapping.tar.xz artifact. That archive is downloaded,
    extracted into the directory of CHUNK_MAPPING_FILE, and the tag file is
    rewritten.

    Raises:
        Exception: if the pushlog query reports an error, or if none of the
            pushes has a chunk mapping artifact.
        subprocess.CalledProcessError: if extracting the archive fails.
    """
    try:
        with open(CHUNK_MAPPING_TAG_FILE) as f:
            tags = json.load(f)
        if tags["target_revision"] == base_revision:
            return
        print("Base revision changed.")
    except (OSError, ValueError, KeyError, TypeError):
        # A missing, unparsable or malformed tag file all mean the same thing:
        # there is no usable cached mapping, so download a fresh one.
        print("Chunk mapping file not found.")

    CHUNK_MAPPING_URL_TEMPLATE = "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/project.relman.code-coverage.production.cron.{}/artifacts/public/chunk_mapping.tar.xz"  # noqa
    JSON_PUSHES_URL_TEMPLATE = "https://hg.mozilla.org/mozilla-central/json-pushes?version=2&tipsonly=1&startdate={}"  # noqa

    # Get pushes from at most one month ago.
    PUSH_HISTORY_DAYS = 30
    delta = datetime.timedelta(days=PUSH_HISTORY_DAYS)
    start_time = (datetime.datetime.now() - delta).strftime("%Y-%m-%d")
    pushes_url = JSON_PUSHES_URL_TEMPLATE.format(start_time)
    pushes_data = requests.get(pushes_url + "&tochange={}".format(base_revision)).json()
    if "error" in pushes_data:
        if "unknown revision" in pushes_data["error"]:
            print(
                "unknown revision {}, trying with latest mozilla-central".format(
                    base_revision
                )
            )
            pushes_data = requests.get(pushes_url).json()

        if "error" in pushes_data:
            raise Exception(pushes_data["error"])

    pushes = pushes_data["pushes"]

    print("Looking for coverage data. This might take a minute or two.")
    print("Base revision:", base_revision)
    # Push ids are JSON object keys, hence strings: compare them numerically so
    # that e.g. "10000" is considered newer than "9999".
    for push_id in sorted(pushes, key=int, reverse=True):
        rev = pushes[push_id]["changesets"][0]
        url = CHUNK_MAPPING_URL_TEMPLATE.format(rev)
        print("push id: {},\trevision: {}".format(push_id, rev))

        # Cheap existence check before committing to the full download.
        r = requests.head(url)
        if not r.ok:
            continue

        print("Chunk mapping found, downloading...")
        r = requests.get(url, stream=True)

        archive_path = os.path.join(build.topsrcdir, "chunk_mapping.tar.xz")
        try:
            with open(archive_path, "wb") as f:
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, f)

            subprocess.check_call(
                [
                    "tar",
                    "-xJf",
                    archive_path,
                    "-C",
                    os.path.dirname(CHUNK_MAPPING_FILE),
                ]
            )
        finally:
            # Never leave a (possibly partial) archive behind in the checkout,
            # even when the download or the extraction failed.
            if os.path.exists(archive_path):
                os.remove(archive_path)
        assert os.path.isfile(CHUNK_MAPPING_FILE)
        with open(CHUNK_MAPPING_TAG_FILE, "w") as f:
            json.dump(
                {
                    "target_revision": base_revision,
                    "chunk_mapping_revision": rev,
                    "download_date": start_time,
                },
                f,
            )
        return
    raise Exception("Could not find suitable coverage data.")


def is_a_test(cursor, path):
    """Tell whether `path` is a test file.

    A path counts as a test when it is listed in the all_tests global, or when
    the chunk mapping database knows it either as a chunk_to_test path or as a
    file_to_test test.
    """
    if path in all_tests:
        return True

    # Checked in order; the first table with a matching row settles it.
    count_queries = (
        "SELECT COUNT(*) from chunk_to_test WHERE path=?",
        "SELECT COUNT(*) from file_to_test WHERE test=?",
    )
    for query in count_queries:
        cursor.execute(query, (path,))
        if cursor.fetchone()[0]:
            return True

    return False


def tests_covering_file(cursor, path):
    """Returns a set of tests that cover a given source file."""
    cursor.execute("SELECT test FROM file_to_test WHERE source=?", (path,))
    return {e[0] for e in cursor.fetchall()}


def tests_in_chunk(cursor, platform, chunk):
    """Returns a set of tests that are contained in a given chunk."""
    cursor.execute(
        "SELECT path FROM chunk_to_test WHERE platform=? AND chunk=?", (platform, chunk)
    )
    # Because of bug 1480103, some entries in this table contain both a file name and a test name,
    # separated by a space. With the split, only the file name is kept.
    return {e[0].split(" ")[0] for e in cursor.fetchall()}


def chunks_covering_file(cursor, path):
    """Collect the distinct (platform, chunk) rows that file_to_chunk records for `path`."""
    query = "SELECT platform, chunk FROM file_to_chunk WHERE path=?"
    cursor.execute(query, (path,))
    return {row for row in cursor.fetchall()}


def tests_supported_by_file(path):
    """Returns a set of tests that are using the given file as a support-file.

    The set is empty when no test manifest lists `path` as a support file.
    """
    # all_support_files is the defaultdict built by read_test_manifests():
    # indexing it would insert an empty list for every path ever queried, so
    # use .get() to keep lookups free of side effects.
    return set(all_support_files.get(path, ()))


def find_tests(changed_files):
    """Finds both individual tests and test chunks that should be run to test code changes.
    Argument: a list of file paths relative to the source checkout.

    Returns: a (test_files, test_chunks) tuple with two sets.
    test_files - contains tests that should be run to verify changes to changed_files.
    test_chunks - contains (platform, chunk) tuples with chunks that should be
                  run. These chunks do not support running a subset of the tests (like
                  cppunit or gtest), so the whole chunk must be run.

    A summary of what was found is printed to the terminal as a side effect.
    """
    test_files = set()
    test_chunks = set()
    files_no_coverage = set()

    # Using a sqlite3 connection as a context manager only commits or rolls
    # back the transaction; it does not close the connection. Close it
    # explicitly so the database file handle is released.
    conn = sqlite3.connect(CHUNK_MAPPING_FILE)
    try:
        c = conn.cursor()
        for path in changed_files:
            # If path is a test, add it to the list and continue.
            if is_a_test(c, path):
                test_files.add(path)
                continue

            # Look at the chunk mapping and add all tests that cover this file.
            tests = tests_covering_file(c, path)
            chunks = chunks_covering_file(c, path)
            # If we found tests covering this, then it's not a support-file, so
            # save these and continue.
            if tests or chunks:
                test_files |= tests
                test_chunks |= chunks
                continue

            # Check if the path is a support-file for any test, by querying test manifests.
            tests = tests_supported_by_file(path)
            if tests:
                test_files |= tests
                continue

            # There is no coverage information for this file.
            files_no_coverage.add(path)

        files_covered = set(changed_files) - files_no_coverage
        # Normalize Windows-style separators to forward slashes.
        test_files = {s.replace("\\", "/") for s in test_files}

        _print_found_tests(files_covered, files_no_coverage, test_files, test_chunks)

        remaining_test_chunks = set()
        # For all test_chunks, try to find the tests contained by them in the
        # chunk_to_test mapping.
        for platform, chunk in test_chunks:
            tests = tests_in_chunk(c, platform, chunk)
            if tests:
                test_files.update(test.replace("\\", "/") for test in tests)
            else:
                # No per-test information: the whole chunk has to be run.
                remaining_test_chunks.add((platform, chunk))
    finally:
        conn.close()

    return test_files, remaining_test_chunks


def _print_found_tests(files_covered, files_no_coverage, test_files, test_chunks):
    """Write a human-readable summary of the selection to standard output.

    Lists the modified files with and without coverage, then reports how many
    individual test files and which whole chunks are going to be run.
    """
    covered_paths = sorted(files_covered)
    uncovered_paths = sorted(files_no_coverage)
    selected_tests = sorted(test_files)
    selected_chunks = sorted(test_chunks)

    if covered_paths:
        heading = "Found {} modified source files with test coverage:"
        print(heading.format(len(covered_paths)))
        for path in covered_paths:
            print("\t", path)

    if uncovered_paths:
        heading = "Found {} modified source files with no coverage:"
        print(heading.format(len(uncovered_paths)))
        for path in uncovered_paths:
            print("\t", path)

    # One-line verdict; nothing is printed when coverage is only partial.
    if not covered_paths:
        print("No modified source files are covered by tests.")
    elif not uncovered_paths:
        print("All modified source files are covered by tests.")

    print(
        "Running {} individual test files.".format(len(selected_tests))
        if selected_tests
        else "Could not find any individual tests to run."
    )

    if not selected_chunks:
        print("Could not find any test chunks to run.")
        return

    print("Running {} test chunks.".format(len(selected_chunks)))
    for platform, chunk in selected_chunks:
        print("\t", platform, chunk)


def filter_tasks_by_chunks(tasks, chunks):
    """Select, for every (platform, chunk) pair, the task that runs that chunk.

    A task matches when its name starts with the PLATFORM_MAP prefix of the
    platform and the remainder ends with the chunk name, optionally followed
    by "-e10s". A warning is printed for pairs without a matching task.
    Returns the selected task names as a list without duplicates.
    """
    matched_tasks = set()
    for platform, chunk in chunks:
        prefix = PLATFORM_MAP[platform]

        candidates = [
            task
            for task in tasks
            if task.startswith(prefix)
            and task[len(prefix) + 1 :].endswith((chunk, chunk + "-e10s"))
        ]

        assert (
            len(candidates) <= 1
        ), "Only one task should be selected for a given platform-chunk couple ({} - {}), {} and {} were selected".format(  # noqa
            prefix, chunk, candidates[0], candidates[1]
        )

        if candidates:
            matched_tasks.add(candidates[-1])
        else:
            print("Warning: no task found for chunk", prefix, chunk)

    return list(matched_tasks)


def is_opt_task(task):
    """Tell whether `task` targets a supported platform and build type combination.

    Filters out the -ccov/asan/pgo variants as well as every /debug task.
    """
    for pattern in OPT_TASK_PATTERNS:
        if pattern in task:
            return True
    return False


def run(
    try_config=None,
    full=False,
    parameters=None,
    stage_changes=False,
    dry_run=False,
    message="{msg}",
    closed_tree=False,
):
    """Entry point of the coverage selector: push the tests covering local changes to try.

    Downloads the chunk mapping for the current base revision if needed, works
    out which tests and chunks cover the outgoing files, keeps the matching
    tasks on the supported opt platforms, and pushes them to try with
    MOZHARNESS_TEST_PATHS set to the selected tests.

    Args:
        try_config: dict of try task configuration; its "env" entry is
            extended with MOZHARNESS_TEST_PATHS. Defaults to a new empty dict.
        full: forwarded to generate_tasks().
        parameters: forwarded to generate_tasks().
        stage_changes: forwarded to push_to_try().
        dry_run: forwarded to push_to_try().
        message: commit message template; "{msg}" is replaced with the
            generated summary.
        closed_tree: forwarded to push_to_try().

    Returns:
        1 if no tests, chunks or matching tasks were found, otherwise whatever
        push_to_try() returns.
    """
    # A literal ``{}`` default would be created once and shared by every call,
    # so the "env" entry set below would leak from one invocation to the next.
    if try_config is None:
        try_config = {}

    setup_globals()
    download_coverage_mapping(vcs.base_ref)

    changed_sources = vcs.get_outgoing_files()
    test_files, test_chunks = find_tests(changed_sources)
    if not test_files and not test_chunks:
        print("ERROR Could not find any tests or chunks to run.")
        return 1

    tg = generate_tasks(parameters, full)
    all_tasks = tg.tasks.keys()

    tasks_by_chunks = filter_tasks_by_chunks(all_tasks, test_chunks)
    tasks_by_path = filter_tasks_by_paths(all_tasks, test_files)
    # Only keep tasks on the supported platform/build type combinations.
    tasks = [
        task
        for task in set(tasks_by_path) | set(tasks_by_chunks)
        if is_opt_task(task)
    ]

    if not tasks:
        print("ERROR Did not find any matching tasks after filtering.")
        return 1
    test_count_message = (
        "{test_count} test file{test_plural} that "
        + "cover{test_singular} these changes "
        + "({task_count} task{task_plural} to be scheduled)"
    ).format(
        test_count=len(test_files),
        test_plural="" if len(test_files) == 1 else "s",
        test_singular="s" if len(test_files) == 1 else "",
        task_count=len(tasks),
        task_plural="" if len(tasks) == 1 else "s",
    )
    print("Found " + test_count_message)

    # Set the test paths to be run by setting MOZHARNESS_TEST_PATHS.
    path_env = {
        "MOZHARNESS_TEST_PATHS": six.ensure_text(
            json.dumps(resolve_tests_by_suite(test_files))
        )
    }
    try_config.setdefault("env", {}).update(path_env)

    # Build commit message.
    msg = "try coverage - " + test_count_message
    return push_to_try(
        "coverage",
        message.format(msg=msg),
        try_task_config=generate_try_task_config("coverage", tasks, try_config),
        stage_changes=stage_changes,
        dry_run=dry_run,
        closed_tree=closed_tree,
    )