testing/raptor/raptor/benchmark.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import os
import pathlib
import shutil
import socket
import subprocess
import tempfile

import mozfile
from logger.logger import RaptorLogger
from wptserve import handlers, server

# Module-level logger; every message is tagged with the "raptor-benchmark" component.
LOG = RaptorLogger(component="raptor-benchmark")
# Resolved path of the directory containing this file. Benchmark.setup_benchmarks
# falls back to it as the base folder when the config provides no "obj_path".
here = pathlib.Path(__file__).parent.resolve()


class Benchmark(object):
    """utility class for running benchmarks in raptor"""

    def __init__(self, config, test):
        """Prepare the benchmark source and start serving it over HTTP.

        Args:
            config: Raptor configuration mapping. The keys read here are
                ``benchmark_repository``, ``benchmark_revision``,
                ``benchmark_branch`` and ``run_local``.
            test: Test settings mapping. It is modified in place when the
                config overrides the repository, revision or branch.

        Raises:
            KeyError: if ``benchmark_repository`` is set in the config but
                ``benchmark_revision`` is missing.
        """
        self.config = config
        self.test = test

        # Note that we can only change the repository, revision, and branch through here.
        # The path to the test should remain constant. If it needs to be changed, make a
        # patch that changes it for the new test.
        if self.config.get("benchmark_repository", None):
            self.test["repository"] = self.config["benchmark_repository"]
            self.test["repository_revision"] = self.config["benchmark_revision"]

            branch_override = self.config.get("benchmark_branch", None)
            if branch_override:
                # Kept for backward compatibility with anything reading "branch".
                self.test["branch"] = branch_override
                # _update_benchmark_repo looks the branch up under
                # "repository_branch"; without this the override is ignored.
                self.test["repository_branch"] = branch_override

        self.setup_benchmarks(
            os.getenv("MOZ_DEVELOPER_REPO_DIR"),
            os.getenv("MOZ_MOZBUILD_DIR"),
            run_local=self.config.get("run_local", False),
        )

        LOG.info(f"bench_dir: {self.bench_dir}")
        LOG.info("bench_dir contains:")
        LOG.info(list(self.bench_dir.iterdir()))

        # now have the benchmark source ready, go ahead and serve it up!
        self.start_http_server()

    def start_http_server(self):
        """Write the wptserve headers file, pick a free port and start the server.

        Sets ``self.host`` (from ``config["host"]``), ``self.port`` and
        ``self.httpd``. Note that ``setup_webserver`` re-assigns ``self.host``
        and ``self.port`` from the ``"host:port"`` string it is given.

        Raises:
            KeyError: if the config has no ``host`` entry.
        """
        self.write_server_headers()

        # Pick a free port by binding to port 0 and reading back what the OS
        # assigned. The context manager closes the probe socket even when
        # bind()/getsockname() raises, so the descriptor is never leaked.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind(("", 0))
            free_port = sock.getsockname()[1]

        self.host = self.config["host"]
        self.port = free_port
        _webserver = "%s:%d" % (self.host, self.port)

        self.httpd = self.setup_webserver(_webserver)
        self.httpd.start()

    def write_server_headers(self):
        """Write a ``__dir__.headers`` file into ``self.bench_dir``.

        The file holds a single ``Access-Control-Allow-Origin: *`` header.
        """
        # to add specific headers for serving files via wptserve, write out a headers dir file
        # see http://wptserve.readthedocs.io/en/latest/handlers.html#file-handlers
        LOG.info("writing wptserve headers file")
        headers_file = pathlib.Path(self.bench_dir, "__dir__.headers")
        # The context manager guarantees the handle is closed even if write() fails.
        with headers_file.open("w") as headers:
            headers.write("Access-Control-Allow-Origin: *")
        LOG.info("wrote wpt headers file: %s" % headers_file)

    def setup_webserver(self, webserver):
        """Create (but do not start) the wptserve server for the benchmark folder.

        Args:
            webserver: A ``"host:port"`` string. It is split on ``":"`` and the
                two parts are stored on ``self.host`` and ``self.port``; the
                port is kept as a string on the instance and converted to
                ``int`` only when handed to the server.

        Returns:
            A ``server.WebTestHttpd`` whose document root is ``self.bench_dir``
            and which routes every GET request to the wptserve file handler.

        Raises:
            ValueError: if ``webserver`` does not split into exactly two parts.
        """
        LOG.info("starting webserver on %r" % webserver)
        LOG.info("serving benchmarks from here: %s" % self.bench_dir)

        host, port = webserver.split(":")
        self.host = host
        self.port = port

        serve_any_file = ("GET", "*", handlers.file_handler)
        return server.WebTestHttpd(
            host=host,
            port=int(port),
            doc_root=str(self.bench_dir),
            routes=[serve_any_file],
        )

    def stop_serve(self):
        """Placeholder for shutting down the benchmark server.

        Nothing is stopped yet; the method only logs a TODO message.
        """
        LOG.info("TODO: stop serving benchmark source")

    def _full_clone(self, benchmark_repository, dest):
        """Run a full ``git clone`` of ``benchmark_repository`` into ``dest``.

        The clone is made with ``http.postBuffer=2147483648`` and
        ``core.autocrlf=false`` passed through ``-c``.

        Args:
            benchmark_repository: Repository location handed to ``git clone``.
            dest: ``pathlib.Path`` of the clone target; it is resolved first.

        Raises:
            subprocess.CalledProcessError: if git exits with a non-zero status.
        """
        clone_cmd = ["git", "clone"]
        clone_cmd += ["-c", "http.postBuffer=2147483648"]
        clone_cmd += ["-c", "core.autocrlf=false"]
        clone_cmd += [benchmark_repository, str(dest.resolve())]
        subprocess.check_call(clone_cmd)

    def _get_benchmark_folder(self, benchmark_dest, run_local):
        """Return the folder under ``benchmark_dest`` that holds the benchmarks.

        Local runs use ``testing/raptor/benchmarks``; all other runs use
        ``tests/webkit/PerformanceTests``.
        """
        if run_local:
            subfolders = ("testing", "raptor", "benchmarks")
        else:
            subfolders = ("tests", "webkit", "PerformanceTests")
        return pathlib.Path(benchmark_dest, *subfolders)

    def _sparse_clone(self, benchmark_repository, dest):
        """Get a partial clone of the repo.

        This needs git version 2.30+, so it's currently unused, but it works.
        See bug 1804694. This method should only be used in CI; locally we
        can simply pull the whole repo.

        The repository is cloned with depth 1, a ``blob:none`` filter and
        ``--sparse``; the sparse checkout is then limited to the test's
        ``repository_path`` (``"benchmarks"`` when the test does not set one).

        Raises:
            subprocess.CalledProcessError: if either git command fails.
        """
        clone_cmd = ["git", "clone"]
        clone_cmd += ["--depth", "1"]
        clone_cmd += ["--filter", "blob:none"]
        clone_cmd += ["--sparse", benchmark_repository, str(dest.resolve())]
        subprocess.check_call(clone_cmd)

        # Restrict the working tree to the folder that contains the benchmark.
        sparse_folder = self.test.get("repository_path", "benchmarks")
        subprocess.check_call(
            ["git", "sparse-checkout", "set", sparse_folder],
            cwd=dest,
        )

    def _copy_or_link_files(
        self,
        benchmark_path,
        benchmark_dest,
        skip_files_and_hidden=True,
        host_from_parent=True,
    ):
        """Expose ``benchmark_path`` inside ``benchmark_dest`` and return the folder to host.

        Where symlinks are usable (``os.symlink`` exists and the OS is not
        Windows) a link named after the benchmark folder is created if nothing
        exists at that location yet. Otherwise any previous copy is removed
        and the folder is copied.

        Args:
            benchmark_path: ``pathlib.Path`` of the benchmark source directory.
            benchmark_dest: ``pathlib.Path`` of the directory to link/copy
                into; it is created when missing.
            skip_files_and_hidden: Accepted for interface compatibility; this
                method does not use it.
            host_from_parent: When true and the benchmark folder directly
                contains at least one file, ``benchmark_dest`` itself is
                returned and ``self.test["test_url"]`` is rewritten so the
                benchmark folder name follows ``<port>/``.

        Returns:
            ``pathlib.Path`` of the folder that should be served.
        """
        benchmark_dest.mkdir(parents=True, exist_ok=True)

        dest = pathlib.Path(benchmark_dest, benchmark_path.name)
        if hasattr(os, "symlink") and os.name != "nt":
            if dest.is_symlink() and not dest.exists():
                # A dangling link (its old target was deleted, e.g. a removed
                # clone directory) makes exists() return False, yet os.symlink
                # would fail with FileExistsError. Drop it so we can re-link.
                dest.unlink()
            if not dest.exists():
                os.symlink(benchmark_path, dest)
        else:
            # Clobber the benchmark in case a recent update removed any files.
            mozfile.remove(str(dest.resolve()))
            shutil.copytree(benchmark_path, dest)

        if host_from_parent and any(
            path.is_file() for path in benchmark_path.iterdir()
        ):
            # Host the parent of this directory to prevent hosting issues
            # (e.g. linked files ending up with different routes)
            host_folder = dest.parent
            self.test["test_url"] = self.test["test_url"].replace(
                "<port>/", f"<port>/{benchmark_path.name}/"
            )
            dest = host_folder

        return dest

    def _verify_benchmark_revision(self, benchmark_revision, external_repo_path):
        """Check that ``benchmark_revision`` names a commit in the cloned repo.

        Runs ``git rev-parse --verify <revision>^{commit}`` inside
        ``external_repo_path``. Any failure is logged as an error and then
        re-raised unchanged.
        """
        try:
            # Appending "^{commit}" asks git to resolve the revision to a commit object.
            commit_spec = f"{benchmark_revision}^{{commit}}"
            verify_cmd = ["git", "rev-parse", "--verify", commit_spec]
            subprocess.check_call(verify_cmd, cwd=external_repo_path)
            LOG.info("Given benchmark repository revision verified")
        except Exception:
            LOG.error(
                f"Given revision doesn't exist in this repository: {benchmark_revision}"
            )
            raise

    def _update_benchmark_repo(self, external_repo_path):
        """Check out the wanted branch of an existing clone and pull it.

        When the test sets ``repository_branch`` that branch is used, after a
        best-effort ``git pull --all`` (a failure there is only logged).
        Otherwise the default branch is taken from ``origin/HEAD`` and
        refreshed with ``git remote set-head origin -a``; if that command's
        output does not contain the locally known name, the last word of the
        output is used instead.

        Raises:
            Exception: whatever went wrong while determining the default
                branch (logged as critical and re-raised).
            subprocess.CalledProcessError: if the final checkout or pull fails.
        """
        branch = self.test.get("repository_branch", None)
        if branch is not None:
            LOG.info(f"Using non-default branch {branch}")
            try:
                subprocess.check_call(["git", "pull", "--all"], cwd=external_repo_path)
            except subprocess.CalledProcessError:
                LOG.info("Failed to pull new branches from remote")
        else:
            try:
                # Get the default branch name, and check if it's been updated
                local_head = subprocess.check_output(
                    ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
                    cwd=external_repo_path,
                )
                branch = local_head.decode("utf-8").strip().split("/")[-1]

                set_head_output = subprocess.check_output(
                    ["git", "remote", "set-head", "origin", "-a"],
                    cwd=external_repo_path,
                )
                remote_report = set_head_output.decode("utf-8").strip()
                if branch not in remote_report:
                    branch = remote_report.split()[-1]
            except Exception:
                LOG.critical("Failed to find the default branch of the repository!")
                raise

        LOG.info(external_repo_path)
        for git_args in (["git", "checkout", branch], ["git", "pull"]):
            subprocess.check_call(git_args, cwd=external_repo_path)

    def _setup_git_benchmarks(self, mozbuild_path, benchmark_dest, run_local=True):
        """Set up a benchmark that lives in an external git repository.

        The repository named by ``self.test["repository"]`` is cloned into
        ``<mozbuild_path>/performance-tests/<last URL segment>`` (or updated
        when a clone with the same origin already exists), the requested
        revision is verified and checked out, and the benchmark folder is then
        linked or copied below ``benchmark_dest``.

        Args:
            mozbuild_path: Cache directory for clones; a fresh temporary
                directory is created when it is ``None``.
            benchmark_dest: Base folder under which the benchmark is exposed.
            run_local: Selects the sub-folder layout, see
                ``_get_benchmark_folder``.

        Returns:
            The folder that should be hosted, as returned by
            ``_copy_or_link_files``.
        """
        repo_url = self.test["repository"]
        revision = self.test["repository_revision"]

        # Specifies where we can find the benchmark within the cloned repo, this is the
        # folder that will be hosted to run the test. If it isn't given, we'll host the
        # root of the repository.
        repo_subpath = self.test.get("repository_path", "")

        # Get the performance-tests cache (if it exists), otherwise create a temp folder
        if mozbuild_path is None:
            mozbuild_path = tempfile.mkdtemp()

        repo_name = repo_url.split("/")[-1]
        external_repo_path = pathlib.Path(mozbuild_path, "performance-tests", repo_name)

        try:
            subprocess.check_output(["git", "--version"])
        except Exception as ex:
            LOG.info(
                "Git is not available! Please install git and "
                "ensure it is included in the terminal path"
            )
            raise ex

        needs_clone = not external_repo_path.is_dir()
        if needs_clone:
            LOG.info("Cloning the benchmarks to {}".format(external_repo_path))
        else:
            # Make sure that the repo origin wasn't changed
            origin_output = subprocess.check_output(
                ["git", "config", "--get", "remote.origin.url"],
                cwd=external_repo_path,
            )
            if origin_output.decode("utf-8").strip() != repo_url:
                LOG.info(
                    "Removing repo with a different remote origin before installing new one"
                )
                mozfile.remove(external_repo_path)
                needs_clone = True

        if needs_clone:
            # Bug 1804694 - Use sparse checkouts instead of full clones
            # Locally, we should always do a full clone
            self._full_clone(repo_url, external_repo_path)
        else:
            self._update_benchmark_repo(external_repo_path)

        self._verify_benchmark_revision(revision, external_repo_path)
        subprocess.check_call(["git", "checkout", revision], cwd=external_repo_path)

        test_folder = pathlib.Path(
            self._get_benchmark_folder(benchmark_dest, run_local), self.test["name"]
        )
        return self._copy_or_link_files(
            pathlib.Path(external_repo_path, repo_subpath),
            test_folder,
            skip_files_and_hidden=False,
            host_from_parent=self.test.get("host_from_parent", True),
        )

    def _setup_in_tree_benchmarks(self, topsrc_path, benchmark_dest, run_local=True):
        """Set up a benchmark that is found in-tree.

        This method will be deprecated once bug 1804578 is resolved (copying our
        in-tree benchmarks into a repo) to have a standard way of running benchmarks.

        Returns the benchmark folder. For non-local runs nothing is linked or
        copied; for local runs ``third_party/webkit/PerformanceTests`` from
        ``topsrc_path`` is linked or copied into that folder first.
        """
        target_folder = self._get_benchmark_folder(benchmark_dest, run_local)
        if run_local:
            in_tree_source = pathlib.Path(
                topsrc_path, "third_party", "webkit", "PerformanceTests"
            )
            return self._copy_or_link_files(in_tree_source, target_folder)

        # If the test didn't specify a repo and we're in CI
        # then we'll find them here and we don't need to do anything else
        return target_folder

    def setup_benchmarks(
        self,
        topsrc_path,
        mozbuild_path,
        run_local=True,
    ):
        """Make sure benchmarks are linked to the proper location in the objdir.

        Benchmarks can either live in-tree or in an external repository. In the latter
        case also clone/update the repository if necessary.

        The resulting folder is stored on ``self.bench_dir``.
        """
        # The base folder comes from the config's "obj_path" when it is set;
        # otherwise the directory containing this module is used.
        configured_obj_path = self.config.get("obj_path", None)
        if configured_obj_path is None:
            base_dir = pathlib.Path(here)
        else:
            base_dir = pathlib.Path(configured_obj_path)

        if self.test.get("repository", None) is None:
            # No repository on the test: the benchmark is available in-tree.
            resolved = self._setup_in_tree_benchmarks(
                topsrc_path, base_dir, run_local=run_local
            )
        else:
            # The test names a repository: clone or update it.
            resolved = self._setup_git_benchmarks(
                mozbuild_path, base_dir, run_local=run_local
            )

        self.bench_dir = resolved