From 086c044dc34dfc0f74fbe41f4ecb402b2cd34884 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Fri, 19 Apr 2024 03:13:33 +0200
Subject: Merging upstream version 125.0.1.

Signed-off-by: Daniel Baumann
---
 testing/raptor/browsertime/motionmark-1-3.js       | 107 ++++++++++
 .../browsertime/support-scripts/motionmark-1-3.py  |  91 ++++++++
 .../support-scripts/sample_python_support.py       |   2 +-
 testing/raptor/mach_commands.py                    |  22 +-
 testing/raptor/raptor/base_python_support.py       |   6 +-
 testing/raptor/raptor/browsertime/android.py       |   4 +
 testing/raptor/raptor/browsertime/base.py          |  29 ++-
 testing/raptor/raptor/cmdline.py                   |  26 ++-
 testing/raptor/raptor/perfdocs/browsertime.rst     |  54 +++--
 testing/raptor/raptor/perfdocs/config.yml          | 230 +++++++++++----------
 testing/raptor/raptor/perftest.py                  |  36 ++--
 testing/raptor/raptor/raptor.py                    |   1 +
 testing/raptor/raptor/raptor.toml                  |   4 +
 .../tests/benchmarks/motionmark-1-3-desktop.toml   |  28 +++
 .../tests/benchmarks/motionmark-1-3-mobile.toml    |  27 +++
 .../tests/benchmarks/speedometer-desktop.toml      |   2 +-
 .../tests/benchmarks/speedometer-mobile.toml       |   2 +-
 .../raptor/tests/tp6/mobile/browsertime-tp6m.toml  |   9 +-
 .../playback/mitm6-android-fenix-sina.manifest     |   9 -
 .../playback/mitm6-android-fenix-web-de.manifest   |   9 -
 .../playback/mitm8-android-fenix-bild-de.manifest  |   9 +
 .../playback/mitm8-android-fenix-sina.manifest     |   9 +
 testing/raptor/test/test_cmdline.py                |  86 +++++++-
 testing/raptor/test/test_raptor.py                 |  29 ++-
 24 files changed, 634 insertions(+), 197 deletions(-)
 create mode 100644 testing/raptor/browsertime/motionmark-1-3.js
 create mode 100644 testing/raptor/browsertime/support-scripts/motionmark-1-3.py
 create mode 100644 testing/raptor/raptor/tests/benchmarks/motionmark-1-3-desktop.toml
 create mode 100644 testing/raptor/raptor/tests/benchmarks/motionmark-1-3-mobile.toml
 delete mode 100644 testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-sina.manifest
 delete mode 100644 testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-web-de.manifest
 create mode 100644 testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-bild-de.manifest
 create mode 100644 testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-sina.manifest

diff --git a/testing/raptor/browsertime/motionmark-1-3.js b/testing/raptor/browsertime/motionmark-1-3.js
new file mode 100644
index 0000000000..c240b2dddc
--- /dev/null
+++ b/testing/raptor/browsertime/motionmark-1-3.js
@@ -0,0 +1,107 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +/* eslint-env node */ + +// index for the CSS selector in developer.html +const suiteSelectorNumber = { + MotionMark: 1, + "HTML suite": 2, +}; + +module.exports = async function (context, commands) { + context.log.info("Starting MotionMark 1.3 test"); + let url = context.options.browsertime.url; + let page_cycles = context.options.browsertime.page_cycles; + let suite_name = context.options.browsertime.suite_name; + let page_cycle_delay = context.options.browsertime.page_cycle_delay; + let post_startup_delay = context.options.browsertime.post_startup_delay; + let page_timeout = context.options.timeouts.pageLoad; + let expose_profiler = context.options.browsertime.expose_profiler; + + context.log.info( + "Waiting for %d ms (post_startup_delay)", + post_startup_delay + ); + await commands.wait.byTime(post_startup_delay); + + for (let count = 0; count < page_cycles; count++) { + context.log.info("Navigating to about:blank"); + await commands.navigate("about:blank"); + + context.log.info("Cycle %d, waiting for %d ms", count, page_cycle_delay); + await commands.wait.byTime(page_cycle_delay); + + context.log.info("Cycle %d, starting the measure", count); + if (expose_profiler === "true") { + context.log.info("Custom profiler start!"); + if (context.options.browser === "firefox") { + await commands.profiler.start(); + } else if (context.options.browser === "chrome") { + await commands.trace.start(); + } + } + await commands.measure.start(url); + + let suite_selector = `#suites > ul > li:nth-child(${suiteSelectorNumber[suite_name]}) > label > input[type="checkbox"]`; + + await commands.mouse.singleClick.bySelector(suite_selector); + await commands.js.runAndWait(` + this.benchmarkController.startBenchmark() + `); + + let data_exists = null; + let starttime = await commands.js.run(`return performance.now();`); + while ( + (data_exists == null || !Object.keys(data_exists).length) && + (await commands.js.run(`return performance.now();`)) - starttime < + page_timeout + ) { + let wait_time = 3000; + context.log.info( + "Waiting %d ms for data from %s...", + wait_time, + suite_name + ); + await commands.wait.byTime(wait_time); + + data_exists = await commands.js.run(` + return window.benchmarkRunnerClient.results.data + `); + } + + if (expose_profiler === "true") { + context.log.info("Custom profiler stop!"); + if (context.options.browser === "firefox") { + await commands.profiler.stop(); + } else if (context.options.browser === "chrome") { + await commands.trace.stop(); + } + } + if ( + !data_exists && + (await commands.js.run(`return performance.now();`)) - starttime >= + page_timeout + ) { + context.log.error("Benchmark timed out. Aborting..."); + return false; + } + + let data = null; + data = await commands.js.run(` + const score = window.benchmarkRunnerClient.results.score; + const results = window.benchmarkRunnerClient.results.results[0].testsResults; + return { + score, + results, + }; + `); + data.suite_name = suite_name; + + commands.measure.addObject({ mm_res: data }); + context.log.info("Value of summarized benchmark data: ", data); + } + + return true; +}; diff --git a/testing/raptor/browsertime/support-scripts/motionmark-1-3.py b/testing/raptor/browsertime/support-scripts/motionmark-1-3.py new file mode 100644 index 0000000000..713935fd3f --- /dev/null +++ b/testing/raptor/browsertime/support-scripts/motionmark-1-3.py @@ -0,0 +1,91 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import filters +from base_python_support import BasePythonSupport + + +class MotionMarkSupport(BasePythonSupport): + def handle_result(self, bt_result, raw_result, **kwargs): + """Parse a result for the required results. + + See base_python_support.py for what's expected from this method. + """ + suite_name = raw_result["extras"][0]["mm_res"]["suite_name"] + score_tracker = { + subtest: [] + for subtest in raw_result["extras"][0]["mm_res"]["results"][ + suite_name + ].keys() + } + + motionmark_overall_score = [] + for res in raw_result["extras"]: + motionmark_overall_score.append(round(res["mm_res"]["score"], 3)) + + for k, v in res["mm_res"]["results"][suite_name].items(): + score_tracker[k].append(v["complexity"]["bootstrap"]["median"]) + + for k, v in score_tracker.items(): + bt_result["measurements"][k] = v + + bt_result["measurements"]["score"] = motionmark_overall_score + + def _build_subtest(self, measurement_name, replicates, test): + unit = test.get("unit", "ms") + if test.get("subtest_unit"): + unit = test.get("subtest_unit") + + lower_is_better = test.get( + "subtest_lower_is_better", test.get("lower_is_better", True) + ) + if "score" in measurement_name: + lower_is_better = False + unit = "score" + + subtest = { + "unit": unit, + "alertThreshold": float(test.get("alert_threshold", 2.0)), + "lowerIsBetter": lower_is_better, + "name": measurement_name, + "replicates": replicates, + "value": round(filters.mean(replicates), 3), + } + + return subtest + + def summarize_test(self, test, suite, **kwargs): + """Summarize the measurements found in the test as a suite with subtests. + + See base_python_support.py for what's expected from this method. + """ + suite["type"] = "benchmark" + if suite["subtests"] == {}: + suite["subtests"] = [] + for measurement_name, replicates in test["measurements"].items(): + if not replicates: + continue + suite["subtests"].append( + self._build_subtest(measurement_name, replicates, test) + ) + suite["subtests"].sort(key=lambda subtest: subtest["name"]) + + score = 0 + for subtest in suite["subtests"]: + if subtest["name"] == "score": + score = subtest["value"] + break + suite["value"] = score + + def modify_command(self, cmd, test): + """Modify the browsertime command to have the appropriate suite name. + + This is necessary to grab the correct CSS selector in the browsertime + script, and later for parsing through the final benchmark data in the + support python script (this file). + + Current options are `MotionMark` and `HTML suite`. 
+ """ + + cmd += ["--browsertime.suite_name", test.get("suite_name")] diff --git a/testing/raptor/browsertime/support-scripts/sample_python_support.py b/testing/raptor/browsertime/support-scripts/sample_python_support.py index a1ec0069a5..b31e890c0a 100644 --- a/testing/raptor/browsertime/support-scripts/sample_python_support.py +++ b/testing/raptor/browsertime/support-scripts/sample_python_support.py @@ -6,7 +6,7 @@ from base_python_support import BasePythonSupport class SamplePythonSupport(BasePythonSupport): - def modify_command(self, cmd): + def modify_command(self, cmd, test): for i, entry in enumerate(cmd): if "{replace-with-constant-value}" in entry: cmd[i] = "25" diff --git a/testing/raptor/mach_commands.py b/testing/raptor/mach_commands.py index b1c5002efc..14ffb1caf2 100644 --- a/testing/raptor/mach_commands.py +++ b/testing/raptor/mach_commands.py @@ -68,6 +68,7 @@ class RaptorRunner(MozbuildObject): self.browsertime_visualmetrics = kwargs["browsertime_visualmetrics"] self.browsertime_node = kwargs["browsertime_node"] self.clean = kwargs["clean"] + self.screenshot_on_failure = kwargs["screenshot_on_failure"] if Conditions.is_android(self) or kwargs["app"] in ANDROID_BROWSERS: self.binary_path = None @@ -122,6 +123,7 @@ class RaptorRunner(MozbuildObject): "browsertime_node": self.browsertime_node, "mozbuild_path": get_state_dir(), "clean": self.clean, + "screenshot_on_failure": self.screenshot_on_failure, } sys.path.insert(0, os.path.join(self.topsrcdir, "tools", "browsertime")) @@ -267,7 +269,7 @@ class RaptorRunner(MozbuildObject): def setup_node(command_context): - """Fetch the latest node-16 binary and install it into the .mozbuild directory.""" + """Fetch the latest node-18 binary and install it into the .mozbuild directory.""" import platform from mozbuild.artifact_commands import artifact_toolchain @@ -276,11 +278,11 @@ def setup_node(command_context): print("Setting up node for browsertime...") state_dir = get_state_dir() - cache_path = os.path.join(state_dir, "browsertime", "node-16") + cache_path = os.path.join(state_dir, "browsertime", "node-18") def __check_for_node(): # Check standard locations first - node_exe = find_node_executable(min_version=Version("16.0.0")) + node_exe = find_node_executable(min_version=Version("18.0.0")) if node_exe and (node_exe[0] is not None): return node_exe[0] if not os.path.exists(cache_path): @@ -293,14 +295,14 @@ def setup_node(command_context): node_exe_path = os.path.join( state_dir, "browsertime", - "node-16", + "node-18", "node", ) else: node_exe_path = os.path.join( state_dir, "browsertime", - "node-16", + "node-18", "node", "bin", ) @@ -313,7 +315,7 @@ def setup_node(command_context): node_exe = __check_for_node() if node_exe is None: - toolchain_job = "{}-node-16" + toolchain_job = "{}-node-18" plat = platform.system() if plat == "Windows": toolchain_job = toolchain_job.format("win64") @@ -326,7 +328,7 @@ def setup_node(command_context): toolchain_job = toolchain_job.format("linux64") print( - "Downloading Node v16 from Taskcluster toolchain {}...".format( + "Downloading Node v18 from Taskcluster toolchain {}...".format( toolchain_job ) ) @@ -351,11 +353,11 @@ def setup_node(command_context): node_exe = __check_for_node() if node_exe is None: - raise Exception("Could not find Node v16 binary for Raptor-Browsertime") + raise Exception("Could not find Node v18 binary for Raptor-Browsertime") - print("Finished downloading Node v16 from Taskcluster") + print("Finished downloading Node v18 from Taskcluster") - print("Node v16+ found 
at: %s" % node_exe) + print("Node v18+ found at: %s" % node_exe) return node_exe diff --git a/testing/raptor/raptor/base_python_support.py b/testing/raptor/raptor/base_python_support.py index ac1281aadc..68d6651f36 100644 --- a/testing/raptor/raptor/base_python_support.py +++ b/testing/raptor/raptor/base_python_support.py @@ -22,11 +22,15 @@ class BasePythonSupport: """ pass - def modify_command(self, cmd): + def modify_command(self, cmd, test): """Used to modify the Browsertime command before running the test. The `cmd` arg holds the current browsertime command to run. It can be changed directly to change how browsertime runs. + + The `test` arg is the test itself with all of its current settings. + It can be modified as needed to add additional information to the + test that will run. """ pass diff --git a/testing/raptor/raptor/browsertime/android.py b/testing/raptor/raptor/browsertime/android.py index 6ca6faabb3..6d83a5063a 100644 --- a/testing/raptor/raptor/browsertime/android.py +++ b/testing/raptor/raptor/browsertime/android.py @@ -126,6 +126,10 @@ class BrowsertimeAndroid(PerftestAndroid, Browsertime): ] ) + if self.config["app"] == "geckoview": + # This is needed as geckoview is crashing on shutdown and is throwing marionette errors similar to 1768889 + args_list.extend(["--ignoreShutdownFailures", "true"]) + if self.config["app"] == "fenix": # See bug 1768889 args_list.extend(["--ignoreShutdownFailures", "true"]) diff --git a/testing/raptor/raptor/browsertime/base.py b/testing/raptor/raptor/browsertime/base.py index f0e9946ecf..1ddf0a91d5 100644 --- a/testing/raptor/raptor/browsertime/base.py +++ b/testing/raptor/raptor/browsertime/base.py @@ -16,7 +16,7 @@ from copy import deepcopy import mozprocess import six from benchmark import Benchmark -from cmdline import CHROME_ANDROID_APPS +from cmdline import CHROME_ANDROID_APPS, DESKTOP_APPS from logger.logger import RaptorLogger from manifestparser.util import evaluate_list_from_string from perftest import GECKO_PROFILER_APPS, TRACE_APPS, Perftest @@ -825,6 +825,28 @@ class Browsertime(Perftest): os.killpg(proc.pid, signal.SIGKILL) proc.wait() + def get_failure_screenshot(self): + if not ( + self.config.get("screenshot_on_failure") + and self.config["app"] in DESKTOP_APPS + ): + return + + # Bug 1884178 + # Temporarily disable on Windows + Chrom* applications. 
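+        # TRACE_APPS is the Chrome-family set (chrome, chromium,
+        # chromium-release, per cmdline.py), so this early return only
+        # skips the capture for Chrom* apps on Windows platforms.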
+ if self.config["app"] in TRACE_APPS and "win" in self.config["platform"]: + return + + from mozscreenshot import dump_screen + + obj_dir = os.environ.get("MOZ_DEVELOPER_OBJ_DIR", None) + if obj_dir is None: + build_dir = pathlib.Path(os.environ.get("MOZ_UPLOAD_DIR")).parent + utility_path = pathlib.Path(build_dir, "tests", "bin") + else: + utility_path = os.path.join(obj_dir, "dist", "bin") + dump_screen(utility_path, LOG) + def run_extra_profiler_run( self, test, timeout, proc_timeout, output_timeout, line_handler, env ): @@ -899,7 +921,7 @@ class Browsertime(Perftest): if test.get("support_class", None): LOG.info("Test support class is modifying the command...") - test.get("support_class").modify_command(cmd) + test.get("support_class").modify_command(cmd, test) output_timeout = BROWSERTIME_PAGELOAD_OUTPUT_TIMEOUT if test.get("type", "") == "scenario": @@ -1016,16 +1038,19 @@ class Browsertime(Perftest): ) if self.output_timed_out: + self.get_failure_screenshot() raise Exception( f"Browsertime process timed out after waiting {output_timeout} seconds " "for output" ) if self.timed_out: + self.get_failure_screenshot() raise Exception( f"Browsertime process timed out after {proc_timeout} seconds" ) if self.browsertime_failure: + self.get_failure_screenshot() raise Exception(self.browsertime_failure) # We've run the main browsertime process, now we need to run the diff --git a/testing/raptor/raptor/cmdline.py b/testing/raptor/raptor/cmdline.py index 8278dafd79..3021f080ee 100644 --- a/testing/raptor/raptor/cmdline.py +++ b/testing/raptor/raptor/cmdline.py @@ -64,6 +64,13 @@ GECKO_PROFILER_APPS = (FIREFOX, GECKOVIEW, REFBROW, FENIX) TRACE_APPS = (CHROME, CHROMIUM, CHROMIUM_RELEASE) +APP_BINARIES = { + "fenix": "org.mozilla.fenix", + "focus": "org.mozilla.focus", + "geckoview": "org.mozilla.geckoview_example", + "refbrow": "org.mozilla.reference.browser", +} + def print_all_activities(): all_activities = [] @@ -244,7 +251,7 @@ def create_parser(mach_interface=False): "--post-startup-delay", dest="post_startup_delay", type=int, - default=30000, + default=None, help="How long to wait (ms) after browser start-up before starting the tests", ) add_arg( @@ -520,6 +527,13 @@ def create_parser(mach_interface=False): type=str, help="Repository branch that should be used for a particular benchmark test.", ) + add_arg( + "--screenshot-on-failure", + action="store_true", + dest="screenshot_on_failure", + default=False, + help="Take a screenshot when the test fails.", + ) add_logging_group(parser) return parser @@ -528,7 +542,11 @@ def create_parser(mach_interface=False): def verify_options(parser, args): ctx = vars(args) if args.binary is None and args.app != "chrome-m": - parser.error("--binary is required!") + args.binary = APP_BINARIES.get(args.app, None) + if args.binary is None: + parser.error("--binary is required!") + else: + print(f"Using {args.binary} as default binary argument for {args.app} app") # Debug-mode is disabled in CI (check for attribute in case of mach_interface issues) if hasattr(args, "run_local") and (not args.run_local and args.debug_mode): @@ -604,6 +622,10 @@ def verify_options(parser, args): "When a benchmark repository is provided, a revision is also required." 
        )
 
+    if args.post_startup_delay:
+        if args.post_startup_delay < 0:
+            parser.error("--post-startup-delay must be a positive integer (in ms).")
+
 
 def parse_args(argv=None):
     parser = create_parser()
diff --git a/testing/raptor/raptor/perfdocs/browsertime.rst b/testing/raptor/raptor/perfdocs/browsertime.rst
index 4d1975224f..beff3f0ad8 100644
--- a/testing/raptor/raptor/perfdocs/browsertime.rst
+++ b/testing/raptor/raptor/perfdocs/browsertime.rst
@@ -34,7 +34,7 @@ For example, here's a test on ``https://www.sitespeed.io`` using this custom tes
 
 ::
 
-  ./mach raptor --browsertime -t browsertime --browsertime-arg test_script=pageload --browsertime-arg browsertime.url=https://www.sitespeed.io --browsertime-arg iterations=3
+  ./mach raptor -t browsertime --browsertime-arg test_script=pageload --browsertime-arg browsertime.url=https://www.sitespeed.io --browsertime-arg iterations=3
 
 That test will perform 3 iterations of the given url. Note also that we can use simplified names to make use of test scripts that are built into raptor. You can use ``pageload``, ``interactive``, or provide a path to another test script.
@@ -50,7 +50,7 @@ There are two ways to run performance tests through browsertime listed below.
 
 ::
 
-  ./mach raptor --browsertime -t google-search
+  ./mach raptor -t google-search
 
 * Browsertime-"native":
 
@@ -64,23 +64,25 @@ Benchmark tests
 
 ::
 
-  ./mach raptor -t raptor-speedometer --browsertime
+  ./mach raptor -t speedometer
 
 Running on Android
 ------------------
 
+To run on Android, the device needs to have Geckoview or Fenix installed on it. Our tests will only work with physical devices, and `bug 1881570 `__ tracks progress for enabling virtual devices (emulators). Running either of the commands below will attempt to install locally built APKs to the device **while uninstalling/removing any existing APKs of the package on the device**, but this can be skipped by setting ``MOZ_DISABLE_ADB_INSTALL=1`` in your environment. When that environment variable exists, we expect the APK to be pre-installed on the device.
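The install-or-skip behaviour described above amounts to a small gate on ``MOZ_DISABLE_ADB_INSTALL``. A minimal sketch in Python, assuming a mozdevice-style ADB wrapper — ``ensure_apk`` and the exact method names here are illustrative, not Raptor's actual install path:

::

  import os

  def should_install_apk():
      # MOZ_DISABLE_ADB_INSTALL=1 means the APK is expected to already
      # be on the device, so the uninstall/reinstall step is skipped.
      return os.environ.get("MOZ_DISABLE_ADB_INSTALL") != "1"

  def ensure_apk(device, apk_path, app_name):
      if not should_install_apk():
          return  # rely on a pre-installed APK
      device.uninstall_app(app_name)  # remove any existing install first
      device.install_app(apk_path)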
+ Running on Raptor-Browsertime (recommended): * Running on Fenix :: - ./mach raptor --browsertime -t amazon --app fenix --binary org.mozilla.fenix + ./mach raptor -t amazon --app fenix --binary org.mozilla.fenix * Running on Geckoview :: - ./mach raptor --browsertime -t amazon --app geckoview --binary org.mozilla.geckoview_example + ./mach raptor -t amazon --app geckoview --binary org.mozilla.geckoview_example Running on vanilla Browsertime: @@ -122,7 +124,7 @@ Or for Raptor-Browsertime (use ``chrome`` for desktop, and ``chrome-m`` for mobi :: - ./mach raptor --browsertime -t amazon --app chrome --browsertime-chromedriver + ./mach raptor -t amazon --app chrome --browsertime-chromedriver Running Page-load tests with third party WebExtensions ------------------------------------------------------ @@ -137,14 +139,14 @@ Launch amazon tp6 page-load test on Firefox Desktop: :: - ./mach raptor --browsertime -t amazon --conditioned-profile settled-webext + ./mach raptor -t amazon --conditioned-profile settled-webext Launch amazon tp6 mobile page-load test on Firefox for Android (the apk has to be pre-installed, mach raptor does detect if already installed but it does not install it): :: - ./mach raptor --browsertime -t amazon --app fenix --binary org.mozilla.fenix --conditioned-profile settled-webext + ./mach raptor -t amazon --app fenix --binary org.mozilla.fenix --conditioned-profile settled-webext To run these jobs on try, make sure to select the tp6 jobs that include the string `webextensions`, as an example (add ``--no-push`` to force try fuzzy to only list the jobs selected by the try fuzzy query) to run all tp6 page-load webextensons jobs currently defined: @@ -153,12 +155,14 @@ list the jobs selected by the try fuzzy query) to run all tp6 page-load webexten ./mach try fuzzy -q "'tp6 'webextensions" -Similarly for running tp6m on Firefox for Android builds: +Similarly for running tp6m (equivalent to tp6 but for mobile) on Firefox for Android builds: :: ./mach try fuzzy --full -q "'tp6m 'webextensions" +Note that this can also be done using ``./mach try perf --show-all -q "'tp6m 'webextensions"`` to produce a compare view link of the changes before/after the patch being tested. + The set of extensions installed are the ones listed in the ``"addons"`` property of the condprof customization file `webext.json`_ from the ``testing/condprofile/condprof/customization/`` directory. @@ -196,20 +200,27 @@ Other methods for adding additional arguments are: Running Browsertime on Try -------------------------- -You can run all of our browsertime pageload tests through ``./mach try fuzzy --full``. We use chimera mode in these tests which means that both cold and warm pageload variants are running at the same time. -For example: +You can run all of our browsertime pageload tests through ``./mach try perf`` by selecting the ``Pageload`` category. We use chimera mode in these tests which means that both cold and warm pageload variants are running at the same time. There are a lot of other tests/categories available as well. Documentation about this tool can be found in :ref:`Mach Try Perf`. 
+ +For example, the following will select all ``Pageload`` categories to run on desktop: + +:: + + ./mach try perf -q "'Pageload" + +If you need to target android tasks, include the ``--android`` flag like so (remove the ``'android`` from the query string to target desktop tests at the same time): :: - ./mach try fuzzy -q "'g5 'imdb 'geckoview 'vismet '-wr 'shippable" + ./mach try perf --android -q "'Pageload 'android" -Retriggering Browsertime Visual Metrics Tasks ---------------------------------------------- +If you exclude the ``-q "..."`` option, an interface similar to the fuzzy interface will open, and show all available categories. -You can retrigger Browsertime tasks just like you retrigger any other tasks from Treeherder (using the retrigger buttons, add-new-jobs, retrigger-multiple, etc.). +Visual Metrics +-------------- -The following metrics are collected each time: ``fcp, loadtime, ContentfulSpeedIndex, PerceptualSpeedIndex, SpeedIndex, FirstVisualChange, LastVisualChange`` +The following visual metrics are collected in all page load tests: ``ContentfulSpeedIndex, PerceptualSpeedIndex, SpeedIndex, FirstVisualChange, LastVisualChange`` Further information regarding these metrics can be viewed at `visual-metrics `_ @@ -220,7 +231,7 @@ To run gecko profiling using Raptor-Browsertime you can add the ``--gecko-profil :: - ./mach raptor --browsertime -t amazon --gecko-profile + ./mach raptor -t amazon --gecko-profile Note that vanilla Browsertime does support Gecko Profiling but **it does not symbolicate the profiles** so it is **not recommended** to use for debugging performance regressions/improvements. @@ -248,6 +259,7 @@ Likewise, for chrome trace you will want to be aware of `these defaults. `_. If the change isn't found on the default branch (master/main branch), then you will need to add an entry for ``repository_branch`` to specify this. @@ -268,6 +281,7 @@ If the path to the test file changes (the file that is invoked to run the test), Finding the Geckodriver Being Used ---------------------------------- + If you're looking for the latest geckodriver being used there are two ways: * Find the latest one from here: https://treeherder.mozilla.org/jobs?repo=mozilla-central&searchStr=geckodriver * Alternatively, if you're trying to figure out which geckodriver a given CI task is using, you can click on the browsertime task in treeherder, and then click on the ``Task`` id in the bottom left of the pop-up interface. Then in the window that opens up, click on `See more` in the task details tab on the left, this will show you the dependent tasks with the latest toolchain-geckodriver being used. There's an Artifacts drop down on the right hand side for the toolchain-geckodriver task that you can find the latest geckodriver in. @@ -287,7 +301,7 @@ Mach Browsertime Setup ---------------------- **WARNING** - Raptor-Browsertime (i.e. ``./mach raptor --browsertime -t ``) is currently required to be ran first in order to acquire the Node-16 binary. In general, it is also not recommended to use ``./mach browsertime`` for testing as it will be deprecated soon. + Raptor-Browsertime (i.e. ``./mach raptor -t ``) is currently required to be ran first in order to acquire the Node-16 binary. In general, it is also not recommended to use ``./mach browsertime`` for testing as it will be deprecated soon. Note that if you are running Raptor-Browsertime then it will get installed automatically and also update itself. 
Otherwise, you can run: @@ -311,8 +325,8 @@ With the replacement of ImageMagick, former cross platform installation issues h -- For other issues, try deleting the ``~/.mozbuild/browsertime`` folder and re-running the browsertime setup command or a Raptor-Browsertime test +- For other issues, try deleting the ``~/.mozbuild/browsertime`` folder and re-running the browsertime setup command or a Raptor-Browsertime test. Alternatively, you may need to delete the ``tools/browsertime/node_modules`` folder. -- If you plan on running Browsertime on Android, your Android device must already be set up (see more below in the :ref: `Running on Android` section) +- If you plan on running Browsertime on Android, your Android device must already be set up (see more above in the :ref: `Running on Android` section) - **If you encounter any issues not mentioned here, please** `file a bug `_ **in the** ``Testing::Raptor`` **component.** diff --git a/testing/raptor/raptor/perfdocs/config.yml b/testing/raptor/raptor/perfdocs/config.yml index 12b3b48c11..a9ab330feb 100644 --- a/testing/raptor/raptor/perfdocs/config.yml +++ b/testing/raptor/raptor/perfdocs/config.yml @@ -28,131 +28,133 @@ suites: desktop: description: "Tests for page-load performance. The links direct to the actual websites that are being tested." tests: - amazon: "BT, FF, CH, CU" - bing-search: "BT, FF, CH, CU" - buzzfeed: "BT, FF, CH, CU" - cnn: "BT, FF, CH, CU" - ebay: "BT, FF, CH, CU" - espn: "BT, FF, CH, CU" - expedia: "BT, FF, CH, CU" - facebook: "BT, FF, CH, CU" - fandom: "BT, FF, CH, CU" - google-docs: "BT, FF, CH, CU" - google-mail: "BT, FF, CH, CU" - google-search: "BT, FF, CH, CU" - google-slides: "BT, FF, CH, CU" - imdb: "BT, FF, CH, CU" - imgur: "BT, FF, CH, CU" - instagram: "BT, FF, CH, CU" - linkedin: "BT, FF, CH, CU" - microsoft: "BT, FF, CH, CU" - netflix: "BT, FF, CH, CU" - nytimes: "BT, FF, CH, CU" - office: "BT, FF, CH, CU" - outlook: "BT, FF, CH, CU" - paypal: "BT, FF, CH, CU" - pinterest: "BT, FF, CH, CU" - reddit: "BT, FF, CH, CU" - tumblr: "BT, FF, CH, CU" - twitch: "BT, FF, CH, CU" - twitter: "BT, FF, CH, CU" - wikia: "BT, FF, CH, CU" - wikipedia: "BT, FF, CH, CU" - yahoo-mail: "BT, FF, CH, CU" - youtube: "BT, FF, CH, CU" + amazon: "" + bing-search: "" + buzzfeed: "" + cnn: "" + ebay: "" + espn: "" + expedia: "" + facebook: "" + fandom: "" + google-docs: "" + google-mail: "" + google-search: "" + google-slides: "" + imdb: "" + imgur: "" + instagram: "" + linkedin: "" + microsoft: "" + netflix: "" + nytimes: "" + office: "" + outlook: "" + paypal: "" + pinterest: "" + reddit: "" + tumblr: "" + twitch: "" + twitter: "" + wikia: "" + wikipedia: "" + yahoo-mail: "" + youtube: "" mobile: description: "Page-load performance test suite on Android. The links direct to the actual websites that are being tested." 
tests: - amazon: "BT, GV, FE, RB, CH-M" - youtube: "BT, GV, FE, RB, CH-M" - allrecipes: "BT, GV, FE, RB, CH-M" - amazon-search: "BT, GV, FE, RB, CH-M" - bing: "BT, GV, FE, RB, CH-M" - bing-search-restaurants: "BT, GV, FE, RB, CH-M" - booking: "BT, GV, FE, RB, CH-M" - cnn: "BT, GV, FE, RB, CH-M" - cnn-ampstories: "BT, GV, FE, RB, CH-M" - dailymail: "BT, GV, FE, RB, CH-M" - ebay-kleinanzeigen: "BT, GV, FE, RB, CH-M" - ebay-kleinanzeigen-search: "BT, GV, FE, RB, CH-M" - espn: "BT, GV, FE, RB, CH-M" - facebook: "BT, GV, FE, RB, CH-M" - facebook-cristiano: "BT, GV, FE, RB, CH-M" - google: "BT, GV, FE, RB, CH-M" - google-maps: "BT, GV, FE, RB, CH-M" - google-search-restaurants: "BT, GV, FE, RB, CH-M" - imdb: "BT, GV, FE, RB, CH-M" - instagram: "BT, GV, FE, RB, CH-M" - microsoft-support: "BT, GV, FE, RB, CH-M" - reddit: "BT, GV, FE, RB, CH-M" - stackoverflow: "BT, GV, FE, RB, CH-M" - sina: "BT, GV, FE, RB, CH-M" - web-de: "BT, GV, FE, RB, CH-M" - wikipedia: "BT, GV, FE, RB, CH-M" - youtube-watch: "BT, GV, FE, RB, CH-M" + amazon: "" + youtube: "" + allrecipes: "" + amazon-search: "" + bing: "" + bing-search-restaurants: "" + booking: "" + cnn: "" + cnn-ampstories: "" + dailymail: "" + ebay-kleinanzeigen: "" + ebay-kleinanzeigen-search: "" + espn: "" + facebook: "" + facebook-cristiano: "" + google: "" + google-maps: "" + google-search-restaurants: "" + imdb: "" + instagram: "" + microsoft-support: "" + reddit: "" + stackoverflow: "" + sina: "" + bild-de: "" + wikipedia: "" + youtube-watch: "" live: description: "A set of test pages that are run as live sites instead of recorded versions. These tests are available on all browsers, on all platforms." tests: - booking-sf: "GV, FE, RB, CH-M, FF, CH, CU" - discord: "GV, FE, RB, CH-M, FF, CH, CU" - fashionbeans: "GV, FE, RB, CH-M, FF, CH, CU" - google-accounts: "GV, FE, RB, CH-M, FF, CH, CU" - imdb-firefox: "GV, FE, RB, CH-M, FF, CH, CU" - medium-article: "GV, FE, RB, CH-M, FF, CH, CU" - people-article: "GV, FE, RB, CH-M, FF, CH, CU" - reddit-thread: "GV, FE, RB, CH-M, FF, CH, CU" - rumble-fox: "GV, FE, RB, CH-M, FF, CH, CU" - stackoverflow-question: "GV, FE, RB, CH-M, FF, CH, CU" - urbandictionary-define: "GV, FE, RB, CH-M, FF, CH, CU" - wikia-marvel: "GV, FE, RB, CH-M, FF, CH, CU" + booking-sf: "" + discord: "" + fashionbeans: "" + google-accounts: "" + imdb-firefox: "" + medium-article: "" + people-article: "" + reddit-thread: "" + rumble-fox: "" + stackoverflow-question: "" + urbandictionary-define: "" + wikia-marvel: "" benchmarks: description: > Standard benchmarks are third-party tests (i.e. Speedometer) that we have integrated into Raptor to run per-commit in our production CI. To update any of these benchmarks, see `Updating Benchmark Tests `_. 
tests: - ares6: "FF, CH, CU" - assorted-dom: "FF, CH, CU" - jetstream2: "FF, CH, CU" - matrix-react-bench: "FF, CH, CU" - motionmark-animometer: "FF, CH, CU, FE, CH-M, GV" - motionmark-animometer-ramp: "FF, CH, CU, FE, CH-M, GV" - motionmark-htmlsuite: "FF, CH, CU, FE, CH-M, GV" - motionmark-htmlsuite-ramp: "FF, CH, CU, FE, CH-M, GV" - speedometer: "FF, CH, CU, FE, GV, RB, CH-M" - speedometer3: "FF, CH, CU, FE, GV, RB, CH-M" - stylebench: "FF, CH, CU" - sunspider: "FF, CH, CU" - twitch-animation: "FF" - unity-webgl: "FF, CH, CU, FE, RB, FE, CH-M" - wasm-godot-baseline: "FF" - wasm-godot-optimizing: "FF" - wasm-godot: "FF, CH, CU" - wasm-misc-baseline: "FF" - wasm-misc-optimizing: "FF" - wasm-misc: "FF, CH, CU" - webaudio: "FF, CH, CU" - youtube-playback: "FF, GV, FE, RB, CH" - youtube-playback-av1-sfr: "FF , GV, FE, RB, CH" - youtube-playback-h264-1080p30: "FF" - youtube-playback-h264-1080p60: "FF" - youtube-playback-h264-full-1080p30: "FF" - youtube-playback-h264-full-1080p60: "FF" - youtube-playback-h264-sfr: "FF , GV, FE, RB, CH" - youtube-playback-hfr: "FF , GV, FE, RB, CH" - youtube-playback-v9-1080p30: "FF" - youtube-playback-v9-1080p60: "FF" - youtube-playback-v9-full-1080p30: "FF" - youtube-playback-v9-full-1080p60: "FF" - youtube-playback-vp9-sfr: "FF , GV, FE, RB, CH" - youtube-playback-widevine-h264-sfr: "FF , GV, FE, RB, CH" - youtube-playback-widevine-hfr: "FF , GV, FE, RB, CH" - youtube-playback-widevine-vp9-sfr: "FF , GV, FE, RB, CH" + ares6: "" + assorted-dom: "" + jetstream2: "" + matrix-react-bench: "" + motionmark-1-3: "" + motionmark-htmlsuite-1-3: "" + motionmark-animometer: "" + motionmark-animometer-ramp: "" + motionmark-htmlsuite: "" + motionmark-htmlsuite-ramp: "" + speedometer: "" + speedometer3: "" + stylebench: "" + sunspider: "" + twitch-animation: "" + unity-webgl: "" + wasm-godot-baseline: "" + wasm-godot-optimizing: "" + wasm-godot: "" + wasm-misc-baseline: "" + wasm-misc-optimizing: "" + wasm-misc: "" + webaudio: "" + youtube-playback: "" + youtube-playback-av1-sfr: "" + youtube-playback-h264-1080p30: "" + youtube-playback-h264-1080p60: "" + youtube-playback-h264-full-1080p30: "" + youtube-playback-h264-full-1080p60: "" + youtube-playback-h264-sfr: "" + youtube-playback-hfr: "" + youtube-playback-v9-1080p30: "" + youtube-playback-v9-1080p60: "" + youtube-playback-v9-full-1080p30: "" + youtube-playback-v9-full-1080p60: "" + youtube-playback-vp9-sfr: "" + youtube-playback-widevine-h264-sfr: "" + youtube-playback-widevine-hfr: "" + youtube-playback-widevine-vp9-sfr: "" scenario: description: "Tests that perform a specific action (a scenario), i.e. idle application, idle application in background, etc." tests: - idle: "FE, GV, RB" - idle-bg: "FE, GV, RB" + idle: "" + idle-bg: "" custom: description: "Browsertime tests that use a custom pageload test script. These use the pageload type, but may have other intentions." tests: @@ -191,7 +193,7 @@ suites: unittests: description: "These tests aren't used in standard testing, they are only used in the Raptor unit tests (they are similar to raptor-tp6 tests though)." 
tests: - test-page-1: "FF" - test-page-2: "FF" - test-page-3: "FF" - test-page-4: "FF" + test-page-1: "" + test-page-2: "" + test-page-3: "" + test-page-4: "" diff --git a/testing/raptor/raptor/perftest.py b/testing/raptor/raptor/perftest.py index eb46e351af..6e21b6d114 100644 --- a/testing/raptor/raptor/perftest.py +++ b/testing/raptor/raptor/perftest.py @@ -90,7 +90,7 @@ class Perftest(object): live_sites=False, is_release_build=False, debug_mode=False, - post_startup_delay=POST_DELAY_DEFAULT, + post_startup_delay=None, interrupt_handler=None, e10s=True, results_handler_class=RaptorResultsHandler, @@ -110,6 +110,7 @@ class Perftest(object): benchmark_revision=None, benchmark_branch=None, clean=False, + screenshot_on_failure=False, **kwargs ): self._remote_test_root = None @@ -156,6 +157,7 @@ class Perftest(object): "benchmark_revision": benchmark_revision, "benchmark_branch": benchmark_branch, "clean": clean, + "screenshot_on_failure": screenshot_on_failure, } self.firefox_android_apps = FIREFOX_ANDROID_APPS @@ -213,22 +215,28 @@ class Perftest(object): self.run_local = self.config["run_local"] self.debug_mode = debug_mode if self.run_local else False - # For the post startup delay, we want to max it to 1s when using the - # conditioned profiles. - if self.config.get("conditioned_profile"): - self.post_startup_delay = min(post_startup_delay, POST_DELAY_CONDPROF) - elif ( - self.debug_mode - ): # if running debug-mode reduce the pause after browser startup - self.post_startup_delay = min(post_startup_delay, POST_DELAY_DEBUG) + if post_startup_delay is None: + # For the post startup delay, we want to max it to 1s when using the + # conditioned profiles. + if self.config.get("conditioned_profile"): + self.post_startup_delay = POST_DELAY_CONDPROF + elif ( + self.debug_mode + ): # if running debug-mode reduce the pause after browser startup + self.post_startup_delay = POST_DELAY_DEBUG + else: + self.post_startup_delay = POST_DELAY_DEFAULT + + if ( + app in CHROME_ANDROID_APPS + FIREFOX_ANDROID_APPS + and not self.config.get("conditioned_profile") + ): + LOG.info("Mobile non-conditioned profile") + self.post_startup_delay = POST_DELAY_MOBILE else: + # User supplied a custom post_startup_delay value self.post_startup_delay = post_startup_delay - if app in CHROME_ANDROID_APPS + FIREFOX_ANDROID_APPS and not self.config.get( - "conditioned_profile" - ): - LOG.info("Mobile non-conditioned profile") - self.post_startup_delay = POST_DELAY_MOBILE LOG.info("Post startup delay set to %d ms" % self.post_startup_delay) LOG.info("main raptor init, config is: %s" % str(self.config)) diff --git a/testing/raptor/raptor/raptor.py b/testing/raptor/raptor/raptor.py index 9390b530b8..8b9cacda25 100644 --- a/testing/raptor/raptor/raptor.py +++ b/testing/raptor/raptor/raptor.py @@ -124,6 +124,7 @@ def main(args=sys.argv[1:]): benchmark_branch=args.benchmark_branch, page_timeout=args.page_timeout, clean=args.clean, + screenshot_on_failure=args.screenshot_on_failure, ) except Exception: traceback.print_exc() diff --git a/testing/raptor/raptor/raptor.toml b/testing/raptor/raptor/raptor.toml index fa6d0913e8..19277e8cff 100644 --- a/testing/raptor/raptor/raptor.toml +++ b/testing/raptor/raptor/raptor.toml @@ -9,6 +9,10 @@ ["include:tests/benchmarks/matrix-react-bench.toml"] +["include:tests/benchmarks/motionmark-1-3-desktop.toml"] + +["include:tests/benchmarks/motionmark-1-3-mobile.toml"] + ["include:tests/benchmarks/motionmark-animometer-desktop.toml"] ["include:tests/benchmarks/motionmark-animometer-mobile.toml"] 
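Aside on the reworked ``Perftest.__init__`` logic above: a default is now computed only when no ``--post-startup-delay`` was supplied. The following is a standalone restatement, not the shipped code; the millisecond constants are inferred from the ``test_raptor.py`` expectations later in this patch:

::

  POST_DELAY_DEFAULT = 30000   # desktop default
  POST_DELAY_CONDPROF = 1000   # conditioned profiles settle quickly
  POST_DELAY_DEBUG = 3000      # shorter pause for local debug runs
  POST_DELAY_MOBILE = 20000    # Android apps without a conditioned profile

  def pick_post_startup_delay(user_delay, conditioned_profile, debug_mode, is_android_app):
      if user_delay is not None:
          return user_delay  # an explicit --post-startup-delay always wins
      if conditioned_profile:
          return POST_DELAY_CONDPROF
      if is_android_app:
          # In the real code the mobile override is applied after the
          # debug check, so it takes precedence over POST_DELAY_DEBUG.
          return POST_DELAY_MOBILE
      if debug_mode:
          return POST_DELAY_DEBUG
      return POST_DELAY_DEFAULT

  # e.g. a default Fenix run: pick_post_startup_delay(None, None, False, True) -> 20000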
diff --git a/testing/raptor/raptor/tests/benchmarks/motionmark-1-3-desktop.toml b/testing/raptor/raptor/tests/benchmarks/motionmark-1-3-desktop.toml new file mode 100644 index 0000000000..3c471d3a17 --- /dev/null +++ b/testing/raptor/raptor/tests/benchmarks/motionmark-1-3-desktop.toml @@ -0,0 +1,28 @@ +[DEFAULT] +alert_threshold = 2.0 +apps = "firefox, chrome, chromium, safari, custom-car" +gecko_profile_interval = 1 +gecko_profile_features = "stackwalk,js,cpu,java,processcpu" +gecko_profile_threads = "GeckoMain,Compositor,Renderer,SwComposite,RenderBackend,SceneBuilder,WrWorker,CanvasWorkers,TextureUpdate" +expose_browser_profiler = true +expose_chrome_trace = true +lower_is_better = false +page_cycles = 1 +page_timeout = 600000 +support_class = "motionmark-1-3.py" +test_script = "motionmark-1-3.js" +owner = "Graphics Team" +type = "benchmark" +unit = "score" +repository = "https://github.com/webkit/motionmark" +repository_revision = "5d9c88136d59c11daf78d539c73e4e3e88c091ab" +test_url="http://:/MotionMark/developer.html?warmup-length=2000&warmup-frame-count=30&first-frame-minimum-length=0&test-interval=30&display=minimal&tiles=big&controller=ramp&system-frame-rate=60&frame-rate=60&time-measurement=performance" +cold = true +browser_cycles = 1 +host_from_parent = false + +[motionmark-1-3] +suite_name = "MotionMark" + +[motionmark-htmlsuite-1-3] +suite_name = "HTML suite" diff --git a/testing/raptor/raptor/tests/benchmarks/motionmark-1-3-mobile.toml b/testing/raptor/raptor/tests/benchmarks/motionmark-1-3-mobile.toml new file mode 100644 index 0000000000..f68f418608 --- /dev/null +++ b/testing/raptor/raptor/tests/benchmarks/motionmark-1-3-mobile.toml @@ -0,0 +1,27 @@ +[DEFAULT] +alert_threshold = 2.0 +apps = "fenix, geckoview, chrome-m, cstm-car-m" +gecko_profile_interval = 1 +gecko_profile_features = "stackwalk,js,cpu,java,processcpu" +gecko_profile_threads = "GeckoMain,Compositor,Renderer,SwComposite,RenderBackend,SceneBuilder,WrWorker,CanvasWorkers,TextureUpdate" +expose_browser_profiler = true +lower_is_better = false +page_cycles = 1 +page_timeout = 600000 +support_class = "motionmark-1-3.py" +test_script = "motionmark-1-3.js" +owner = "Graphics Team" +type = "benchmark" +unit = "score" +repository = "https://github.com/webkit/motionmark" +repository_revision = "5d9c88136d59c11daf78d539c73e4e3e88c091ab" +test_url="http://:/MotionMark/developer.html?warmup-length=2000&warmup-frame-count=30&first-frame-minimum-length=0&test-interval=30&display=minimal&tiles=big&controller=ramp&system-frame-rate=60&frame-rate=60&time-measurement=performance" +cold = true +browser_cycles = 1 +host_from_parent = false + +[motionmark-1-3] +suite_name = "MotionMark" + +[motionmark-htmlsuite-1-3] +suite_name = "HTML suite" diff --git a/testing/raptor/raptor/tests/benchmarks/speedometer-desktop.toml b/testing/raptor/raptor/tests/benchmarks/speedometer-desktop.toml index 6a378b999a..d3bc83e420 100644 --- a/testing/raptor/raptor/tests/benchmarks/speedometer-desktop.toml +++ b/testing/raptor/raptor/tests/benchmarks/speedometer-desktop.toml @@ -20,7 +20,7 @@ browser_cycles = 5 owner = "Performance Team" cold = true repository = "https://github.com/WebKit/Speedometer" -repository_revision = "64a5e7c0ba0f668834c0adf01b933e54d2b0d1f0" +repository_revision = "8d67f28d0281ac4330f283495b7f48286654ad7d" host_from_parent = false support_class = "speedometer3.py" test_script = "speedometer3.js" diff --git a/testing/raptor/raptor/tests/benchmarks/speedometer-mobile.toml 
b/testing/raptor/raptor/tests/benchmarks/speedometer-mobile.toml index e2e4381303..12bb9b16c4 100644 --- a/testing/raptor/raptor/tests/benchmarks/speedometer-mobile.toml +++ b/testing/raptor/raptor/tests/benchmarks/speedometer-mobile.toml @@ -20,7 +20,7 @@ browser_cycles = 5 owner = "Performance Team" cold = true repository = "https://github.com/WebKit/Speedometer" -repository_revision = "64a5e7c0ba0f668834c0adf01b933e54d2b0d1f0" +repository_revision = "8d67f28d0281ac4330f283495b7f48286654ad7d" host_from_parent = false support_class = "speedometer3.py" test_script = "speedometer3.js" diff --git a/testing/raptor/raptor/tests/tp6/mobile/browsertime-tp6m.toml b/testing/raptor/raptor/tests/tp6/mobile/browsertime-tp6m.toml index b20a13f605..a16e0916be 100644 --- a/testing/raptor/raptor/tests/tp6/mobile/browsertime-tp6m.toml +++ b/testing/raptor/raptor/tests/tp6/mobile/browsertime-tp6m.toml @@ -27,6 +27,10 @@ test_url = "https://www.amazon.com" ["amazon-search"] test_url = "https://www.amazon.com/s/ref=nb_sb_noss_2/139-6317191-5622045?url=search-alias%3Daps&field-keywords=mobile+phone" +["bild-de"] +playback_pageset_manifest = "mitm8-android-fenix-{subtest}.manifest" +test_url = "https://www.bild.de/sport/american-football/nfl/super-bowl-rekordquote-fuer-die-nfl-87139668.bild.html" + ["bing"] test_url = "https://www.bing.com/" @@ -90,15 +94,12 @@ test_url = "https://support.microsoft.com/en-us" test_url = "https://www.reddit.com" ["sina"] +playback_pageset_manifest = "mitm8-android-fenix-{subtest}.manifest" test_url = "https://www.sina.com.cn/" ["stackoverflow"] test_url = "https://stackoverflow.com/" -["web-de"] -playback_pageset_manifest = "mitm7-android-gve-p2-web-de.manifest" -test_url = "https://web.de/magazine/politik/politologe-glaubt-grossen-koalition-herbst-knallen-33563566" - ["wikipedia"] test_url = "https://en.m.wikipedia.org/wiki/Main_Page" diff --git a/testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-sina.manifest b/testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-sina.manifest deleted file mode 100644 index b52d47df45..0000000000 --- a/testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-sina.manifest +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "algorithm": "sha512", - "digest": "f3b55b61e6c38a342fee68fef04196de39b209053348cf58f5d9cc289100d82f271008f065adfd4ec02df564f6de12726f198e151af24aace9f654bf008a7b86", - "filename": "mitm6-android-fenix-sina.zip", - "size": 14757830, - "visibility": "public" - } -] diff --git a/testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-web-de.manifest b/testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-web-de.manifest deleted file mode 100644 index 9bde5db272..0000000000 --- a/testing/raptor/raptor/tooltool-manifests/playback/mitm6-android-fenix-web-de.manifest +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "algorithm": "sha512", - "digest": "9f10db4616f7415dba946f056658126297928d1229e5d2e525f9f1dd539c954b9a90b7d81fa574002fd1127321d8f918a3102cae12054b218118b059702cde7f", - "filename": "mitm6-android-fenix-web-de.zip", - "size": 1891502, - "visibility": "public" - } -] diff --git a/testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-bild-de.manifest b/testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-bild-de.manifest new file mode 100644 index 0000000000..e53c42c38f --- /dev/null +++ b/testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-bild-de.manifest @@ -0,0 +1,9 @@ +[ + { + "filename": 
"mitm8-android-fenix-bild-de.zip", + "size": 22367904, + "algorithm": "sha512", + "digest": "982b91675bd21aa89d27168f3964bcfe81e3002b6256da80b67398c711f063d2aca7cd023ef3540891f0a5b1f1762af49c22869e69adbc1515caacd7e8282e7b", + "visibility": "public" + } +] diff --git a/testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-sina.manifest b/testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-sina.manifest new file mode 100644 index 0000000000..1985bd7247 --- /dev/null +++ b/testing/raptor/raptor/tooltool-manifests/playback/mitm8-android-fenix-sina.manifest @@ -0,0 +1,9 @@ +[ + { + "filename": "mitm8-android-fenix-sina.zip", + "size": 15119356, + "algorithm": "sha512", + "digest": "56f38396be740dcf7ba0476125fa0f66fa68f5cbc049652a598afcf9492deee4e974d73193ddc4b125f3b136b5179931bb6c8a19bfa82e874a168393ea05a7eb", + "visibility": "public" + } +] diff --git a/testing/raptor/test/test_cmdline.py b/testing/raptor/test/test_cmdline.py index 80ce6cf88a..de88e60c77 100644 --- a/testing/raptor/test/test_cmdline.py +++ b/testing/raptor/test/test_cmdline.py @@ -1,5 +1,6 @@ import os import sys +from unittest import mock import mozunit import pytest @@ -11,7 +12,7 @@ sys.path.insert(0, raptor_dir) from argparse import ArgumentParser, Namespace -from cmdline import verify_options +from cmdline import create_parser, verify_options def test_verify_options(filedir): @@ -33,6 +34,7 @@ def test_verify_options(filedir): benchmark_repository=None, benchmark_revision=None, benchmark_branch=None, + post_startup_delay=None, ) parser = ArgumentParser() @@ -61,6 +63,7 @@ def test_verify_options(filedir): benchmark_repository=None, benchmark_revision=None, benchmark_branch=None, + post_startup_delay=None, ) verify_options(parser, args) # assert no exception @@ -83,6 +86,7 @@ def test_verify_options(filedir): benchmark_repository=None, benchmark_revision=None, benchmark_branch=None, + post_startup_delay=None, ) verify_options(parser, args) # assert no exception @@ -105,6 +109,7 @@ def test_verify_options(filedir): benchmark_repository=None, benchmark_revision=None, benchmark_branch=None, + post_startup_delay=None, ) verify_options(parser, args) # assert no exception @@ -127,6 +132,7 @@ def test_verify_options(filedir): benchmark_repository=None, benchmark_revision=None, benchmark_branch=None, + post_startup_delay=None, ) verify_options(parser, args) # assert no exception @@ -149,11 +155,89 @@ def test_verify_options(filedir): benchmark_repository=None, benchmark_revision=None, benchmark_branch=None, + post_startup_delay=None, ) parser = ArgumentParser() verify_options(parser, args) # also will work as uses default activity +@mock.patch("perftest.Perftest.build_browser_profile", new=mock.MagicMock()) +@pytest.mark.parametrize( + "args,settings_to_check", + [ + # Test that post_startup_delay is 30s as expected + [ + [ + "--test", + "test-page-1", + "--binary", + "invalid/path", + # This gets set automatically from mach_commands, but is set + # to False by default in the Perftest class + "--run-local", + ], + [ + ("post_startup_delay", 30000), + ("run_local", True), + ("debug_mode", False), + ], + ], + # Test that run_local is false by default + [ + [ + "--test", + "test-page-1", + "--binary", + "invalid/path", + ], + [ + ("post_startup_delay", 30000), + ("run_local", False), + ("debug_mode", False), + ], + ], + # Test that debug mode gets set when running locally + [ + [ + "--test", + "test-page-1", + "--binary", + "invalid/path", + "--debug-mode", + "--run-local", + ], + [ + 
("post_startup_delay", 3000), + ("run_local", True), + ("debug_mode", True), + ], + ], + # Test that debug mode doesn't get set when we're not running locally + [ + [ + "--test", + "test-page-1", + "--binary", + "invalid/path", + "--debug-mode", + ], + [ + ("post_startup_delay", 30000), + ("run_local", False), + ("debug_mode", False), + ], + ], + ], +) +def test_perftest_setup_with_args(ConcretePerftest, args, settings_to_check): + parser = create_parser() + args = parser.parse_args(args) + + perftest = ConcretePerftest(**vars(args)) + for setting, expected in settings_to_check: + assert getattr(perftest, setting) == expected + + if __name__ == "__main__": mozunit.main() diff --git a/testing/raptor/test/test_raptor.py b/testing/raptor/test/test_raptor.py index 9184fa5e18..3c90b611a5 100644 --- a/testing/raptor/test/test_raptor.py +++ b/testing/raptor/test/test_raptor.py @@ -127,38 +127,51 @@ def test_raptor_venv(ConcretePerftest, options): assert perftest.raptor_venv.endswith("raptor-venv") +@mock.patch("perftest.Perftest.build_browser_profile", new=mock.MagicMock()) @pytest.mark.parametrize( + "app," "run_local," "debug_mode," + "conditioned_profile," "post_startup_delay," "expected_post_startup_delay," "expected_debug_mode", [ - [True, True, 1234, 1234, True], - [True, True, 12345, 3000, True], - [False, False, 1234, 1234, False], - [False, False, 12345, 12345, False], - [True, False, 1234, 1234, False], - [True, False, 12345, 12345, False], - [False, True, 1234, 1234, False], - [False, True, 12345, 12345, False], + ["firefox", True, True, None, 1234, 1234, True], + ["firefox", True, True, None, None, 3000, True], + ["firefox", True, False, None, None, 30000, False], + ["firefox", True, False, "settled", None, 1000, False], + ["fenix", True, False, None, None, 20000, False], + ["fenix", True, False, "settled", None, 1000, False], + ["firefox", False, False, None, 1234, 1234, False], + ["firefox", False, False, None, 12345, 12345, False], + ["firefox", True, False, None, 1234, 1234, False], + ["firefox", True, False, None, 12345, 12345, False], + ["firefox", False, True, None, 1234, 1234, False], + ["firefox", False, True, None, 12345, 12345, False], ], ) def test_post_startup_delay( ConcretePerftest, options, + app, run_local, debug_mode, + conditioned_profile, post_startup_delay, expected_post_startup_delay, expected_debug_mode, ): + options["app"] = app + perftest = ConcretePerftest( run_local=run_local, debug_mode=debug_mode, post_startup_delay=post_startup_delay, + conditioned_profile=conditioned_profile, **options ) + assert perftest.post_startup_delay == expected_post_startup_delay assert perftest.debug_mode == expected_debug_mode -- cgit v1.2.3