summaryrefslogtreecommitdiffstats
path: root/src/ci/stage-build.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/ci/stage-build.py')
-rw-r--r--src/ci/stage-build.py842
1 files changed, 842 insertions, 0 deletions
diff --git a/src/ci/stage-build.py b/src/ci/stage-build.py
new file mode 100644
index 000000000..bd8fd524a
--- /dev/null
+++ b/src/ci/stage-build.py
@@ -0,0 +1,842 @@
+#!/usr/bin/env python3
+# ignore-tidy-linelength
+
+# Compatible with Python 3.6+
+
+import contextlib
+import getpass
+import glob
+import json
+import logging
+import os
+import pprint
+import shutil
+import subprocess
+import sys
+import time
+import traceback
+import urllib.request
+from io import StringIO
+from pathlib import Path
+from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \
+ Tuple, Union
+
+PGO_HOST = os.environ["PGO_HOST"]
+
+LOGGER = logging.getLogger("stage-build")
+
+LLVM_PGO_CRATES = [
+ "syn-1.0.89",
+ "cargo-0.60.0",
+ "serde-1.0.136",
+ "ripgrep-13.0.0",
+ "regex-1.5.5",
+ "clap-3.1.6",
+ "hyper-0.14.18"
+]
+
+RUSTC_PGO_CRATES = [
+ "externs",
+ "ctfe-stress-5",
+ "cargo-0.60.0",
+ "token-stream-stress",
+ "match-stress",
+ "tuple-stress",
+ "diesel-1.4.8",
+ "bitmaps-3.1.0"
+]
+
+LLVM_BOLT_CRATES = LLVM_PGO_CRATES
+
+
+class Pipeline:
+ # Paths
+ def checkout_path(self) -> Path:
+ """
+ The root checkout, where the source is located.
+ """
+ raise NotImplementedError
+
+ def downloaded_llvm_dir(self) -> Path:
+ """
+ Directory where the host LLVM is located.
+ """
+ raise NotImplementedError
+
+ def build_root(self) -> Path:
+ """
+ The main directory where the build occurs.
+ """
+ raise NotImplementedError
+
+ def build_artifacts(self) -> Path:
+ return self.build_root() / "build" / PGO_HOST
+
+ def rustc_stage_0(self) -> Path:
+ return self.build_artifacts() / "stage0" / "bin" / "rustc"
+
+ def cargo_stage_0(self) -> Path:
+ return self.build_artifacts() / "stage0" / "bin" / "cargo"
+
+ def rustc_stage_2(self) -> Path:
+ return self.build_artifacts() / "stage2" / "bin" / "rustc"
+
+ def opt_artifacts(self) -> Path:
+ raise NotImplementedError
+
+ def llvm_profile_dir_root(self) -> Path:
+ return self.opt_artifacts() / "llvm-pgo"
+
+ def llvm_profile_merged_file(self) -> Path:
+ return self.opt_artifacts() / "llvm-pgo.profdata"
+
+ def rustc_perf_dir(self) -> Path:
+ return self.opt_artifacts() / "rustc-perf"
+
+ def build_rustc_perf(self):
+ raise NotImplementedError()
+
+ def rustc_profile_dir_root(self) -> Path:
+ return self.opt_artifacts() / "rustc-pgo"
+
+ def rustc_profile_merged_file(self) -> Path:
+ return self.opt_artifacts() / "rustc-pgo.profdata"
+
+ def rustc_profile_template_path(self) -> Path:
+ """
+ The profile data is written into a single filepath that is being repeatedly merged when each
+ rustc invocation ends. Empirically, this can result in some profiling data being lost. That's
+ why we override the profile path to include the PID. This will produce many more profiling
+ files, but the resulting profile will produce a slightly faster rustc binary.
+ """
+ return self.rustc_profile_dir_root() / "default_%m_%p.profraw"
+
+ def supports_bolt(self) -> bool:
+ raise NotImplementedError
+
+ def llvm_bolt_profile_merged_file(self) -> Path:
+ return self.opt_artifacts() / "bolt.profdata"
+
+ def metrics_path(self) -> Path:
+ return self.build_root() / "build" / "metrics.json"
+
+
+class LinuxPipeline(Pipeline):
+ def checkout_path(self) -> Path:
+ return Path("/checkout")
+
+ def downloaded_llvm_dir(self) -> Path:
+ return Path("/rustroot")
+
+ def build_root(self) -> Path:
+ return self.checkout_path() / "obj"
+
+ def opt_artifacts(self) -> Path:
+ return Path("/tmp/tmp-multistage/opt-artifacts")
+
+ def build_rustc_perf(self):
+ # /tmp/rustc-perf comes from the Dockerfile
+ shutil.copytree("/tmp/rustc-perf", self.rustc_perf_dir())
+ cmd(["chown", "-R", f"{getpass.getuser()}:", self.rustc_perf_dir()])
+
+ with change_cwd(self.rustc_perf_dir()):
+ cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
+ RUSTC=str(self.rustc_stage_0()),
+ RUSTC_BOOTSTRAP="1"
+ ))
+
+ def supports_bolt(self) -> bool:
+ return True
+
+
+class WindowsPipeline(Pipeline):
+ def __init__(self):
+ self.checkout_dir = Path(os.getcwd())
+
+ def checkout_path(self) -> Path:
+ return self.checkout_dir
+
+ def downloaded_llvm_dir(self) -> Path:
+ return self.checkout_path() / "citools" / "clang-rust"
+
+ def build_root(self) -> Path:
+ return self.checkout_path()
+
+ def opt_artifacts(self) -> Path:
+ return self.checkout_path() / "opt-artifacts"
+
+ def rustc_stage_0(self) -> Path:
+ return super().rustc_stage_0().with_suffix(".exe")
+
+ def cargo_stage_0(self) -> Path:
+ return super().cargo_stage_0().with_suffix(".exe")
+
+ def rustc_stage_2(self) -> Path:
+ return super().rustc_stage_2().with_suffix(".exe")
+
+ def build_rustc_perf(self):
+ # rustc-perf version from 2022-07-22
+ perf_commit = "3c253134664fdcba862c539d37f0de18557a9a4c"
+ rustc_perf_zip_path = self.opt_artifacts() / "perf.zip"
+
+ def download_rustc_perf():
+ download_file(
+ f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip",
+ rustc_perf_zip_path
+ )
+ with change_cwd(self.opt_artifacts()):
+ unpack_archive(rustc_perf_zip_path)
+ move_path(Path(f"rustc-perf-{perf_commit}"), self.rustc_perf_dir())
+ delete_file(rustc_perf_zip_path)
+
+ retry_action(download_rustc_perf, "Download rustc-perf")
+
+ with change_cwd(self.rustc_perf_dir()):
+ cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
+ RUSTC=str(self.rustc_stage_0()),
+ RUSTC_BOOTSTRAP="1"
+ ))
+
+ def rustc_profile_template_path(self) -> Path:
+ """
+ On Windows, we don't have enough space to use separate files for each rustc invocation.
+ Therefore, we use a single file for the generated profiles.
+ """
+ return self.rustc_profile_dir_root() / "default_%m.profraw"
+
+ def supports_bolt(self) -> bool:
+ return False
+
+
+def get_timestamp() -> float:
+ return time.time()
+
+
+Duration = float
+
+
+def iterate_timers(timer: "Timer", name: str, level: int = 0) -> Iterator[
+ Tuple[int, str, Duration]]:
+ """
+ Hierarchically iterate the children of a timer, in a depth-first order.
+ """
+ yield (level, name, timer.total_duration())
+ for (child_name, child_timer) in timer.children:
+ yield from iterate_timers(child_timer, child_name, level=level + 1)
+
+
+class Timer:
+ def __init__(self, parent_names: Tuple[str, ...] = ()):
+ self.children: List[Tuple[str, Timer]] = []
+ self.section_active = False
+ self.parent_names = parent_names
+ self.duration_excluding_children: Duration = 0
+
+ @contextlib.contextmanager
+ def section(self, name: str) -> ContextManager["Timer"]:
+ assert not self.section_active
+ self.section_active = True
+
+ start = get_timestamp()
+ exc = None
+
+ child_timer = Timer(parent_names=self.parent_names + (name,))
+ full_name = " > ".join(child_timer.parent_names)
+ try:
+ LOGGER.info(f"Section `{full_name}` starts")
+ yield child_timer
+ except BaseException as exception:
+ exc = exception
+ raise
+ finally:
+ end = get_timestamp()
+ duration = end - start
+
+ child_timer.duration_excluding_children = duration - child_timer.total_duration()
+ self.add_child(name, child_timer)
+ if exc is None:
+ LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)")
+ else:
+ LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)")
+ self.section_active = False
+
+ def total_duration(self) -> Duration:
+ return self.duration_excluding_children + sum(
+ c.total_duration() for (_, c) in self.children)
+
+ def has_children(self) -> bool:
+ return len(self.children) > 0
+
+ def print_stats(self):
+ rows = []
+ for (child_name, child_timer) in self.children:
+ for (level, name, duration) in iterate_timers(child_timer, child_name, level=0):
+ label = f"{' ' * level}{name}:"
+ rows.append((label, duration))
+
+ # Empty row
+ rows.append(("", ""))
+
+ total_duration_label = "Total duration:"
+ total_duration = self.total_duration()
+ rows.append((total_duration_label, humantime(total_duration)))
+
+ space_after_label = 2
+ max_label_length = max(16, max(len(label) for (label, _) in rows)) + space_after_label
+
+ table_width = max_label_length + 23
+ divider = "-" * table_width
+
+ with StringIO() as output:
+ print(divider, file=output)
+ for (label, duration) in rows:
+ if isinstance(duration, Duration):
+ pct = (duration / total_duration) * 100
+ value = f"{duration:>12.2f}s ({pct:>5.2f}%)"
+ else:
+ value = f"{duration:>{len(total_duration_label) + 7}}"
+ print(f"{label:<{max_label_length}} {value}", file=output)
+ print(divider, file=output, end="")
+ LOGGER.info(f"Timer results\n{output.getvalue()}")
+
+ def add_child(self, name: str, timer: "Timer"):
+ self.children.append((name, timer))
+
+ def add_duration(self, name: str, duration: Duration):
+ timer = Timer(parent_names=self.parent_names + (name,))
+ timer.duration_excluding_children = duration
+ self.add_child(name, timer)
+
+
+class BuildStep:
+ def __init__(self, type: str, children: List["BuildStep"], duration: float):
+ self.type = type
+ self.children = children
+ self.duration = duration
+
+ def find_all_by_type(self, type: str) -> Iterator["BuildStep"]:
+ if type == self.type:
+ yield self
+ for child in self.children:
+ yield from child.find_all_by_type(type)
+
+ def __repr__(self):
+ return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})"
+
+
+def load_last_metrics(path: Path) -> BuildStep:
+ """
+ Loads the metrics of the most recent bootstrap execution from a metrics.json file.
+ """
+ with open(path, "r") as f:
+ metrics = json.load(f)
+ invocation = metrics["invocations"][-1]
+
+ def parse(entry) -> Optional[BuildStep]:
+ if "kind" not in entry or entry["kind"] != "rustbuild_step":
+ return None
+ type = entry.get("type", "")
+ duration = entry.get("duration_excluding_children_sec", 0)
+ children = []
+
+ for child in entry.get("children", ()):
+ step = parse(child)
+ if step is not None:
+ children.append(step)
+ duration += step.duration
+ return BuildStep(type=type, children=children, duration=duration)
+
+ children = [parse(child) for child in invocation.get("children", ())]
+ return BuildStep(
+ type="root",
+ children=children,
+ duration=invocation.get("duration_including_children_sec", 0)
+ )
+
+
+@contextlib.contextmanager
+def change_cwd(dir: Path):
+ """
+ Temporarily change working directory to `dir`.
+ """
+ cwd = os.getcwd()
+ LOGGER.debug(f"Changing working dir from `{cwd}` to `{dir}`")
+ os.chdir(dir)
+ try:
+ yield
+ finally:
+ LOGGER.debug(f"Reverting working dir to `{cwd}`")
+ os.chdir(cwd)
+
+
+def humantime(time_s: float) -> str:
+ hours = time_s // 3600
+ time_s = time_s % 3600
+ minutes = time_s // 60
+ seconds = time_s % 60
+
+ result = ""
+ if hours > 0:
+ result += f"{int(hours)}h "
+ if minutes > 0:
+ result += f"{int(minutes)}m "
+ result += f"{round(seconds)}s"
+ return result
+
+
+def move_path(src: Path, dst: Path):
+ LOGGER.info(f"Moving `{src}` to `{dst}`")
+ shutil.move(src, dst)
+
+
+def delete_file(path: Path):
+ LOGGER.info(f"Deleting file `{path}`")
+ os.unlink(path)
+
+
+def delete_directory(path: Path):
+ LOGGER.info(f"Deleting directory `{path}`")
+ shutil.rmtree(path)
+
+
+def unpack_archive(archive: Path):
+ LOGGER.info(f"Unpacking archive `{archive}`")
+ shutil.unpack_archive(archive)
+
+
+def download_file(src: str, target: Path):
+ LOGGER.info(f"Downloading `{src}` into `{target}`")
+ urllib.request.urlretrieve(src, str(target))
+
+
+def retry_action(action, name: str, max_fails: int = 5):
+ LOGGER.info(f"Attempting to perform action `{name}` with retry")
+ for iteration in range(max_fails):
+ LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
+ try:
+ action()
+ return
+ except:
+ LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")
+
+ raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
+
+
+def cmd(
+ args: List[Union[str, Path]],
+ env: Optional[Dict[str, str]] = None,
+ output_path: Optional[Path] = None
+):
+ args = [str(arg) for arg in args]
+
+ environment = os.environ.copy()
+
+ cmd_str = ""
+ if env is not None:
+ environment.update(env)
+ cmd_str += " ".join(f"{k}={v}" for (k, v) in (env or {}).items())
+ cmd_str += " "
+ cmd_str += " ".join(args)
+ if output_path is not None:
+ cmd_str += f" > {output_path}"
+ LOGGER.info(f"Executing `{cmd_str}`")
+
+ if output_path is not None:
+ with open(output_path, "w") as f:
+ return subprocess.run(
+ args,
+ env=environment,
+ check=True,
+ stdout=f
+ )
+ return subprocess.run(args, env=environment, check=True)
+
+
+def run_compiler_benchmarks(
+ pipeline: Pipeline,
+ profiles: List[str],
+ scenarios: List[str],
+ crates: List[str],
+ env: Optional[Dict[str, str]] = None
+):
+ env = env if env is not None else {}
+
+ # Compile libcore, both in opt-level=0 and opt-level=3
+ with change_cwd(pipeline.build_root()):
+ cmd([
+ pipeline.rustc_stage_2(),
+ "--edition", "2021",
+ "--crate-type", "lib",
+ str(pipeline.checkout_path() / "library/core/src/lib.rs"),
+ "--out-dir", pipeline.opt_artifacts()
+ ], env=dict(RUSTC_BOOTSTRAP="1", **env))
+
+ cmd([
+ pipeline.rustc_stage_2(),
+ "--edition", "2021",
+ "--crate-type", "lib",
+ "-Copt-level=3",
+ str(pipeline.checkout_path() / "library/core/src/lib.rs"),
+ "--out-dir", pipeline.opt_artifacts()
+ ], env=dict(RUSTC_BOOTSTRAP="1", **env))
+
+ # Run rustc-perf benchmarks
+ # Benchmark using profile_local with eprintln, which essentially just means
+ # don't actually benchmark -- just make sure we run rustc a bunch of times.
+ with change_cwd(pipeline.rustc_perf_dir()):
+ cmd([
+ pipeline.cargo_stage_0(),
+ "run",
+ "-p", "collector", "--bin", "collector", "--",
+ "profile_local", "eprintln",
+ pipeline.rustc_stage_2(),
+ "--id", "Test",
+ "--cargo", pipeline.cargo_stage_0(),
+ "--profiles", ",".join(profiles),
+ "--scenarios", ",".join(scenarios),
+ "--include", ",".join(crates)
+ ], env=dict(
+ RUST_LOG="collector=debug",
+ RUSTC=str(pipeline.rustc_stage_0()),
+ RUSTC_BOOTSTRAP="1",
+ **env
+ ))
+
+
+# https://stackoverflow.com/a/31631711/1107768
+def format_bytes(size: int) -> str:
+ """Return the given bytes as a human friendly KiB, MiB or GiB string."""
+ KB = 1024
+ MB = KB ** 2 # 1,048,576
+ GB = KB ** 3 # 1,073,741,824
+ TB = KB ** 4 # 1,099,511,627,776
+
+ if size < KB:
+ return f"{size} B"
+ elif KB <= size < MB:
+ return f"{size / KB:.2f} KiB"
+ elif MB <= size < GB:
+ return f"{size / MB:.2f} MiB"
+ elif GB <= size < TB:
+ return f"{size / GB:.2f} GiB"
+ else:
+ return str(size)
+
+
+# https://stackoverflow.com/a/63307131/1107768
+def count_files(path: Path) -> int:
+ return sum(1 for p in path.rglob("*") if p.is_file())
+
+
+def count_files_with_prefix(path: Path) -> int:
+ return sum(1 for p in glob.glob(f"{path}*") if Path(p).is_file())
+
+
+# https://stackoverflow.com/a/55659577/1107768
+def get_path_size(path: Path) -> int:
+ if path.is_dir():
+ return sum(p.stat().st_size for p in path.rglob("*"))
+ return path.stat().st_size
+
+
+def get_path_prefix_size(path: Path) -> int:
+ """
+ Get size of all files beginning with the prefix `path`.
+ Alternative to shell `du -sh <path>*`.
+ """
+ return sum(Path(p).stat().st_size for p in glob.glob(f"{path}*"))
+
+
+def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
+ for file in os.listdir(directory):
+ path = directory / file
+ if filter is None or filter(path):
+ yield path
+
+
+def build_rustc(
+ pipeline: Pipeline,
+ args: List[str],
+ env: Optional[Dict[str, str]] = None
+):
+ arguments = [
+ sys.executable,
+ pipeline.checkout_path() / "x.py",
+ "build",
+ "--target", PGO_HOST,
+ "--host", PGO_HOST,
+ "--stage", "2",
+ "library/std"
+ ] + args
+ cmd(arguments, env=env)
+
+
+def create_pipeline() -> Pipeline:
+ if sys.platform == "linux":
+ return LinuxPipeline()
+ elif sys.platform in ("cygwin", "win32"):
+ return WindowsPipeline()
+ else:
+ raise Exception(f"Optimized build is not supported for platform {sys.platform}")
+
+
+def gather_llvm_profiles(pipeline: Pipeline):
+ LOGGER.info("Running benchmarks with PGO instrumented LLVM")
+ run_compiler_benchmarks(
+ pipeline,
+ profiles=["Debug", "Opt"],
+ scenarios=["Full"],
+ crates=LLVM_PGO_CRATES
+ )
+
+ profile_path = pipeline.llvm_profile_merged_file()
+ LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}")
+ cmd([
+ pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata",
+ "merge",
+ "-o", profile_path,
+ pipeline.llvm_profile_dir_root()
+ ])
+
+ LOGGER.info("LLVM PGO statistics")
+ LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
+ LOGGER.info(
+ f"{pipeline.llvm_profile_dir_root()}: {format_bytes(get_path_size(pipeline.llvm_profile_dir_root()))}")
+ LOGGER.info(f"Profile file count: {count_files(pipeline.llvm_profile_dir_root())}")
+
+ # We don't need the individual .profraw files now that they have been merged
+ # into a final .profdata
+ delete_directory(pipeline.llvm_profile_dir_root())
+
+
+def gather_rustc_profiles(pipeline: Pipeline):
+ LOGGER.info("Running benchmarks with PGO instrumented rustc")
+
+ # Here we're profiling the `rustc` frontend, so we also include `Check`.
+ # The benchmark set includes various stress tests that put the frontend under pressure.
+ run_compiler_benchmarks(
+ pipeline,
+ profiles=["Check", "Debug", "Opt"],
+ scenarios=["All"],
+ crates=RUSTC_PGO_CRATES,
+ env=dict(
+ LLVM_PROFILE_FILE=str(pipeline.rustc_profile_template_path())
+ )
+ )
+
+ profile_path = pipeline.rustc_profile_merged_file()
+ LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}")
+ cmd([
+ pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata",
+ "merge",
+ "-o", profile_path,
+ pipeline.rustc_profile_dir_root()
+ ])
+
+ LOGGER.info("Rustc PGO statistics")
+ LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
+ LOGGER.info(
+ f"{pipeline.rustc_profile_dir_root()}: {format_bytes(get_path_size(pipeline.rustc_profile_dir_root()))}")
+ LOGGER.info(f"Profile file count: {count_files(pipeline.rustc_profile_dir_root())}")
+
+ # We don't need the individual .profraw files now that they have been merged
+ # into a final .profdata
+ delete_directory(pipeline.rustc_profile_dir_root())
+
+
+def gather_llvm_bolt_profiles(pipeline: Pipeline):
+ LOGGER.info("Running benchmarks with BOLT instrumented LLVM")
+ run_compiler_benchmarks(
+ pipeline,
+ profiles=["Check", "Debug", "Opt"],
+ scenarios=["Full"],
+ crates=LLVM_BOLT_CRATES
+ )
+
+ merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
+ profile_files_path = Path("/tmp/prof.fdata")
+ LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")
+
+ profile_files = sorted(glob.glob(f"{profile_files_path}*"))
+ cmd([
+ "merge-fdata",
+ *profile_files,
+ ], output_path=merged_profile_path)
+
+ LOGGER.info("LLVM BOLT statistics")
+ LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}")
+ LOGGER.info(
+ f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}")
+ LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
+
+
+def clear_llvm_files(pipeline: Pipeline):
+ """
+ Rustbuild currently doesn't support rebuilding LLVM when PGO options
+ change (or any other llvm-related options); so just clear out the relevant
+ directories ourselves.
+ """
+ LOGGER.info("Clearing LLVM build files")
+ delete_directory(pipeline.build_artifacts() / "llvm")
+ delete_directory(pipeline.build_artifacts() / "lld")
+
+
+def print_binary_sizes(pipeline: Pipeline):
+ bin_dir = pipeline.build_artifacts() / "stage2" / "bin"
+ binaries = get_files(bin_dir)
+
+ lib_dir = pipeline.build_artifacts() / "stage2" / "lib"
+ libraries = get_files(lib_dir, lambda p: p.suffix == ".so")
+
+ paths = sorted(binaries) + sorted(libraries)
+ with StringIO() as output:
+ for path in paths:
+ path_str = f"{path.name}:"
+ print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output)
+ LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
+
+
+def print_free_disk_space(pipeline: Pipeline):
+ usage = shutil.disk_usage(pipeline.opt_artifacts())
+ total = usage.total
+ used = usage.used
+ free = usage.free
+
+ logging.info(
+ f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)")
+
+
+def log_metrics(step: BuildStep):
+ substeps: List[Tuple[int, BuildStep]] = []
+
+ def visit(step: BuildStep, level: int):
+ substeps.append((level, step))
+ for child in step.children:
+ visit(child, level=level + 1)
+
+ visit(step, 0)
+
+ output = StringIO()
+ for (level, step) in substeps:
+ label = f"{'.' * level}{step.type}"
+ print(f"{label:<65}{step.duration:>8.2f}s", file=output)
+ logging.info(f"Build step durations\n{output.getvalue()}")
+
+
+def record_metrics(pipeline: Pipeline, timer: Timer):
+ metrics = load_last_metrics(pipeline.metrics_path())
+ if metrics is None:
+ return
+ llvm_steps = tuple(metrics.find_all_by_type("bootstrap::native::Llvm"))
+ assert len(llvm_steps) > 0
+ llvm_duration = sum(step.duration for step in llvm_steps)
+
+ rustc_steps = tuple(metrics.find_all_by_type("bootstrap::compile::Rustc"))
+ assert len(rustc_steps) > 0
+ rustc_duration = sum(step.duration for step in rustc_steps)
+
+ # The LLVM step is part of the Rustc step
+ rustc_duration -= llvm_duration
+
+ timer.add_duration("LLVM", llvm_duration)
+ timer.add_duration("Rustc", rustc_duration)
+
+ log_metrics(metrics)
+
+
+def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: List[str]):
+ # Clear and prepare tmp directory
+ shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
+ os.makedirs(pipeline.opt_artifacts(), exist_ok=True)
+
+ pipeline.build_rustc_perf()
+
+ # Stage 1: Build rustc + PGO instrumented LLVM
+ with timer.section("Stage 1 (LLVM PGO)") as stage1:
+ with stage1.section("Build rustc and LLVM") as rustc_build:
+ build_rustc(pipeline, args=[
+ "--llvm-profile-generate"
+ ], env=dict(
+ LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
+ ))
+ record_metrics(pipeline, rustc_build)
+
+ with stage1.section("Gather profiles"):
+ gather_llvm_profiles(pipeline)
+ print_free_disk_space(pipeline)
+
+ clear_llvm_files(pipeline)
+ final_build_args += [
+ "--llvm-profile-use",
+ pipeline.llvm_profile_merged_file()
+ ]
+
+ # Stage 2: Build PGO instrumented rustc + LLVM
+ with timer.section("Stage 2 (rustc PGO)") as stage2:
+ with stage2.section("Build rustc and LLVM") as rustc_build:
+ build_rustc(pipeline, args=[
+ "--rust-profile-generate",
+ pipeline.rustc_profile_dir_root()
+ ])
+ record_metrics(pipeline, rustc_build)
+
+ with stage2.section("Gather profiles"):
+ gather_rustc_profiles(pipeline)
+ print_free_disk_space(pipeline)
+
+ clear_llvm_files(pipeline)
+ final_build_args += [
+ "--rust-profile-use",
+ pipeline.rustc_profile_merged_file()
+ ]
+
+ # Stage 3: Build rustc + BOLT instrumented LLVM
+ if pipeline.supports_bolt():
+ with timer.section("Stage 3 (LLVM BOLT)") as stage3:
+ with stage3.section("Build rustc and LLVM") as rustc_build:
+ build_rustc(pipeline, args=[
+ "--llvm-profile-use",
+ pipeline.llvm_profile_merged_file(),
+ "--llvm-bolt-profile-generate",
+ ])
+ record_metrics(pipeline, rustc_build)
+
+ with stage3.section("Gather profiles"):
+ gather_llvm_bolt_profiles(pipeline)
+
+ print_free_disk_space(pipeline)
+ clear_llvm_files(pipeline)
+ final_build_args += [
+ "--llvm-bolt-profile-use",
+ pipeline.llvm_bolt_profile_merged_file()
+ ]
+
+ # Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
+ with timer.section("Stage 4 (final build)") as stage4:
+ cmd(final_build_args)
+ record_metrics(pipeline, stage4)
+
+
+if __name__ == "__main__":
+ logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(name)s %(levelname)-4s: %(message)s",
+ )
+
+ LOGGER.info(f"Running multi-stage build using Python {sys.version}")
+ LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")
+
+ build_args = sys.argv[1:]
+
+ timer = Timer()
+ pipeline = create_pipeline()
+ try:
+ execute_build_pipeline(timer, pipeline, build_args)
+ except BaseException as e:
+ LOGGER.error("The multi-stage build has failed")
+ raise e
+ finally:
+ timer.print_stats()
+ print_free_disk_space(pipeline)
+
+ print_binary_sizes(pipeline)