summaryrefslogtreecommitdiffstats
path: root/src/ci/stage-build.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/ci/stage-build.py')
-rwxr-xr-xsrc/ci/stage-build.py1037
1 files changed, 0 insertions, 1037 deletions
diff --git a/src/ci/stage-build.py b/src/ci/stage-build.py
deleted file mode 100755
index 3bb3b1418..000000000
--- a/src/ci/stage-build.py
+++ /dev/null
@@ -1,1037 +0,0 @@
-#!/usr/bin/env python3
-# ignore-tidy-linelength
-
-# Compatible with Python 3.6+
-
-import contextlib
-import getpass
-import glob
-import json
-import logging
-import os
-import pprint
-import shutil
-import subprocess
-import sys
-import time
-import traceback
-import urllib.request
-from io import StringIO
-from pathlib import Path
-from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \
- Tuple, Union
-
-PGO_HOST = os.environ["PGO_HOST"]
-CHANNEL = os.environ.get("RUST_RELEASE_CHANNEL", "")
-
-LOGGER = logging.getLogger("stage-build")
-
-LLVM_PGO_CRATES = [
- "syn-1.0.89",
- "cargo-0.60.0",
- "serde-1.0.136",
- "ripgrep-13.0.0",
- "regex-1.5.5",
- "clap-3.1.6",
- "hyper-0.14.18"
-]
-
-RUSTC_PGO_CRATES = [
- "externs",
- "ctfe-stress-5",
- "cargo-0.60.0",
- "token-stream-stress",
- "match-stress",
- "tuple-stress",
- "diesel-1.4.8",
- "bitmaps-3.1.0"
-]
-
-LLVM_BOLT_CRATES = LLVM_PGO_CRATES
-
-
-def is_try_build() -> bool:
- return os.environ.get("DIST_TRY_BUILD", "0") != "0"
-
-
-class Pipeline:
- # Paths
- def checkout_path(self) -> Path:
- """
- The root checkout, where the source is located.
- """
- raise NotImplementedError
-
- def downloaded_llvm_dir(self) -> Path:
- """
- Directory where the host LLVM is located.
- """
- raise NotImplementedError
-
- def build_root(self) -> Path:
- """
- The main directory where the build occurs.
- """
- raise NotImplementedError
-
- def build_artifacts(self) -> Path:
- return self.build_root() / "build" / PGO_HOST
-
- def rustc_stage_0(self) -> Path:
- return self.build_artifacts() / "stage0" / "bin" / "rustc"
-
- def cargo_stage_0(self) -> Path:
- return self.build_artifacts() / "stage0" / "bin" / "cargo"
-
- def rustc_stage_2(self) -> Path:
- return self.build_artifacts() / "stage2" / "bin" / "rustc"
-
- def opt_artifacts(self) -> Path:
- raise NotImplementedError
-
- def llvm_profile_dir_root(self) -> Path:
- return self.opt_artifacts() / "llvm-pgo"
-
- def llvm_profile_merged_file(self) -> Path:
- return self.opt_artifacts() / "llvm-pgo.profdata"
-
- def rustc_perf_dir(self) -> Path:
- return self.opt_artifacts() / "rustc-perf"
-
- def build_rustc_perf(self):
- raise NotImplementedError()
-
- def rustc_profile_dir_root(self) -> Path:
- return self.opt_artifacts() / "rustc-pgo"
-
- def rustc_profile_merged_file(self) -> Path:
- return self.opt_artifacts() / "rustc-pgo.profdata"
-
- def rustc_profile_template_path(self) -> Path:
- """
- The profile data is written into a single filepath that is being repeatedly merged when each
- rustc invocation ends. Empirically, this can result in some profiling data being lost. That's
- why we override the profile path to include the PID. This will produce many more profiling
- files, but the resulting profile will produce a slightly faster rustc binary.
- """
- return self.rustc_profile_dir_root() / "default_%m_%p.profraw"
-
- def supports_bolt(self) -> bool:
- raise NotImplementedError
-
- def llvm_bolt_profile_merged_file(self) -> Path:
- return self.opt_artifacts() / "bolt.profdata"
-
- def metrics_path(self) -> Path:
- return self.build_root() / "build" / "metrics.json"
-
- def executable_extension(self) -> str:
- raise NotImplementedError
-
- def skipped_tests(self) -> Iterable[str]:
- return ()
-
-
-class LinuxPipeline(Pipeline):
- def checkout_path(self) -> Path:
- return Path("/checkout")
-
- def downloaded_llvm_dir(self) -> Path:
- return Path("/rustroot")
-
- def build_root(self) -> Path:
- return self.checkout_path() / "obj"
-
- def opt_artifacts(self) -> Path:
- return Path("/tmp/tmp-multistage/opt-artifacts")
-
- def build_rustc_perf(self):
- # /tmp/rustc-perf comes from the Dockerfile
- shutil.copytree("/tmp/rustc-perf", self.rustc_perf_dir())
- cmd(["chown", "-R", f"{getpass.getuser()}:", self.rustc_perf_dir()])
-
- with change_cwd(self.rustc_perf_dir()):
- cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
- RUSTC=str(self.rustc_stage_0()),
- RUSTC_BOOTSTRAP="1"
- ))
-
- def supports_bolt(self) -> bool:
- return True
-
- def executable_extension(self) -> str:
- return ""
-
- def skipped_tests(self) -> Iterable[str]:
- # This test fails because of linker errors, as of June 2023.
- yield "tests/ui/process/nofile-limit.rs"
-
-
-class WindowsPipeline(Pipeline):
- def __init__(self):
- self.checkout_dir = Path(os.getcwd())
-
- def checkout_path(self) -> Path:
- return self.checkout_dir
-
- def downloaded_llvm_dir(self) -> Path:
- return self.checkout_path() / "citools" / "clang-rust"
-
- def build_root(self) -> Path:
- return self.checkout_path()
-
- def opt_artifacts(self) -> Path:
- return self.checkout_path() / "opt-artifacts"
-
- def rustc_stage_0(self) -> Path:
- return super().rustc_stage_0().with_suffix(".exe")
-
- def cargo_stage_0(self) -> Path:
- return super().cargo_stage_0().with_suffix(".exe")
-
- def rustc_stage_2(self) -> Path:
- return super().rustc_stage_2().with_suffix(".exe")
-
- def build_rustc_perf(self):
- # rustc-perf version from 2023-03-15
- perf_commit = "8b2ac3042e1ff2c0074455a0a3618adef97156b1"
- rustc_perf_zip_path = self.opt_artifacts() / "perf.zip"
-
- def download_rustc_perf():
- download_file(
- f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip",
- rustc_perf_zip_path
- )
- with change_cwd(self.opt_artifacts()):
- unpack_archive(rustc_perf_zip_path)
- move_path(Path(f"rustc-perf-{perf_commit}"), self.rustc_perf_dir())
- delete_file(rustc_perf_zip_path)
-
- retry_action(download_rustc_perf, "Download rustc-perf")
-
- with change_cwd(self.rustc_perf_dir()):
- cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
- RUSTC=str(self.rustc_stage_0()),
- RUSTC_BOOTSTRAP="1"
- ))
-
- def rustc_profile_template_path(self) -> Path:
- """
- On Windows, we don't have enough space to use separate files for each rustc invocation.
- Therefore, we use a single file for the generated profiles.
- """
- return self.rustc_profile_dir_root() / "default_%m.profraw"
-
- def supports_bolt(self) -> bool:
- return False
-
- def executable_extension(self) -> str:
- return ".exe"
-
- def skipped_tests(self) -> Iterable[str]:
- # This test fails as of June 2023
- yield "tests\\codegen\\vec-shrink-panik.rs"
-
-
-def get_timestamp() -> float:
- return time.time()
-
-
-Duration = float
-
-
-def iterate_timers(timer: "Timer", name: str, level: int = 0) -> Iterator[
- Tuple[int, str, Duration]]:
- """
- Hierarchically iterate the children of a timer, in a depth-first order.
- """
- yield (level, name, timer.total_duration())
- for (child_name, child_timer) in timer.children:
- yield from iterate_timers(child_timer, child_name, level=level + 1)
-
-
-class Timer:
- def __init__(self, parent_names: Tuple[str, ...] = ()):
- self.children: List[Tuple[str, Timer]] = []
- self.section_active = False
- self.parent_names = parent_names
- self.duration_excluding_children: Duration = 0
-
- @contextlib.contextmanager
- def section(self, name: str) -> ContextManager["Timer"]:
- assert not self.section_active
- self.section_active = True
-
- start = get_timestamp()
- exc = None
-
- child_timer = Timer(parent_names=self.parent_names + (name,))
- full_name = " > ".join(child_timer.parent_names)
- try:
- LOGGER.info(f"Section `{full_name}` starts")
- yield child_timer
- except BaseException as exception:
- exc = exception
- raise
- finally:
- end = get_timestamp()
- duration = end - start
-
- child_timer.duration_excluding_children = duration - child_timer.total_duration()
- self.add_child(name, child_timer)
- if exc is None:
- LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)")
- else:
- LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)")
- self.section_active = False
-
- def total_duration(self) -> Duration:
- return self.duration_excluding_children + sum(
- c.total_duration() for (_, c) in self.children)
-
- def has_children(self) -> bool:
- return len(self.children) > 0
-
- def print_stats(self):
- rows = []
- for (child_name, child_timer) in self.children:
- for (level, name, duration) in iterate_timers(child_timer, child_name, level=0):
- label = f"{' ' * level}{name}:"
- rows.append((label, duration))
-
- # Empty row
- rows.append(("", ""))
-
- total_duration_label = "Total duration:"
- total_duration = self.total_duration()
- rows.append((total_duration_label, humantime(total_duration)))
-
- space_after_label = 2
- max_label_length = max(16, max(len(label) for (label, _) in rows)) + space_after_label
-
- table_width = max_label_length + 23
- divider = "-" * table_width
-
- with StringIO() as output:
- print(divider, file=output)
- for (label, duration) in rows:
- if isinstance(duration, Duration):
- pct = (duration / total_duration) * 100
- value = f"{duration:>12.2f}s ({pct:>5.2f}%)"
- else:
- value = f"{duration:>{len(total_duration_label) + 7}}"
- print(f"{label:<{max_label_length}} {value}", file=output)
- print(divider, file=output, end="")
- LOGGER.info(f"Timer results\n{output.getvalue()}")
-
- def add_child(self, name: str, timer: "Timer"):
- self.children.append((name, timer))
-
- def add_duration(self, name: str, duration: Duration):
- timer = Timer(parent_names=self.parent_names + (name,))
- timer.duration_excluding_children = duration
- self.add_child(name, timer)
-
-
-class BuildStep:
- def __init__(self, type: str, children: List["BuildStep"], duration: float):
- self.type = type
- self.children = children
- self.duration = duration
-
- def find_all_by_type(self, type: str) -> Iterator["BuildStep"]:
- if type == self.type:
- yield self
- for child in self.children:
- yield from child.find_all_by_type(type)
-
- def __repr__(self):
- return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})"
-
-
-def load_last_metrics(path: Path) -> BuildStep:
- """
- Loads the metrics of the most recent bootstrap execution from a metrics.json file.
- """
- with open(path, "r") as f:
- metrics = json.load(f)
- invocation = metrics["invocations"][-1]
-
- def parse(entry) -> Optional[BuildStep]:
- if "kind" not in entry or entry["kind"] != "rustbuild_step":
- return None
- type = entry.get("type", "")
- duration = entry.get("duration_excluding_children_sec", 0)
- children = []
-
- for child in entry.get("children", ()):
- step = parse(child)
- if step is not None:
- children.append(step)
- duration += step.duration
- return BuildStep(type=type, children=children, duration=duration)
-
- children = [parse(child) for child in invocation.get("children", ())]
- return BuildStep(
- type="root",
- children=children,
- duration=invocation.get("duration_including_children_sec", 0)
- )
-
-
-@contextlib.contextmanager
-def change_cwd(dir: Path):
- """
- Temporarily change working directory to `dir`.
- """
- cwd = os.getcwd()
- LOGGER.debug(f"Changing working dir from `{cwd}` to `{dir}`")
- os.chdir(dir)
- try:
- yield
- finally:
- LOGGER.debug(f"Reverting working dir to `{cwd}`")
- os.chdir(cwd)
-
-
-def humantime(time_s: float) -> str:
- hours = time_s // 3600
- time_s = time_s % 3600
- minutes = time_s // 60
- seconds = time_s % 60
-
- result = ""
- if hours > 0:
- result += f"{int(hours)}h "
- if minutes > 0:
- result += f"{int(minutes)}m "
- result += f"{round(seconds)}s"
- return result
-
-
-def move_path(src: Path, dst: Path):
- LOGGER.info(f"Moving `{src}` to `{dst}`")
- shutil.move(src, dst)
-
-
-def delete_file(path: Path):
- LOGGER.info(f"Deleting file `{path}`")
- os.unlink(path)
-
-
-def delete_directory(path: Path):
- LOGGER.info(f"Deleting directory `{path}`")
- shutil.rmtree(path)
-
-
-def unpack_archive(archive: Path, target_dir: Optional[Path] = None):
- LOGGER.info(f"Unpacking archive `{archive}`")
- shutil.unpack_archive(str(archive), extract_dir=str(target_dir) if target_dir is not None else None)
-
-
-def download_file(src: str, target: Path):
- LOGGER.info(f"Downloading `{src}` into `{target}`")
- urllib.request.urlretrieve(src, str(target))
-
-
-def retry_action(action, name: str, max_fails: int = 5):
- LOGGER.info(f"Attempting to perform action `{name}` with retry")
- for iteration in range(max_fails):
- LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
- try:
- action()
- return
- except BaseException: # also catch ctrl+c/sysexit
- LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")
-
- raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
-
-
-def cmd(
- args: List[Union[str, Path]],
- env: Optional[Dict[str, str]] = None,
- output_path: Optional[Path] = None
-):
- args = [str(arg) for arg in args]
-
- environment = os.environ.copy()
-
- cmd_str = ""
- if env is not None:
- environment.update(env)
- cmd_str += " ".join(f"{k}={v}" for (k, v) in (env or {}).items())
- cmd_str += " "
- cmd_str += " ".join(args)
- if output_path is not None:
- cmd_str += f" > {output_path}"
- LOGGER.info(f"Executing `{cmd_str}`")
-
- if output_path is not None:
- with open(output_path, "w") as f:
- return subprocess.run(
- args,
- env=environment,
- check=True,
- stdout=f
- )
- return subprocess.run(args, env=environment, check=True)
-
-
-class BenchmarkRunner:
- def run_rustc(self, pipeline: Pipeline):
- raise NotImplementedError
-
- def run_llvm(self, pipeline: Pipeline):
- raise NotImplementedError
-
- def run_bolt(self, pipeline: Pipeline):
- raise NotImplementedError
-
-
-class DefaultBenchmarkRunner(BenchmarkRunner):
- def run_rustc(self, pipeline: Pipeline):
- # Here we're profiling the `rustc` frontend, so we also include `Check`.
- # The benchmark set includes various stress tests that put the frontend under pressure.
- run_compiler_benchmarks(
- pipeline,
- profiles=["Check", "Debug", "Opt"],
- scenarios=["All"],
- crates=RUSTC_PGO_CRATES,
- env=dict(
- LLVM_PROFILE_FILE=str(pipeline.rustc_profile_template_path())
- )
- )
-
- def run_llvm(self, pipeline: Pipeline):
- run_compiler_benchmarks(
- pipeline,
- profiles=["Debug", "Opt"],
- scenarios=["Full"],
- crates=LLVM_PGO_CRATES
- )
-
- def run_bolt(self, pipeline: Pipeline):
- run_compiler_benchmarks(
- pipeline,
- profiles=["Check", "Debug", "Opt"],
- scenarios=["Full"],
- crates=LLVM_BOLT_CRATES
- )
-
-
-def run_compiler_benchmarks(
- pipeline: Pipeline,
- profiles: List[str],
- scenarios: List[str],
- crates: List[str],
- env: Optional[Dict[str, str]] = None
-):
- env = env if env is not None else {}
-
- # Compile libcore, both in opt-level=0 and opt-level=3
- with change_cwd(pipeline.build_root()):
- cmd([
- pipeline.rustc_stage_2(),
- "--edition", "2021",
- "--crate-type", "lib",
- str(pipeline.checkout_path() / "library/core/src/lib.rs"),
- "--out-dir", pipeline.opt_artifacts()
- ], env=dict(RUSTC_BOOTSTRAP="1", **env))
-
- cmd([
- pipeline.rustc_stage_2(),
- "--edition", "2021",
- "--crate-type", "lib",
- "-Copt-level=3",
- str(pipeline.checkout_path() / "library/core/src/lib.rs"),
- "--out-dir", pipeline.opt_artifacts()
- ], env=dict(RUSTC_BOOTSTRAP="1", **env))
-
- # Run rustc-perf benchmarks
- # Benchmark using profile_local with eprintln, which essentially just means
- # don't actually benchmark -- just make sure we run rustc a bunch of times.
- with change_cwd(pipeline.rustc_perf_dir()):
- cmd([
- pipeline.cargo_stage_0(),
- "run",
- "-p", "collector", "--bin", "collector", "--",
- "profile_local", "eprintln",
- pipeline.rustc_stage_2(),
- "--id", "Test",
- "--cargo", pipeline.cargo_stage_0(),
- "--profiles", ",".join(profiles),
- "--scenarios", ",".join(scenarios),
- "--include", ",".join(crates)
- ], env=dict(
- RUST_LOG="collector=debug",
- RUSTC=str(pipeline.rustc_stage_0()),
- RUSTC_BOOTSTRAP="1",
- **env
- ))
-
-
-# https://stackoverflow.com/a/31631711/1107768
-def format_bytes(size: int) -> str:
- """Return the given bytes as a human friendly KiB, MiB or GiB string."""
- KB = 1024
- MB = KB ** 2 # 1,048,576
- GB = KB ** 3 # 1,073,741,824
- TB = KB ** 4 # 1,099,511,627,776
-
- if size < KB:
- return f"{size} B"
- elif KB <= size < MB:
- return f"{size / KB:.2f} KiB"
- elif MB <= size < GB:
- return f"{size / MB:.2f} MiB"
- elif GB <= size < TB:
- return f"{size / GB:.2f} GiB"
- else:
- return str(size)
-
-
-# https://stackoverflow.com/a/63307131/1107768
-def count_files(path: Path) -> int:
- return sum(1 for p in path.rglob("*") if p.is_file())
-
-
-def count_files_with_prefix(path: Path) -> int:
- return sum(1 for p in glob.glob(f"{path}*") if Path(p).is_file())
-
-
-# https://stackoverflow.com/a/55659577/1107768
-def get_path_size(path: Path) -> int:
- if path.is_dir():
- return sum(p.stat().st_size for p in path.rglob("*"))
- return path.stat().st_size
-
-
-def get_path_prefix_size(path: Path) -> int:
- """
- Get size of all files beginning with the prefix `path`.
- Alternative to shell `du -sh <path>*`.
- """
- return sum(Path(p).stat().st_size for p in glob.glob(f"{path}*"))
-
-
-def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
- for file in os.listdir(directory):
- path = directory / file
- if filter is None or filter(path):
- yield path
-
-
-def bootstrap_build(
- pipeline: Pipeline,
- args: List[str],
- env: Optional[Dict[str, str]] = None,
- targets: Iterable[str] = ("library/std", )
-):
- if env is None:
- env = {}
- else:
- env = dict(env)
- env["RUST_BACKTRACE"] = "1"
- arguments = [
- sys.executable,
- pipeline.checkout_path() / "x.py",
- "build",
- "--target", PGO_HOST,
- "--host", PGO_HOST,
- "--stage", "2",
- ] + list(targets) + args
- cmd(arguments, env=env)
-
-
-def create_pipeline() -> Pipeline:
- if sys.platform == "linux":
- return LinuxPipeline()
- elif sys.platform in ("cygwin", "win32"):
- return WindowsPipeline()
- else:
- raise Exception(f"Optimized build is not supported for platform {sys.platform}")
-
-
-def gather_llvm_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
- LOGGER.info("Running benchmarks with PGO instrumented LLVM")
-
- runner.run_llvm(pipeline)
-
- profile_path = pipeline.llvm_profile_merged_file()
- LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}")
- cmd([
- pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata",
- "merge",
- "-o", profile_path,
- pipeline.llvm_profile_dir_root()
- ])
-
- LOGGER.info("LLVM PGO statistics")
- LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
- LOGGER.info(
- f"{pipeline.llvm_profile_dir_root()}: {format_bytes(get_path_size(pipeline.llvm_profile_dir_root()))}")
- LOGGER.info(f"Profile file count: {count_files(pipeline.llvm_profile_dir_root())}")
-
- # We don't need the individual .profraw files now that they have been merged
- # into a final .profdata
- delete_directory(pipeline.llvm_profile_dir_root())
-
-
-def gather_rustc_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
- LOGGER.info("Running benchmarks with PGO instrumented rustc")
-
- runner.run_rustc(pipeline)
-
- profile_path = pipeline.rustc_profile_merged_file()
- LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}")
- cmd([
- pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata",
- "merge",
- "-o", profile_path,
- pipeline.rustc_profile_dir_root()
- ])
-
- LOGGER.info("Rustc PGO statistics")
- LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
- LOGGER.info(
- f"{pipeline.rustc_profile_dir_root()}: {format_bytes(get_path_size(pipeline.rustc_profile_dir_root()))}")
- LOGGER.info(f"Profile file count: {count_files(pipeline.rustc_profile_dir_root())}")
-
- # We don't need the individual .profraw files now that they have been merged
- # into a final .profdata
- delete_directory(pipeline.rustc_profile_dir_root())
-
-
-def gather_llvm_bolt_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
- LOGGER.info("Running benchmarks with BOLT instrumented LLVM")
-
- runner.run_bolt(pipeline)
-
- merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
- profile_files_path = Path("/tmp/prof.fdata")
- LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")
-
- profile_files = sorted(glob.glob(f"{profile_files_path}*"))
- cmd([
- "merge-fdata",
- *profile_files,
- ], output_path=merged_profile_path)
-
- LOGGER.info("LLVM BOLT statistics")
- LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}")
- LOGGER.info(
- f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}")
- LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
-
-
-def clear_llvm_files(pipeline: Pipeline):
- """
- Rustbuild currently doesn't support rebuilding LLVM when PGO options
- change (or any other llvm-related options); so just clear out the relevant
- directories ourselves.
- """
- LOGGER.info("Clearing LLVM build files")
- delete_directory(pipeline.build_artifacts() / "llvm")
- delete_directory(pipeline.build_artifacts() / "lld")
-
-
-def print_binary_sizes(pipeline: Pipeline):
- bin_dir = pipeline.build_artifacts() / "stage2" / "bin"
- binaries = get_files(bin_dir)
-
- lib_dir = pipeline.build_artifacts() / "stage2" / "lib"
- libraries = get_files(lib_dir, lambda p: p.suffix == ".so")
-
- paths = sorted(binaries) + sorted(libraries)
- with StringIO() as output:
- for path in paths:
- path_str = f"{path.name}:"
- print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output)
- LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
-
-
-def print_free_disk_space(pipeline: Pipeline):
- usage = shutil.disk_usage(pipeline.opt_artifacts())
- total = usage.total
- used = usage.used
- free = usage.free
-
- logging.info(
- f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)")
-
-
-def log_metrics(step: BuildStep):
- substeps: List[Tuple[int, BuildStep]] = []
-
- def visit(step: BuildStep, level: int):
- substeps.append((level, step))
- for child in step.children:
- visit(child, level=level + 1)
-
- visit(step, 0)
-
- output = StringIO()
- for (level, step) in substeps:
- label = f"{'.' * level}{step.type}"
- print(f"{label:<65}{step.duration:>8.2f}s", file=output)
- logging.info(f"Build step durations\n{output.getvalue()}")
-
-
-def record_metrics(pipeline: Pipeline, timer: Timer):
- metrics = load_last_metrics(pipeline.metrics_path())
- if metrics is None:
- return
- llvm_steps = tuple(metrics.find_all_by_type("bootstrap::llvm::Llvm"))
- llvm_duration = sum(step.duration for step in llvm_steps)
-
- rustc_steps = tuple(metrics.find_all_by_type("bootstrap::compile::Rustc"))
- rustc_duration = sum(step.duration for step in rustc_steps)
-
- # The LLVM step is part of the Rustc step
- rustc_duration = max(0, rustc_duration - llvm_duration)
-
- if llvm_duration > 0:
- timer.add_duration("LLVM", llvm_duration)
- if rustc_duration > 0:
- timer.add_duration("Rustc", rustc_duration)
-
- log_metrics(metrics)
-
-
-def run_tests(pipeline: Pipeline):
- """
- After `dist` is executed, we extract its archived components into a sysroot directory,
- and then use that extracted rustc as a stage0 compiler.
- Then we run a subset of tests using that compiler, to have a basic smoke test which checks
- whether the optimization pipeline hasn't broken something.
- """
- build_dir = pipeline.build_root() / "build"
- dist_dir = build_dir / "dist"
-
- def extract_dist_dir(name: str) -> Path:
- target_dir = build_dir / "optimized-dist"
- target_dir.mkdir(parents=True, exist_ok=True)
- unpack_archive(dist_dir / f"{name}.tar.xz", target_dir=target_dir)
- extracted_path = target_dir / name
- assert extracted_path.is_dir()
- return extracted_path
-
- # Extract rustc, libstd, cargo and src archives to create the optimized sysroot
- rustc_dir = extract_dist_dir(f"rustc-{CHANNEL}-{PGO_HOST}") / "rustc"
- libstd_dir = extract_dist_dir(f"rust-std-{CHANNEL}-{PGO_HOST}") / f"rust-std-{PGO_HOST}"
- cargo_dir = extract_dist_dir(f"cargo-{CHANNEL}-{PGO_HOST}") / "cargo"
- extracted_src_dir = extract_dist_dir(f"rust-src-{CHANNEL}") / "rust-src"
-
- # We need to manually copy libstd to the extracted rustc sysroot
- shutil.copytree(
- libstd_dir / "lib" / "rustlib" / PGO_HOST / "lib",
- rustc_dir / "lib" / "rustlib" / PGO_HOST / "lib"
- )
-
- # Extract sources - they aren't in the `rustc-{CHANNEL}-{host}` tarball, so we need to manually copy libstd
- # sources to the extracted sysroot. We need sources available so that `-Zsimulate-remapped-rust-src-base`
- # works correctly.
- shutil.copytree(
- extracted_src_dir / "lib" / "rustlib" / "src",
- rustc_dir / "lib" / "rustlib" / "src"
- )
-
- rustc_path = rustc_dir / "bin" / f"rustc{pipeline.executable_extension()}"
- assert rustc_path.is_file()
- cargo_path = cargo_dir / "bin" / f"cargo{pipeline.executable_extension()}"
- assert cargo_path.is_file()
-
- # Specify path to a LLVM config so that LLVM is not rebuilt.
- # It doesn't really matter which LLVM config we choose, because no sysroot will be compiled.
- llvm_config = pipeline.build_artifacts() / "llvm" / "bin" / f"llvm-config{pipeline.executable_extension()}"
- assert llvm_config.is_file()
-
- config_content = f"""profile = "user"
-changelog-seen = 2
-
-[build]
-rustc = "{rustc_path.as_posix()}"
-cargo = "{cargo_path.as_posix()}"
-
-[target.{PGO_HOST}]
-llvm-config = "{llvm_config.as_posix()}"
-"""
- logging.info(f"Using following `config.toml` for running tests:\n{config_content}")
-
- # Simulate a stage 0 compiler with the extracted optimized dist artifacts.
- with open("config.toml", "w") as f:
- f.write(config_content)
-
- args = [
- sys.executable,
- pipeline.checkout_path() / "x.py",
- "test",
- "--stage", "0",
- "tests/assembly",
- "tests/codegen",
- "tests/codegen-units",
- "tests/incremental",
- "tests/mir-opt",
- "tests/pretty",
- "tests/run-pass-valgrind",
- "tests/ui",
- ]
- for test_path in pipeline.skipped_tests():
- args.extend(["--exclude", test_path])
- cmd(args=args, env=dict(
- COMPILETEST_FORCE_STAGE0="1"
- ))
-
-
-def execute_build_pipeline(timer: Timer, pipeline: Pipeline, runner: BenchmarkRunner, dist_build_args: List[str]):
- # Clear and prepare tmp directory
- shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
- os.makedirs(pipeline.opt_artifacts(), exist_ok=True)
-
- pipeline.build_rustc_perf()
-
- """
- Stage 1: Build PGO instrumented rustc
-
- We use a normal build of LLVM, because gathering PGO profiles for LLVM and `rustc` at the same time
- can cause issues.
- """
- with timer.section("Stage 1 (rustc PGO)") as stage1:
- with stage1.section("Build PGO instrumented rustc and LLVM") as rustc_pgo_instrument:
- bootstrap_build(pipeline, args=[
- "--rust-profile-generate",
- pipeline.rustc_profile_dir_root()
- ])
- record_metrics(pipeline, rustc_pgo_instrument)
-
- with stage1.section("Gather profiles"):
- gather_rustc_profiles(pipeline, runner)
- print_free_disk_space(pipeline)
-
- with stage1.section("Build PGO optimized rustc") as rustc_pgo_use:
- bootstrap_build(pipeline, args=[
- "--rust-profile-use",
- pipeline.rustc_profile_merged_file()
- ])
- record_metrics(pipeline, rustc_pgo_use)
- dist_build_args += [
- "--rust-profile-use",
- pipeline.rustc_profile_merged_file()
- ]
-
- """
- Stage 2: Gather LLVM PGO profiles
- """
- with timer.section("Stage 2 (LLVM PGO)") as stage2:
- # Clear normal LLVM artifacts
- clear_llvm_files(pipeline)
-
- with stage2.section("Build PGO instrumented LLVM") as llvm_pgo_instrument:
- bootstrap_build(pipeline, args=[
- "--llvm-profile-generate",
- # We want to keep the already built PGO-optimized `rustc`.
- "--keep-stage", "0",
- "--keep-stage", "1"
- ], env=dict(
- LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
- ))
- record_metrics(pipeline, llvm_pgo_instrument)
-
- with stage2.section("Gather profiles"):
- gather_llvm_profiles(pipeline, runner)
-
- dist_build_args += [
- "--llvm-profile-use",
- pipeline.llvm_profile_merged_file(),
- ]
- print_free_disk_space(pipeline)
-
- # Clear PGO-instrumented LLVM artifacts
- clear_llvm_files(pipeline)
-
- """
- Stage 3: Build BOLT instrumented LLVM
-
- We build a PGO optimized LLVM in this step, then instrument it with BOLT and gather BOLT profiles.
- Note that we don't remove LLVM artifacts after this step, so that they are reused in the final dist build.
- BOLT instrumentation is performed "on-the-fly" when the LLVM library is copied to the sysroot of rustc,
- therefore the LLVM artifacts on disk are not "tainted" with BOLT instrumentation and they can be reused.
- """
- if pipeline.supports_bolt():
- with timer.section("Stage 3 (LLVM BOLT)") as stage3:
- with stage3.section("Build BOLT instrumented LLVM") as llvm_bolt_instrument:
- bootstrap_build(pipeline, args=[
- "--llvm-profile-use",
- pipeline.llvm_profile_merged_file(),
- "--llvm-bolt-profile-generate",
- # We want to keep the already built PGO-optimized `rustc`.
- "--keep-stage", "0",
- "--keep-stage", "1"
- ])
- record_metrics(pipeline, llvm_bolt_instrument)
-
- with stage3.section("Gather profiles"):
- gather_llvm_bolt_profiles(pipeline, runner)
-
- dist_build_args += [
- "--llvm-bolt-profile-use",
- pipeline.llvm_bolt_profile_merged_file()
- ]
- print_free_disk_space(pipeline)
-
- # We want to keep the already built PGO-optimized `rustc`.
- dist_build_args += [
- "--keep-stage", "0",
- "--keep-stage", "1"
- ]
-
- """
- Final stage: Build PGO optimized rustc + PGO/BOLT optimized LLVM
- """
- with timer.section("Final stage (dist build)") as final_stage:
- cmd(dist_build_args)
- record_metrics(pipeline, final_stage)
-
- # Try builds can be in various broken states, so we don't want to gatekeep them with tests
- # Do not run tests, as they are broken for beta/stable versions in this script
- # if not is_try_build():
- # with timer.section("Run tests"):
- # run_tests(pipeline)
-
-
-def run(runner: BenchmarkRunner):
- logging.basicConfig(
- level=logging.DEBUG,
- format="%(name)s %(levelname)-4s: %(message)s",
- )
-
- LOGGER.info(f"Running multi-stage build using Python {sys.version}")
- LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")
-
- build_args = sys.argv[1:]
-
- # Skip components that are not needed for try builds to speed them up
- if is_try_build():
- LOGGER.info("Skipping building of unimportant components for a try build")
- for target in ("rust-docs", "rustc-docs", "rust-docs-json", "rust-analyzer",
- "rustc-src", "clippy", "miri", "rustfmt"):
- build_args.extend(["--exclude", target])
-
- timer = Timer()
- pipeline = create_pipeline()
-
- try:
- execute_build_pipeline(timer, pipeline, runner, build_args)
- except BaseException as e:
- LOGGER.error("The multi-stage build has failed")
- raise e
- finally:
- timer.print_stats()
- print_free_disk_space(pipeline)
-
- print_binary_sizes(pipeline)
-
-
-if __name__ == "__main__":
- runner = DefaultBenchmarkRunner()
- run(runner)