diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:59:35 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:59:35 +0000 |
commit | d1b2d29528b7794b41e66fc2136e395a02f8529b (patch) | |
tree | a4a17504b260206dec3cf55b2dca82929a348ac2 /src/tools/opt-dist | |
parent | Releasing progress-linux version 1.72.1+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.tar.xz rustc-d1b2d29528b7794b41e66fc2136e395a02f8529b.zip |
Merging upstream version 1.73.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/opt-dist')
-rw-r--r-- | src/tools/opt-dist/Cargo.toml | 23 | ||||
-rw-r--r-- | src/tools/opt-dist/README.md | 7 | ||||
-rw-r--r-- | src/tools/opt-dist/src/bolt.rs | 103 | ||||
-rw-r--r-- | src/tools/opt-dist/src/environment/linux.rs | 58 | ||||
-rw-r--r-- | src/tools/opt-dist/src/environment/mod.rs | 77 | ||||
-rw-r--r-- | src/tools/opt-dist/src/environment/windows.rs | 82 | ||||
-rw-r--r-- | src/tools/opt-dist/src/exec.rs | 179 | ||||
-rw-r--r-- | src/tools/opt-dist/src/main.rs | 215 | ||||
-rw-r--r-- | src/tools/opt-dist/src/metrics.rs | 106 | ||||
-rw-r--r-- | src/tools/opt-dist/src/tests.rs | 114 | ||||
-rw-r--r-- | src/tools/opt-dist/src/timer.rs | 167 | ||||
-rw-r--r-- | src/tools/opt-dist/src/training.rs | 223 | ||||
-rw-r--r-- | src/tools/opt-dist/src/utils/io.rs | 88 | ||||
-rw-r--r-- | src/tools/opt-dist/src/utils/mod.rs | 75 |
14 files changed, 1517 insertions, 0 deletions
diff --git a/src/tools/opt-dist/Cargo.toml b/src/tools/opt-dist/Cargo.toml new file mode 100644 index 000000000..3f7dba81c --- /dev/null +++ b/src/tools/opt-dist/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "opt-dist" +version = "0.1.0" +edition = "2021" + +[dependencies] +build_helper = { path = "../build_helper" } +env_logger = "0.10" +log = "0.4" +anyhow = { version = "1", features = ["backtrace"] } +humantime = "2" +humansize = "2" +sysinfo = { version = "0.29", default-features = false } +fs_extra = "1" +camino = "1" +reqwest = { version = "0.11", features = ["blocking"] } +zip = { version = "0.6", default-features = false, features = ["deflate"] } +tar = "0.4" +xz = "0.1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +glob = "0.3" +tempfile = "3.5" diff --git a/src/tools/opt-dist/README.md b/src/tools/opt-dist/README.md new file mode 100644 index 000000000..05a75870d --- /dev/null +++ b/src/tools/opt-dist/README.md @@ -0,0 +1,7 @@ +# Optimized build pipeline +This binary implements a heavily optimized build pipeline for `rustc` and `LLVM` artifacts that are used both for +benchmarking using the perf. bot and for final distribution to users. + +It uses LTO, PGO and BOLT to optimize the compiler and LLVM as much as possible. +This logic is not part of bootstrap, because it needs to invoke bootstrap multiple times, force-rebuild various +artifacts repeatedly and sometimes go around bootstrap's cache mechanism. diff --git a/src/tools/opt-dist/src/bolt.rs b/src/tools/opt-dist/src/bolt.rs new file mode 100644 index 000000000..cf9f4fabc --- /dev/null +++ b/src/tools/opt-dist/src/bolt.rs @@ -0,0 +1,103 @@ +use anyhow::Context; + +use crate::exec::cmd; +use crate::training::LlvmBoltProfile; +use camino::{Utf8Path, Utf8PathBuf}; + +use crate::utils::io::copy_file; + +/// Instruments an artifact at the given `path` (in-place) with BOLT and then calls `func`. +/// After this function finishes, the original file will be restored. 
+pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>( + path: &Utf8Path, + func: F, +) -> anyhow::Result<R> { + // Back up the original file. + // It will be restored to its original state when this function exits. + // By copying it, we break any existing hard links, so that they are not affected by the + // instrumentation. + let _backup_file = BackedUpFile::new(path)?; + + let instrumented_path = tempfile::NamedTempFile::new()?.into_temp_path(); + + // Instrument the original file with BOLT, saving the result into `instrumented_path` + cmd(&["llvm-bolt"]) + .arg("-instrument") + .arg(path) + // Make sure that each process will write its profiles into a separate file + .arg("--instrumentation-file-append-pid") + .arg("-o") + .arg(instrumented_path.display()) + .run() + .with_context(|| anyhow::anyhow!("Could not instrument {path} using BOLT"))?; + + // Copy the instrumented artifact over the original one + copy_file(&instrumented_path, path)?; + + // Run the function that will make use of the instrumented artifact. + // The original file will be restored when `_backup_file` is dropped. + func() +} + +/// Optimizes the file at `path` with BOLT in-place using the given `profile`. +pub fn bolt_optimize(path: &Utf8Path, profile: &LlvmBoltProfile) -> anyhow::Result<()> { + // Copy the artifact to a new location, so that we do not use the same input and output file. + // BOLT cannot handle optimizing when the input and output is the same file, because it performs + // in-place patching. 
+ let temp_path = tempfile::NamedTempFile::new()?.into_temp_path(); + copy_file(path, &temp_path)?; + + cmd(&["llvm-bolt"]) + .arg(temp_path.display()) + .arg("-data") + .arg(&profile.0) + .arg("-o") + .arg(path) + // Reorder basic blocks within functions + .arg("-reorder-blocks=ext-tsp") + // Reorder functions within the binary + .arg("-reorder-functions=hfsort+") + // Split function code into hot and cold regions + .arg("-split-functions") + // Split as many basic blocks as possible + .arg("-split-all-cold") + // Move jump tables to a separate section + .arg("-jump-tables=move") + // Fold functions with identical code + .arg("-icf=1") + // The following flag saves about 50 MiB of libLLVM.so size. + // However, it succeeds very non-deterministically. To avoid frequent artifact size swings, + // it is kept disabled for now. + // FIXME(kobzol): try to re-enable this once BOLT in-place rewriting is merged or after + // we bump LLVM. + // Try to reuse old text segments to reduce binary size + // .arg("--use-old-text") + // Update DWARF debug info in the final binary + .arg("-update-debug-sections") + // Print optimization statistics + .arg("-dyno-stats") + .run() + .with_context(|| anyhow::anyhow!("Could not optimize {path} with BOLT"))?; + + Ok(()) +} + +/// Copies a file to a temporary location and restores it (copies it back) when it is dropped. 
+pub struct BackedUpFile { + original: Utf8PathBuf, + backup: tempfile::TempPath, +} + +impl BackedUpFile { + pub fn new(file: &Utf8Path) -> anyhow::Result<Self> { + let temp_path = tempfile::NamedTempFile::new()?.into_temp_path(); + copy_file(file, &temp_path)?; + Ok(Self { backup: temp_path, original: file.to_path_buf() }) + } +} + +impl Drop for BackedUpFile { + fn drop(&mut self) { + copy_file(&self.backup, &self.original).expect("Cannot restore backed up file"); + } +} diff --git a/src/tools/opt-dist/src/environment/linux.rs b/src/tools/opt-dist/src/environment/linux.rs new file mode 100644 index 000000000..58b7e6d23 --- /dev/null +++ b/src/tools/opt-dist/src/environment/linux.rs @@ -0,0 +1,58 @@ +use crate::environment::Environment; +use crate::exec::cmd; +use crate::utils::io::copy_directory; +use camino::{Utf8Path, Utf8PathBuf}; + +pub(super) struct LinuxEnvironment; + +impl Environment for LinuxEnvironment { + fn python_binary(&self) -> &'static str { + "python3" + } + + fn checkout_path(&self) -> Utf8PathBuf { + Utf8PathBuf::from("/checkout") + } + + fn host_llvm_dir(&self) -> Utf8PathBuf { + Utf8PathBuf::from("/rustroot") + } + + fn opt_artifacts(&self) -> Utf8PathBuf { + Utf8PathBuf::from("/tmp/tmp-multistage/opt-artifacts") + } + + fn build_root(&self) -> Utf8PathBuf { + self.checkout_path().join("obj") + } + + fn prepare_rustc_perf(&self) -> anyhow::Result<()> { + // /tmp/rustc-perf comes from the x64 dist Dockerfile + copy_directory(Utf8Path::new("/tmp/rustc-perf"), &self.rustc_perf_dir())?; + cmd(&[self.cargo_stage_0().as_str(), "build", "-p", "collector"]) + .workdir(&self.rustc_perf_dir()) + .env("RUSTC", &self.rustc_stage_0().into_string()) + .env("RUSTC_BOOTSTRAP", "1") + .run()?; + Ok(()) + } + + fn supports_bolt(&self) -> bool { + true + } + + fn supports_shared_llvm(&self) -> bool { + true + } + + fn executable_extension(&self) -> &'static str { + "" + } + + fn skipped_tests(&self) -> &'static [&'static str] { + &[ + // Fails because of 
linker errors, as of June 2023. + "tests/ui/process/nofile-limit.rs", + ] + } +} diff --git a/src/tools/opt-dist/src/environment/mod.rs b/src/tools/opt-dist/src/environment/mod.rs new file mode 100644 index 000000000..a8650fad0 --- /dev/null +++ b/src/tools/opt-dist/src/environment/mod.rs @@ -0,0 +1,77 @@ +use camino::Utf8PathBuf; + +#[cfg(target_family = "unix")] +mod linux; +#[cfg(target_family = "windows")] +mod windows; + +pub trait Environment { + fn host_triple(&self) -> String { + std::env::var("PGO_HOST").expect("PGO_HOST environment variable missing") + } + + fn python_binary(&self) -> &'static str; + + /// The rustc checkout, where the compiler source is located. + fn checkout_path(&self) -> Utf8PathBuf; + + /// Path to the host LLVM used to compile LLVM in `src/llvm-project`. + fn host_llvm_dir(&self) -> Utf8PathBuf; + + /// Directory where the optimization artifacts (PGO/BOLT profiles, etc.) + /// will be stored. + fn opt_artifacts(&self) -> Utf8PathBuf; + + /// The main directory where the build occurs. + fn build_root(&self) -> Utf8PathBuf; + + fn build_artifacts(&self) -> Utf8PathBuf { + self.build_root().join("build").join(self.host_triple()) + } + + fn cargo_stage_0(&self) -> Utf8PathBuf { + self.build_artifacts() + .join("stage0") + .join("bin") + .join(format!("cargo{}", self.executable_extension())) + } + + fn rustc_stage_0(&self) -> Utf8PathBuf { + self.build_artifacts() + .join("stage0") + .join("bin") + .join(format!("rustc{}", self.executable_extension())) + } + + fn rustc_stage_2(&self) -> Utf8PathBuf { + self.build_artifacts() + .join("stage2") + .join("bin") + .join(format!("rustc{}", self.executable_extension())) + } + + /// Path to the built rustc-perf benchmark suite. + fn rustc_perf_dir(&self) -> Utf8PathBuf { + self.opt_artifacts().join("rustc-perf") + } + + /// Download and/or compile rustc-perf. 
+ fn prepare_rustc_perf(&self) -> anyhow::Result<()>; + + fn supports_bolt(&self) -> bool; + + fn supports_shared_llvm(&self) -> bool; + + /// What is the extension of binary executables in this environment? + fn executable_extension(&self) -> &'static str; + + /// List of test paths that should be skipped when testing the optimized artifacts. + fn skipped_tests(&self) -> &'static [&'static str]; +} + +pub fn create_environment() -> Box<dyn Environment> { + #[cfg(target_family = "unix")] + return Box::new(linux::LinuxEnvironment); + #[cfg(target_family = "windows")] + return Box::new(windows::WindowsEnvironment::new()); +} diff --git a/src/tools/opt-dist/src/environment/windows.rs b/src/tools/opt-dist/src/environment/windows.rs new file mode 100644 index 000000000..8a9733d64 --- /dev/null +++ b/src/tools/opt-dist/src/environment/windows.rs @@ -0,0 +1,82 @@ +use crate::environment::Environment; +use crate::exec::cmd; +use crate::utils::io::move_directory; +use camino::Utf8PathBuf; +use std::io::Cursor; +use zip::ZipArchive; + +pub(super) struct WindowsEnvironment { + checkout_dir: Utf8PathBuf, +} + +impl WindowsEnvironment { + pub fn new() -> Self { + Self { checkout_dir: std::env::current_dir().unwrap().try_into().unwrap() } + } +} + +impl Environment for WindowsEnvironment { + fn python_binary(&self) -> &'static str { + "python" + } + + fn checkout_path(&self) -> Utf8PathBuf { + self.checkout_dir.clone() + } + + fn host_llvm_dir(&self) -> Utf8PathBuf { + self.checkout_path().join("citools").join("clang-rust") + } + + fn opt_artifacts(&self) -> Utf8PathBuf { + self.checkout_path().join("opt-artifacts") + } + + fn build_root(&self) -> Utf8PathBuf { + self.checkout_path() + } + + fn prepare_rustc_perf(&self) -> anyhow::Result<()> { + // FIXME: add some mechanism for synchronization of this commit SHA with + // Linux (which builds rustc-perf in a Dockerfile) + // rustc-perf version from 2023-05-30 + const PERF_COMMIT: &str = "8b2ac3042e1ff2c0074455a0a3618adef97156b1"; 
+ + let url = format!("https://github.com/rust-lang/rustc-perf/archive/{PERF_COMMIT}.zip"); + let response = reqwest::blocking::get(url)?.error_for_status()?.bytes()?.to_vec(); + + let mut archive = ZipArchive::new(Cursor::new(response))?; + archive.extract(self.rustc_perf_dir())?; + move_directory( + &self.rustc_perf_dir().join(format!("rustc-perf-{PERF_COMMIT}")), + &self.rustc_perf_dir(), + )?; + + cmd(&[self.cargo_stage_0().as_str(), "build", "-p", "collector"]) + .workdir(&self.rustc_perf_dir()) + .env("RUSTC", &self.rustc_stage_0().into_string()) + .env("RUSTC_BOOTSTRAP", "1") + .run()?; + + Ok(()) + } + + fn supports_bolt(&self) -> bool { + false + } + + fn supports_shared_llvm(&self) -> bool { + false + } + + fn executable_extension(&self) -> &'static str { + ".exe" + } + + fn skipped_tests(&self) -> &'static [&'static str] { + &[ + // Fails as of June 2023. + "tests\\codegen\\vec-shrink-panik.rs", + ] + } +} diff --git a/src/tools/opt-dist/src/exec.rs b/src/tools/opt-dist/src/exec.rs new file mode 100644 index 000000000..4765dceb5 --- /dev/null +++ b/src/tools/opt-dist/src/exec.rs @@ -0,0 +1,179 @@ +use crate::environment::Environment; +use crate::metrics::{load_metrics, record_metrics}; +use crate::timer::TimerSection; +use crate::training::{LlvmBoltProfile, LlvmPGOProfile, RustcPGOProfile}; +use camino::{Utf8Path, Utf8PathBuf}; +use std::collections::BTreeMap; +use std::fs::File; +use std::process::{Command, Stdio}; + +#[derive(Default)] +pub struct CmdBuilder { + args: Vec<String>, + env: BTreeMap<String, String>, + workdir: Option<Utf8PathBuf>, + output: Option<Utf8PathBuf>, +} + +impl CmdBuilder { + pub fn arg<S: ToString>(mut self, arg: S) -> Self { + self.args.push(arg.to_string()); + self + } + + pub fn env(mut self, name: &str, value: &str) -> Self { + self.env.insert(name.to_string(), value.to_string()); + self + } + + pub fn workdir(mut self, path: &Utf8Path) -> Self { + self.workdir = Some(path.to_path_buf()); + self + } + + pub fn 
redirect_output(mut self, path: Utf8PathBuf) -> Self { + self.output = Some(path); + self + } + + pub fn run(self) -> anyhow::Result<()> { + let mut cmd_str = String::new(); + cmd_str.push_str( + &self + .env + .iter() + .map(|(key, value)| format!("{key}={value}")) + .collect::<Vec<_>>() + .join(" "), + ); + if !self.env.is_empty() { + cmd_str.push(' '); + } + cmd_str.push_str(&self.args.join(" ")); + if let Some(ref path) = self.output { + cmd_str.push_str(&format!(" > {path:?}")); + } + cmd_str.push_str(&format!( + " [at {}]", + self.workdir + .clone() + .unwrap_or_else(|| std::env::current_dir().unwrap().try_into().unwrap()) + )); + log::info!("Executing `{cmd_str}`"); + + let mut cmd = Command::new(&self.args[0]); + cmd.stdin(Stdio::null()); + cmd.args(self.args.iter().skip(1)); + for (key, value) in &self.env { + cmd.env(key, value); + } + if let Some(ref output) = self.output { + cmd.stdout(File::create(output.clone().into_std_path_buf())?); + } + if let Some(ref workdir) = self.workdir { + cmd.current_dir(workdir.clone().into_std_path_buf()); + } + let exit_status = cmd.spawn()?.wait()?; + if !exit_status.success() { + Err(anyhow::anyhow!( + "Command {cmd_str} has failed with exit code {:?}", + exit_status.code(), + )) + } else { + Ok(()) + } + } +} + +pub fn cmd(args: &[&str]) -> CmdBuilder { + assert!(!args.is_empty()); + CmdBuilder { args: args.iter().map(|s| s.to_string()).collect(), ..Default::default() } +} + +pub struct Bootstrap { + cmd: CmdBuilder, + metrics_path: Utf8PathBuf, +} + +impl Bootstrap { + pub fn build(env: &dyn Environment) -> Self { + let metrics_path = env.build_root().join("build").join("metrics.json"); + let cmd = cmd(&[ + env.python_binary(), + env.checkout_path().join("x.py").as_str(), + "build", + "--target", + &env.host_triple(), + "--host", + &env.host_triple(), + "--stage", + "2", + "library/std", + ]) + .env("RUST_BACKTRACE", "full"); + Self { cmd, metrics_path } + } + + pub fn dist(env: &dyn Environment, dist_args: 
&[String]) -> Self { + let metrics_path = env.build_root().join("build").join("metrics.json"); + let cmd = cmd(&dist_args.iter().map(|arg| arg.as_str()).collect::<Vec<_>>()) + .env("RUST_BACKTRACE", "full"); + Self { cmd, metrics_path } + } + + pub fn llvm_pgo_instrument(mut self, profile_dir: &Utf8Path) -> Self { + self.cmd = self + .cmd + .arg("--llvm-profile-generate") + .env("LLVM_PROFILE_DIR", profile_dir.join("prof-%p").as_str()); + self + } + + pub fn llvm_pgo_optimize(mut self, profile: &LlvmPGOProfile) -> Self { + self.cmd = self.cmd.arg("--llvm-profile-use").arg(profile.0.as_str()); + self + } + + pub fn rustc_pgo_instrument(mut self, profile_dir: &Utf8Path) -> Self { + self.cmd = self.cmd.arg("--rust-profile-generate").arg(profile_dir.as_str()); + self + } + + pub fn without_llvm_lto(mut self) -> Self { + self.cmd = self + .cmd + .arg("--set") + .arg("llvm.thin-lto=false") + .arg("--set") + .arg("llvm.link-shared=true"); + self + } + + pub fn rustc_pgo_optimize(mut self, profile: &RustcPGOProfile) -> Self { + self.cmd = self.cmd.arg("--rust-profile-use").arg(profile.0.as_str()); + self + } + + pub fn with_llvm_bolt_ldflags(mut self) -> Self { + self.cmd = self.cmd.arg("--set").arg("llvm.ldflags=-Wl,-q"); + self + } + + pub fn with_bolt_profile(mut self, profile: LlvmBoltProfile) -> Self { + self.cmd = self.cmd.arg("--reproducible-artifact").arg(profile.0.as_str()); + self + } + + /// Do not rebuild rustc, and use a previously built rustc sysroot instead. 
+ pub fn avoid_rustc_rebuild(mut self) -> Self { + self.cmd = self.cmd.arg("--keep-stage").arg("0").arg("--keep-stage").arg("1"); + self + } + + pub fn run(self, timer: &mut TimerSection) -> anyhow::Result<()> { + self.cmd.run()?; + let metrics = load_metrics(&self.metrics_path)?; + record_metrics(&metrics, timer); + Ok(()) + } +} diff --git a/src/tools/opt-dist/src/main.rs b/src/tools/opt-dist/src/main.rs new file mode 100644 index 000000000..8ab19674d --- /dev/null +++ b/src/tools/opt-dist/src/main.rs @@ -0,0 +1,215 @@ +use crate::bolt::{bolt_optimize, with_bolt_instrumented}; +use anyhow::Context; +use log::LevelFilter; +use utils::io; + +use crate::environment::{create_environment, Environment}; +use crate::exec::Bootstrap; +use crate::tests::run_tests; +use crate::timer::Timer; +use crate::training::{gather_llvm_bolt_profiles, gather_llvm_profiles, gather_rustc_profiles}; +use crate::utils::io::reset_directory; +use crate::utils::{ + clear_llvm_files, format_env_variables, print_binary_sizes, print_free_disk_space, + with_log_group, +}; + +mod bolt; +mod environment; +mod exec; +mod metrics; +mod tests; +mod timer; +mod training; +mod utils; + +fn is_try_build() -> bool { + std::env::var("DIST_TRY_BUILD").unwrap_or_else(|_| "0".to_string()) != "0" +} + +fn execute_pipeline( + env: &dyn Environment, + timer: &mut Timer, + dist_args: Vec<String>, +) -> anyhow::Result<()> { + reset_directory(&env.opt_artifacts())?; + + with_log_group("Building rustc-perf", || env.prepare_rustc_perf())?; + + // Stage 1: Build PGO instrumented rustc + // We use a normal build of LLVM, because gathering PGO profiles for LLVM and `rustc` at the + // same time can cause issues, because the host and in-tree LLVM versions can diverge. 
+ let rustc_pgo_profile = timer.section("Stage 1 (Rustc PGO)", |stage| { + let rustc_profile_dir_root = env.opt_artifacts().join("rustc-pgo"); + + stage.section("Build PGO instrumented rustc and LLVM", |section| { + let mut builder = Bootstrap::build(env).rustc_pgo_instrument(&rustc_profile_dir_root); + + if env.supports_shared_llvm() { + // This first LLVM that we build will be thrown away after this stage, and it + // doesn't really need LTO. Without LTO, it builds in ~1 minute thanks to sccache, + // with LTO it takes almost 10 minutes. It makes the followup Rustc PGO + // instrumented/optimized build a bit slower, but it seems to be worth it. + builder = builder.without_llvm_lto(); + } + + builder.run(section) + })?; + + let profile = stage + .section("Gather profiles", |_| gather_rustc_profiles(env, &rustc_profile_dir_root))?; + print_free_disk_space()?; + + stage.section("Build PGO optimized rustc", |section| { + Bootstrap::build(env).rustc_pgo_optimize(&profile).run(section) + })?; + + Ok(profile) + })?; + + // Stage 2: Gather LLVM PGO profiles + // Here we build a PGO instrumented LLVM, reusing the previously PGO optimized rustc. + // Then we use the instrumented LLVM to gather LLVM PGO profiles. + let llvm_pgo_profile = timer.section("Stage 2 (LLVM PGO)", |stage| { + // Remove the previous, uninstrumented build of LLVM. + clear_llvm_files(env)?; + + let llvm_profile_dir_root = env.opt_artifacts().join("llvm-pgo"); + + stage.section("Build PGO instrumented LLVM", |section| { + Bootstrap::build(env) + .llvm_pgo_instrument(&llvm_profile_dir_root) + .avoid_rustc_rebuild() + .run(section) + })?; + + let profile = stage + .section("Gather profiles", |_| gather_llvm_profiles(env, &llvm_profile_dir_root))?; + + print_free_disk_space()?; + + // Proactively delete the instrumented artifacts, to avoid using them by accident in + // follow-up stages. 
+ clear_llvm_files(env)?; + + Ok(profile) + })?; + + let llvm_bolt_profile = if env.supports_bolt() { + // Stage 3: Build BOLT instrumented LLVM + // We build a PGO optimized LLVM in this step, then instrument it with BOLT and gather BOLT profiles. + // Note that we don't remove LLVM artifacts after this step, so that they are reused in the final dist build. + // BOLT instrumentation is performed "on-the-fly" when the LLVM library is copied to the sysroot of rustc, + // therefore the LLVM artifacts on disk are not "tainted" with BOLT instrumentation and they can be reused. + timer.section("Stage 3 (LLVM BOLT)", |stage| { + stage.section("Build PGO optimized LLVM", |stage| { + Bootstrap::build(env) + .with_llvm_bolt_ldflags() + .llvm_pgo_optimize(&llvm_pgo_profile) + .avoid_rustc_rebuild() + .run(stage) + })?; + + // Find the path to the `libLLVM.so` file + let llvm_lib = io::find_file_in_dir( + &env.build_artifacts().join("stage2").join("lib"), + "libLLVM", + ".so", + )?; + + // Instrument it and gather profiles + let profile = with_bolt_instrumented(&llvm_lib, || { + stage.section("Gather profiles", |_| gather_llvm_bolt_profiles(env)) + })?; + print_free_disk_space()?; + + // Now optimize the library with BOLT. The `libLLVM-XXX.so` library is actually hard-linked + // from several places, and this specific path (`llvm_lib`) will *not* be packaged into + // the final dist build. However, when BOLT optimizes an artifact, it does so *in-place*, + // therefore it will actually optimize all the hard links, which means that the final + // packaged `libLLVM.so` file *will* be BOLT optimized. + bolt_optimize(&llvm_lib, &profile).context("Could not optimize LLVM with BOLT")?; + + // LLVM is not being cleared here, we want to use the BOLT-optimized LLVM + Ok(Some(profile)) + })? 
+ } else { + None + }; + + let mut dist = Bootstrap::dist(env, &dist_args) + .llvm_pgo_optimize(&llvm_pgo_profile) + .rustc_pgo_optimize(&rustc_pgo_profile) + .avoid_rustc_rebuild(); + + if let Some(llvm_bolt_profile) = llvm_bolt_profile { + dist = dist.with_bolt_profile(llvm_bolt_profile); + } + + // Final stage: Assemble the dist artifacts + // The previous PGO optimized rustc build and PGO optimized LLVM builds should be reused. + timer.section("Stage 4 (final build)", |stage| dist.run(stage))?; + + // After dist has finished, run a subset of the test suite on the optimized artifacts to discover + // possible regressions. + // The tests are not executed for try builds, which can be in various broken states, so we don't + // want to gatekeep them with tests. + if !is_try_build() { + timer.section("Run tests", |_| run_tests(env))?; + } + + Ok(()) +} + +fn main() -> anyhow::Result<()> { + // Make sure that we get backtraces for easier debugging in CI + std::env::set_var("RUST_BACKTRACE", "1"); + + env_logger::builder() + .filter_level(LevelFilter::Info) + .format_timestamp_millis() + .parse_default_env() + .init(); + + let mut build_args: Vec<String> = std::env::args().skip(1).collect(); + println!("Running optimized build pipeline with args `{}`", build_args.join(" ")); + + with_log_group("Environment values", || { + println!("Environment values\n{}", format_env_variables()); + }); + + with_log_group("Printing config.toml", || { + if let Ok(config) = std::fs::read_to_string("config.toml") { + println!("Contents of `config.toml`:\n{config}"); + } + }); + + // Skip components that are not needed for try builds to speed them up + if is_try_build() { + log::info!("Skipping building of unimportant components for a try build"); + for target in [ + "rust-docs", + "rustc-docs", + "rust-docs-json", + "rust-analyzer", + "rustc-src", + "clippy", + "miri", + "rustfmt", + ] { + build_args.extend(["--skip".to_string(), target.to_string()]); + } + } + + let mut timer = 
Timer::new(); + let env = create_environment(); + + let result = execute_pipeline(env.as_ref(), &mut timer, build_args); + log::info!("Timer results\n{}", timer.format_stats()); + + print_free_disk_space()?; + result.context("Optimized build pipeline has failed")?; + print_binary_sizes(env.as_ref())?; + + Ok(()) +} diff --git a/src/tools/opt-dist/src/metrics.rs b/src/tools/opt-dist/src/metrics.rs new file mode 100644 index 000000000..cabe07eda --- /dev/null +++ b/src/tools/opt-dist/src/metrics.rs @@ -0,0 +1,106 @@ +use crate::timer::TimerSection; +use build_helper::metrics::{JsonNode, JsonRoot}; +use camino::Utf8Path; +use std::time::Duration; + +#[derive(Clone, Debug)] +pub struct BuildStep { + r#type: String, + children: Vec<BuildStep>, + duration: Duration, +} + +impl BuildStep { + pub fn find_all_by_type(&self, r#type: &str) -> Vec<&BuildStep> { + let mut result = Vec::new(); + self.find_by_type(r#type, &mut result); + result + } + fn find_by_type<'a>(&'a self, r#type: &str, result: &mut Vec<&'a BuildStep>) { + if self.r#type == r#type { + result.push(self); + } + for child in &self.children { + child.find_by_type(r#type, result); + } + } +} + +/// Loads the metrics of the most recent bootstrap execution from a metrics.json file. +pub fn load_metrics(path: &Utf8Path) -> anyhow::Result<BuildStep> { + let content = std::fs::read(path.as_std_path())?; + let mut metrics = serde_json::from_slice::<JsonRoot>(&content)?; + let invocation = metrics + .invocations + .pop() + .ok_or_else(|| anyhow::anyhow!("No bootstrap invocation found in metrics file"))?; + + fn parse(node: JsonNode) -> Option<BuildStep> { + match node { + JsonNode::RustbuildStep { + type_: kind, + children, + duration_excluding_children_sec, + .. 
+ } => { + let children: Vec<_> = children.into_iter().filter_map(parse).collect(); + let children_duration = children.iter().map(|c| c.duration).sum::<Duration>(); + Some(BuildStep { + r#type: kind.to_string(), + children, + duration: children_duration + + Duration::from_secs_f64(duration_excluding_children_sec), + }) + } + JsonNode::TestSuite(_) => None, + } + } + + let duration = Duration::from_secs_f64(invocation.duration_including_children_sec); + let children: Vec<_> = invocation.children.into_iter().filter_map(parse).collect(); + Ok(BuildStep { r#type: "root".to_string(), children, duration }) +} + +/// Logs the individual metrics in a table and adds Rustc and LLVM durations to the passed +/// timer. +pub fn record_metrics(metrics: &BuildStep, timer: &mut TimerSection) { + let llvm_steps = metrics.find_all_by_type("bootstrap::llvm::Llvm"); + let llvm_duration: Duration = llvm_steps.into_iter().map(|s| s.duration).sum(); + + let rustc_steps = metrics.find_all_by_type("bootstrap::compile::Rustc"); + let rustc_duration: Duration = rustc_steps.into_iter().map(|s| s.duration).sum(); + + // The LLVM step is part of the Rustc step + let rustc_duration = rustc_duration.saturating_sub(llvm_duration); + + if !llvm_duration.is_zero() { + timer.add_duration("LLVM", llvm_duration); + } + if !rustc_duration.is_zero() { + timer.add_duration("Rustc", rustc_duration); + } + + log_metrics(metrics); +} + +fn log_metrics(metrics: &BuildStep) { + use std::fmt::Write; + + let mut substeps: Vec<(u32, &BuildStep)> = Vec::new(); + + fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) { + substeps.push((level, step)); + for child in &step.children { + visit(child, level + 1, substeps); + } + } + + visit(metrics, 0, &mut substeps); + + let mut output = String::new(); + for (level, step) in substeps { + let label = format!("{}{}", ".".repeat(level as usize), step.r#type); + writeln!(output, "{label:<65}{:>8.2}s", step.duration.as_secs_f64()).unwrap(); 
+ } + log::info!("Build step durations\n{output}"); +} diff --git a/src/tools/opt-dist/src/tests.rs b/src/tools/opt-dist/src/tests.rs new file mode 100644 index 000000000..3dd1a3223 --- /dev/null +++ b/src/tools/opt-dist/src/tests.rs @@ -0,0 +1,114 @@ +use crate::environment::Environment; +use crate::exec::cmd; +use crate::utils::io::{copy_directory, find_file_in_dir, unpack_archive}; +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; + +/// Run tests on optimized dist artifacts. +pub fn run_tests(env: &dyn Environment) -> anyhow::Result<()> { + // After `dist` is executed, we extract its archived components into a sysroot directory, + // and then use that extracted rustc as a stage0 compiler. + // Then we run a subset of tests using that compiler, to have a basic smoke test which checks + // whether the optimization pipeline hasn't broken something. + let build_dir = env.build_root().join("build"); + let dist_dir = build_dir.join("dist"); + let unpacked_dist_dir = build_dir.join("unpacked-dist"); + std::fs::create_dir_all(&unpacked_dist_dir)?; + + let extract_dist_dir = |name: &str| -> anyhow::Result<Utf8PathBuf> { + unpack_archive(&dist_dir.join(format!("{name}.tar.xz")), &unpacked_dist_dir)?; + let extracted_path = unpacked_dist_dir.join(name); + assert!(extracted_path.is_dir()); + Ok(extracted_path) + }; + let host_triple = env.host_triple(); + let version = find_dist_version(&dist_dir)?; + + // Extract rustc, libstd, cargo and src archives to create the optimized sysroot + let rustc_dir = extract_dist_dir(&format!("rustc-{version}-{host_triple}"))?.join("rustc"); + let libstd_dir = extract_dist_dir(&format!("rust-std-{version}-{host_triple}"))? 
+ .join(format!("rust-std-{host_triple}")); + let cargo_dir = extract_dist_dir(&format!("cargo-{version}-{host_triple}"))?.join("cargo"); + let extracted_src_dir = extract_dist_dir(&format!("rust-src-{version}"))?.join("rust-src"); + + // We need to manually copy libstd to the extracted rustc sysroot + copy_directory( + &libstd_dir.join("lib").join("rustlib").join(&host_triple).join("lib"), + &rustc_dir.join("lib").join("rustlib").join(&host_triple).join("lib"), + )?; + + // Extract sources - they aren't in the `rustc-nightly-{host}` tarball, so we need to manually copy libstd + // sources to the extracted sysroot. We need sources available so that `-Zsimulate-remapped-rust-src-base` + // works correctly. + copy_directory( + &extracted_src_dir.join("lib").join("rustlib").join("src"), + &rustc_dir.join("lib").join("rustlib").join("src"), + )?; + + let rustc_path = rustc_dir.join("bin").join(format!("rustc{}", env.executable_extension())); + assert!(rustc_path.is_file()); + let cargo_path = cargo_dir.join("bin").join(format!("cargo{}", env.executable_extension())); + assert!(cargo_path.is_file()); + + // Specify path to a LLVM config so that LLVM is not rebuilt. + // It doesn't really matter which LLVM config we choose, because no sysroot will be compiled. + let llvm_config = env + .build_artifacts() + .join("llvm") + .join("bin") + .join(format!("llvm-config{}", env.executable_extension())); + assert!(llvm_config.is_file()); + + let config_content = format!( + r#"profile = "user" +changelog-seen = 2 + +[build] +rustc = "{rustc}" +cargo = "{cargo}" + +[target.{host_triple}] +llvm-config = "{llvm_config}" +"#, + rustc = rustc_path.to_string().replace('\\', "/"), + cargo = cargo_path.to_string().replace('\\', "/"), + llvm_config = llvm_config.to_string().replace('\\', "/") + ); + log::info!("Using following `config.toml` for running tests:\n{config_content}"); + + // Simulate a stage 0 compiler with the extracted optimized dist artifacts. 
+ std::fs::write("config.toml", config_content)?; + + let x_py = env.checkout_path().join("x.py"); + let mut args = vec![ + env.python_binary(), + x_py.as_str(), + "test", + "--stage", + "0", + "tests/assembly", + "tests/codegen", + "tests/codegen-units", + "tests/incremental", + "tests/mir-opt", + "tests/pretty", + "tests/run-pass-valgrind", + "tests/ui", + ]; + for test_path in env.skipped_tests() { + args.extend(["--skip", test_path]); + } + cmd(&args).env("COMPILETEST_FORCE_STAGE0", "1").run().context("Cannot execute tests") +} + +/// Tries to find the version of the dist artifacts (either nightly, beta, or 1.XY.Z). +fn find_dist_version(directory: &Utf8Path) -> anyhow::Result<String> { + // Lookup a known file with a unique prefix and extract the version from its filename + let archive = find_file_in_dir(directory, "reproducible-artifacts-", ".tar.xz")? + .file_name() + .unwrap() + .to_string(); + let (version, _) = + archive.strip_prefix("reproducible-artifacts-").unwrap().split_once("-").unwrap(); + Ok(version.to_string()) +} diff --git a/src/tools/opt-dist/src/timer.rs b/src/tools/opt-dist/src/timer.rs new file mode 100644 index 000000000..2b29ba8d5 --- /dev/null +++ b/src/tools/opt-dist/src/timer.rs @@ -0,0 +1,167 @@ +use std::ops::{Deref, DerefMut}; +use std::time::{Duration, SystemTime}; + +pub struct Timer { + root: TimerSection, +} + +impl Timer { + pub fn new() -> Self { + Timer { root: TimerSection::new(None) } + } + + pub fn format_stats(&self) -> String { + use std::fmt::Write; + + let mut items = Vec::new(); + for (name, child) in &self.root.children { + match child { + SectionEntry::SubSection(section) => { + section.collect_levels(0, name, &mut items); + } + SectionEntry::Duration(duration) => items.push((0, name, *duration)), + } + } + + let rows: Vec<(String, Duration)> = items + .into_iter() + .map(|(level, name, duration)| (format!("{}{name}:", " ".repeat(level)), duration)) + .collect(); + + let total_duration = self.total_duration(); + let 
total_duration_label = "Total duration:".to_string(); + + const SPACE_AFTER_LABEL: usize = 2; + let max_label_length = 16.max(rows.iter().map(|(label, _)| label.len()).max().unwrap_or(0)) + + SPACE_AFTER_LABEL; + + let table_width = max_label_length + 23; + let divider = "-".repeat(table_width); + + let mut output = String::new(); + writeln!(output, "{divider}").unwrap(); + for (label, duration) in rows { + let pct = (duration.as_millis() as f64 / total_duration.as_millis() as f64) * 100.0; + let duration_fmt = format!("{:>12.2}s ({pct:>5.2}%)", duration.as_secs_f64()); + writeln!(output, "{label:<0$} {duration_fmt}", max_label_length).unwrap(); + } + output.push('\n'); + + let total_duration = Duration::new(total_duration.as_secs(), 0); + let total_duration = format!( + "{:>1$}", + humantime::format_duration(total_duration).to_string(), + table_width - total_duration_label.len() + ); + writeln!(output, "{total_duration_label}{total_duration}").unwrap(); + + writeln!(output, "{divider}").unwrap(); + output + } +} + +impl Deref for Timer { + type Target = TimerSection; + + fn deref(&self) -> &Self::Target { + &self.root + } +} + +impl DerefMut for Timer { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.root + } +} + +pub struct TimerSection { + name: Option<String>, + children: Vec<(String, SectionEntry)>, + duration_excluding_children: Duration, +} + +impl TimerSection { + pub fn new(name: Option<String>) -> Self { + TimerSection { + name, + children: Default::default(), + duration_excluding_children: Duration::ZERO, + } + } + + pub fn section<F: FnOnce(&mut TimerSection) -> anyhow::Result<R>, R>( + &mut self, + name: &str, + func: F, + ) -> anyhow::Result<R> { + let full_name = match &self.name { + Some(current_name) => { + format!("{current_name} > {name}") + } + None => name.to_string(), + }; + log::info!("Section `{full_name}` starts"); + let mut child = TimerSection { + name: Some(full_name.clone()), + children: Default::default(), + 
duration_excluding_children: Duration::ZERO, + }; + + let start = SystemTime::now(); + let result = func(&mut child); + let duration = start.elapsed().unwrap(); + + let msg = match result { + Ok(_) => "OK", + Err(_) => "FAIL", + }; + + child.duration_excluding_children = duration.saturating_sub(child.total_duration()); + + log::info!("Section `{full_name}` ended: {msg} ({:.2}s)`", duration.as_secs_f64()); + self.children.push((name.to_string(), SectionEntry::SubSection(child))); + result + } + + pub fn add_duration(&mut self, name: &str, duration: Duration) { + self.children.push((name.to_string(), SectionEntry::Duration(duration))); + } + + fn total_duration(&self) -> Duration { + self.duration_excluding_children + + self.children.iter().map(|(_, child)| child.total_duration()).sum::<Duration>() + } + + fn collect_levels<'a>( + &'a self, + level: usize, + name: &'a str, + items: &mut Vec<(usize, &'a str, Duration)>, + ) { + items.push((level, name, self.total_duration())); + for (name, child) in &self.children { + match &child { + SectionEntry::Duration(duration) => { + items.push((level + 1, name, *duration)); + } + SectionEntry::SubSection(section) => { + section.collect_levels(level + 1, name, items); + } + } + } + } +} + +enum SectionEntry { + Duration(Duration), + SubSection(TimerSection), +} + +impl SectionEntry { + fn total_duration(&self) -> Duration { + match self { + SectionEntry::Duration(duration) => *duration, + SectionEntry::SubSection(timer) => timer.total_duration(), + } + } +} diff --git a/src/tools/opt-dist/src/training.rs b/src/tools/opt-dist/src/training.rs new file mode 100644 index 000000000..59c73fbd6 --- /dev/null +++ b/src/tools/opt-dist/src/training.rs @@ -0,0 +1,223 @@ +use crate::environment::Environment; +use crate::exec::{cmd, CmdBuilder}; +use crate::utils::io::{count_files, delete_directory}; +use crate::utils::with_log_group; +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; +use humansize::BINARY; + +const 
LLVM_PGO_CRATES: &[&str] = &[ + "syn-1.0.89", + "cargo-0.60.0", + "serde-1.0.136", + "ripgrep-13.0.0", + "regex-1.5.5", + "clap-3.1.6", + "hyper-0.14.18", +]; + +const RUSTC_PGO_CRATES: &[&str] = &[ + "externs", + "ctfe-stress-5", + "cargo-0.60.0", + "token-stream-stress", + "match-stress", + "tuple-stress", + "diesel-1.4.8", + "bitmaps-3.1.0", +]; + +const LLVM_BOLT_CRATES: &[&str] = LLVM_PGO_CRATES; + +fn init_compiler_benchmarks( + env: &dyn Environment, + profiles: &[&str], + scenarios: &[&str], + crates: &[&str], +) -> CmdBuilder { + // Run rustc-perf benchmarks + // Benchmark using profile_local with eprintln, which essentially just means + // don't actually benchmark -- just make sure we run rustc a bunch of times. + cmd(&[ + env.cargo_stage_0().as_str(), + "run", + "-p", + "collector", + "--bin", + "collector", + "--", + "profile_local", + "eprintln", + env.rustc_stage_2().as_str(), + "--id", + "Test", + "--cargo", + env.cargo_stage_0().as_str(), + "--profiles", + profiles.join(",").as_str(), + "--scenarios", + scenarios.join(",").as_str(), + "--include", + crates.join(",").as_str(), + ]) + .env("RUST_LOG", "collector=debug") + .env("RUSTC", env.rustc_stage_0().as_str()) + .env("RUSTC_BOOTSTRAP", "1") + .workdir(&env.rustc_perf_dir()) +} + +/// Describes which `llvm-profdata` binary should be used for merging PGO profiles. +enum LlvmProfdata { + /// Use llvm-profdata from the host toolchain (i.e. from LLVM provided externally). + Host, + /// Use llvm-profdata from the target toolchain (i.e. from LLVM built from `src/llvm-project`). 
+ Target, +} + +fn merge_llvm_profiles( + env: &dyn Environment, + merged_path: &Utf8Path, + profile_dir: &Utf8Path, + profdata: LlvmProfdata, +) -> anyhow::Result<()> { + let llvm_profdata = match profdata { + LlvmProfdata::Host => env.host_llvm_dir().join("bin/llvm-profdata"), + LlvmProfdata::Target => env + .build_artifacts() + .join("llvm") + .join("build") + .join(format!("bin/llvm-profdata{}", env.executable_extension())), + }; + + cmd(&[llvm_profdata.as_str(), "merge", "-o", merged_path.as_str(), profile_dir.as_str()]) + .run() + .context("Cannot merge LLVM profiles")?; + Ok(()) +} + +fn log_profile_stats( + name: &str, + merged_profile: &Utf8Path, + profile_root: &Utf8Path, +) -> anyhow::Result<()> { + log::info!("{name} PGO statistics"); + log::info!( + "{merged_profile}: {}", + humansize::format_size(std::fs::metadata(merged_profile.as_std_path())?.len(), BINARY) + ); + log::info!( + "{profile_root}: {}", + humansize::format_size(fs_extra::dir::get_size(profile_root.as_std_path())?, BINARY) + ); + log::info!("Profile file count: {}", count_files(profile_root)?); + Ok(()) +} + +pub struct LlvmPGOProfile(pub Utf8PathBuf); + +pub fn gather_llvm_profiles( + env: &dyn Environment, + profile_root: &Utf8Path, +) -> anyhow::Result<LlvmPGOProfile> { + log::info!("Running benchmarks with PGO instrumented LLVM"); + + with_log_group("Running benchmarks", || { + init_compiler_benchmarks(env, &["Debug", "Opt"], &["Full"], LLVM_PGO_CRATES) + .run() + .context("Cannot gather LLVM PGO profiles") + })?; + + let merged_profile = env.opt_artifacts().join("llvm-pgo.profdata"); + log::info!("Merging LLVM PGO profiles to {merged_profile}"); + + merge_llvm_profiles(env, &merged_profile, profile_root, LlvmProfdata::Host)?; + log_profile_stats("LLVM", &merged_profile, profile_root)?; + + // We don't need the individual .profraw files now that they have been merged + // into a final .profdata + delete_directory(profile_root)?; + + Ok(LlvmPGOProfile(merged_profile)) +} + +pub struct 
RustcPGOProfile(pub Utf8PathBuf); + +pub fn gather_rustc_profiles( + env: &dyn Environment, + profile_root: &Utf8Path, +) -> anyhow::Result<RustcPGOProfile> { + log::info!("Running benchmarks with PGO instrumented rustc"); + + // The profile data is written into a single filepath that is being repeatedly merged when each + // rustc invocation ends. Empirically, this can result in some profiling data being lost. That's + // why we override the profile path to include the PID. This will produce many more profiling + // files, but the resulting profile will produce a slightly faster rustc binary. + let profile_template = profile_root.join("default_%m_%p.profraw"); + + // Here we're profiling the `rustc` frontend, so we also include `Check`. + // The benchmark set includes various stress tests that put the frontend under pressure. + with_log_group("Running benchmarks", || { + init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["All"], RUSTC_PGO_CRATES) + .env("LLVM_PROFILE_FILE", profile_template.as_str()) + .run() + .context("Cannot gather rustc PGO profiles") + })?; + + let merged_profile = env.opt_artifacts().join("rustc-pgo.profdata"); + log::info!("Merging Rustc PGO profiles to {merged_profile}"); + + merge_llvm_profiles(env, &merged_profile, profile_root, LlvmProfdata::Target)?; + log_profile_stats("Rustc", &merged_profile, profile_root)?; + + // We don't need the individual .profraw files now that they have been merged + // into a final .profdata + delete_directory(profile_root)?; + + Ok(RustcPGOProfile(merged_profile)) +} + +pub struct LlvmBoltProfile(pub Utf8PathBuf); + +pub fn gather_llvm_bolt_profiles(env: &dyn Environment) -> anyhow::Result<LlvmBoltProfile> { + log::info!("Running benchmarks with BOLT instrumented LLVM"); + + with_log_group("Running benchmarks", || { + init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["Full"], LLVM_BOLT_CRATES) + .run() + .context("Cannot gather LLVM BOLT profiles") + })?; + + let merged_profile = 
env.opt_artifacts().join("llvm-bolt.profdata"); + let profile_root = Utf8PathBuf::from("/tmp/prof.fdata"); + log::info!("Merging LLVM BOLT profiles to {merged_profile}"); + + let profiles: Vec<_> = + glob::glob(&format!("{profile_root}*"))?.into_iter().collect::<Result<Vec<_>, _>>()?; + + let mut merge_args = vec!["merge-fdata"]; + merge_args.extend(profiles.iter().map(|p| p.to_str().unwrap())); + + with_log_group("Merging BOLT profiles", || { + cmd(&merge_args) + .redirect_output(merged_profile.clone()) + .run() + .context("Cannot merge BOLT profiles") + })?; + + log::info!("LLVM BOLT statistics"); + log::info!( + "{merged_profile}: {}", + humansize::format_size(std::fs::metadata(merged_profile.as_std_path())?.len(), BINARY) + ); + + let size = profiles + .iter() + .map(|p| std::fs::metadata(p).map(|metadata| metadata.len())) + .collect::<Result<Vec<_>, _>>()? + .into_iter() + .sum::<u64>(); + log::info!("{profile_root}: {}", humansize::format_size(size, BINARY)); + log::info!("Profile file count: {}", profiles.len()); + + Ok(LlvmBoltProfile(merged_profile)) +} diff --git a/src/tools/opt-dist/src/utils/io.rs b/src/tools/opt-dist/src/utils/io.rs new file mode 100644 index 000000000..8bd516fa3 --- /dev/null +++ b/src/tools/opt-dist/src/utils/io.rs @@ -0,0 +1,88 @@ +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; +use fs_extra::dir::CopyOptions; +use std::fs::File; +use std::path::Path; + +/// Delete and re-create the directory. 
+pub fn reset_directory(path: &Utf8Path) -> anyhow::Result<()> { + log::info!("Resetting directory {path}"); + let _ = std::fs::remove_dir(path); + std::fs::create_dir_all(path)?; + Ok(()) +} + +pub fn copy_directory(src: &Utf8Path, dst: &Utf8Path) -> anyhow::Result<()> { + log::info!("Copying directory {src} to {dst}"); + fs_extra::dir::copy(src, dst, &CopyOptions::default().copy_inside(true))?; + Ok(()) +} + +pub fn copy_file<S: AsRef<Path>, D: AsRef<Path>>(src: S, dst: D) -> anyhow::Result<()> { + log::info!("Copying file {} to {}", src.as_ref().display(), dst.as_ref().display()); + std::fs::copy(src.as_ref(), dst.as_ref())?; + Ok(()) +} + +#[allow(unused)] +pub fn move_directory(src: &Utf8Path, dst: &Utf8Path) -> anyhow::Result<()> { + log::info!("Moving directory {src} to {dst}"); + fs_extra::dir::move_dir(src, dst, &CopyOptions::default().content_only(true))?; + Ok(()) +} + +/// Counts all children of a directory (non-recursively). +pub fn count_files(dir: &Utf8Path) -> anyhow::Result<u64> { + Ok(std::fs::read_dir(dir)?.count() as u64) +} + +pub fn delete_directory(path: &Utf8Path) -> anyhow::Result<()> { + log::info!("Deleting directory `{path}`"); + std::fs::remove_dir_all(path.as_std_path()) + .context(format!("Cannot remove directory {path}"))?; + Ok(()) +} + +pub fn unpack_archive(path: &Utf8Path, dest_dir: &Utf8Path) -> anyhow::Result<()> { + log::info!("Unpacking directory `{path}` into `{dest_dir}`"); + + assert!(path.as_str().ends_with(".tar.xz")); + let file = File::open(path.as_std_path())?; + let file = xz::read::XzDecoder::new(file); + let mut archive = tar::Archive::new(file); + archive.unpack(dest_dir.as_std_path())?; + Ok(()) +} + +/// Returns paths in the given `dir` (non-recursively), optionally with the given `suffix`. +/// The `suffix` should contain the leading dot. 
+pub fn get_files_from_dir( + dir: &Utf8Path, + suffix: Option<&str>, +) -> anyhow::Result<Vec<Utf8PathBuf>> { + let path = format!("{dir}/*{}", suffix.unwrap_or("")); + + Ok(glob::glob(&path)? + .into_iter() + .map(|p| p.map(|p| Utf8PathBuf::from_path_buf(p).unwrap())) + .collect::<Result<Vec<_>, _>>()?) +} + +/// Finds a single file in the specified `directory` with the given `prefix` and `suffix`. +pub fn find_file_in_dir( + directory: &Utf8Path, + prefix: &str, + suffix: &str, +) -> anyhow::Result<Utf8PathBuf> { + let files = glob::glob(&format!("{directory}/{prefix}*{suffix}"))? + .into_iter() + .collect::<Result<Vec<_>, _>>()?; + match files.len() { + 0 => Err(anyhow::anyhow!("No file with prefix {prefix} found in {directory}")), + 1 => Ok(Utf8PathBuf::from_path_buf(files[0].clone()).unwrap()), + _ => Err(anyhow::anyhow!( + "More than one file with prefix {prefix} found in {directory}: {:?}", + files + )), + } +} diff --git a/src/tools/opt-dist/src/utils/mod.rs b/src/tools/opt-dist/src/utils/mod.rs new file mode 100644 index 000000000..9a3df15e3 --- /dev/null +++ b/src/tools/opt-dist/src/utils/mod.rs @@ -0,0 +1,75 @@ +pub mod io; + +use crate::environment::Environment; +use crate::utils::io::{delete_directory, get_files_from_dir}; +use humansize::{format_size, BINARY}; +use sysinfo::{DiskExt, RefreshKind, System, SystemExt}; + +pub fn format_env_variables() -> String { + let vars = std::env::vars().map(|(key, value)| format!("{key}={value}")).collect::<Vec<_>>(); + vars.join("\n") +} + +pub fn print_free_disk_space() -> anyhow::Result<()> { + let sys = System::new_with_specifics(RefreshKind::default().with_disks_list().with_disks()); + let available_space: u64 = sys.disks().iter().map(|d| d.available_space()).sum(); + let total_space: u64 = sys.disks().iter().map(|d| d.total_space()).sum(); + let used_space = total_space - available_space; + + log::info!( + "Free disk space: {} out of total {} ({:.2}% used)", + humansize::format_size(available_space, BINARY), 
+ humansize::format_size(total_space, BINARY), + (used_space as f64 / total_space as f64) * 100.0 + ); + Ok(()) +} + +pub fn print_binary_sizes(env: &dyn Environment) -> anyhow::Result<()> { + use std::fmt::Write; + + let root = env.build_artifacts().join("stage2"); + + let mut files = get_files_from_dir(&root.join("bin"), None)?; + files.extend(get_files_from_dir(&root.join("lib"), Some(".so"))?); + files.sort_unstable(); + + let mut output = String::new(); + for file in files { + let size = std::fs::metadata(file.as_std_path())?.len(); + let size_formatted = format_size(size, BINARY); + let name = format!("{}:", file.file_name().unwrap()); + writeln!(output, "{name:<50}{size_formatted:>10}")?; + } + + log::info!("Rustc artifact size\n{output}"); + + Ok(()) +} + +pub fn clear_llvm_files(env: &dyn Environment) -> anyhow::Result<()> { + // Bootstrap currently doesn't support rebuilding LLVM when PGO options + // change (or any other llvm-related options); so just clear out the relevant + // directories ourselves. + log::info!("Clearing LLVM build files"); + delete_directory(&env.build_artifacts().join("llvm"))?; + delete_directory(&env.build_artifacts().join("lld"))?; + Ok(()) +} + +/// Wraps all output produced within the `func` closure in a CI output group, if we're running in +/// CI. +pub fn with_log_group<F: FnOnce() -> R, R>(group: &str, func: F) -> R { + if is_in_ci() { + println!("::group::{group}"); + let result = func(); + println!("::endgroup::"); + result + } else { + func() + } +} + +fn is_in_ci() -> bool { + std::env::var("GITHUB_ACTIONS").is_ok() +} |