From ef24de24a82fe681581cc130f342363c47c0969a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 7 Jun 2024 07:48:48 +0200 Subject: Merging upstream version 1.75.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_codegen_gcc/src/back/lto.rs | 341 +++++++++++++++++++++++++++ compiler/rustc_codegen_gcc/src/back/mod.rs | 1 + compiler/rustc_codegen_gcc/src/back/write.rs | 102 +++++++- 3 files changed, 435 insertions(+), 9 deletions(-) create mode 100644 compiler/rustc_codegen_gcc/src/back/lto.rs (limited to 'compiler/rustc_codegen_gcc/src/back') diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs new file mode 100644 index 000000000..529454b11 --- /dev/null +++ b/compiler/rustc_codegen_gcc/src/back/lto.rs @@ -0,0 +1,341 @@ +/// GCC requires to use the same toolchain for the whole compilation when doing LTO. +/// So, we need the same version/commit of the linker (gcc) and lto front-end binaries (lto1, +/// lto-wrapper, liblto_plugin.so). + +// FIXME(antoyo): the executables compiled with LTO are bigger than those compiled without LTO. +// Since it is the opposite for cg_llvm, check if this is normal. +// +// Maybe we embed the bitcode in the final binary? +// It doesn't look like we try to generate fat objects for the final binary. +// Check if the way we combine the object files make it keep the LTO sections on the final link. +// Maybe that's because the combined object files contain the IR (true) and the final link +// does not remove it? +// +// TODO(antoyo): for performance, check which optimizations the C++ frontend enables. +// +// Fix these warnings: +// /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o +// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o +// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization + +use std::ffi::CString; +use std::fs::{self, File}; +use std::path::{Path, PathBuf}; + +use gccjit::OutputKind; +use object::read::archive::ArchiveFile; +use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule}; +use rustc_codegen_ssa::back::symbol_export; +use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; +use rustc_codegen_ssa::traits::*; +use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind}; +use rustc_data_structures::memmap::Mmap; +use rustc_errors::{FatalError, Handler}; +use rustc_hir::def_id::LOCAL_CRATE; +use rustc_middle::dep_graph::WorkProduct; +use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; +use rustc_session::config::{CrateType, Lto}; +use tempfile::{TempDir, tempdir}; + +use crate::back::write::save_temp_bitcode; +use crate::errors::{ + DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, +}; +use crate::{GccCodegenBackend, GccContext, to_gcc_opt_level}; + +/// We keep track of the computed LTO cache keys from the previous +/// session to determine which CGUs we can reuse. +//pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin"; + +pub fn crate_type_allows_lto(crate_type: CrateType) -> bool { + match crate_type { + CrateType::Executable | CrateType::Dylib | CrateType::Staticlib | CrateType::Cdylib => true, + CrateType::Rlib | CrateType::ProcMacro => false, + } +} + +struct LtoData { + // TODO(antoyo): use symbols_below_threshold. + //symbols_below_threshold: Vec, + upstream_modules: Vec<(SerializedModule, CString)>, + tmp_path: TempDir, +} + +fn prepare_lto(cgcx: &CodegenContext, diag_handler: &Handler) -> Result { + let export_threshold = match cgcx.lto { + // We're just doing LTO for our one crate + Lto::ThinLocal => SymbolExportLevel::Rust, + + // We're doing LTO for the entire crate graph + Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types), + + Lto::No => panic!("didn't request LTO but we're doing LTO"), + }; + + let tmp_path = + match tempdir() { + Ok(tmp_path) => tmp_path, + Err(error) => { + eprintln!("Cannot create temporary directory: {}", error); + return Err(FatalError); + }, + }; + + let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| { + if info.level.is_below_threshold(export_threshold) || info.used { + Some(CString::new(name.as_str()).unwrap()) + } else { + None + } + }; + let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); + let mut symbols_below_threshold = { + let _timer = cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold"); + exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::>() + }; + info!("{} symbols to preserve in this crate", symbols_below_threshold.len()); + + // If we're performing LTO for the entire crate graph, then for each of our + // upstream dependencies, find the corresponding rlib and load the bitcode + // from the archive. + // + // We save off all the bytecode and GCC module file path for later processing + // with either fat or thin LTO + let mut upstream_modules = Vec::new(); + if cgcx.lto != Lto::ThinLocal { + // Make sure we actually can run LTO + for crate_type in cgcx.crate_types.iter() { + if !crate_type_allows_lto(*crate_type) { + diag_handler.emit_err(LtoDisallowed); + return Err(FatalError); + } else if *crate_type == CrateType::Dylib { + if !cgcx.opts.unstable_opts.dylib_lto { + diag_handler.emit_err(LtoDylib); + return Err(FatalError); + } + } + } + + if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto { + diag_handler.emit_err(DynamicLinkingWithLTO); + return Err(FatalError); + } + + for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { + let exported_symbols = + cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); + { + let _timer = + cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold"); + symbols_below_threshold + .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter)); + } + + let archive_data = unsafe { + Mmap::map(File::open(&path).expect("couldn't open rlib")) + .expect("couldn't map rlib") + }; + let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); + let obj_files = archive + .members() + .filter_map(|child| { + child.ok().and_then(|c| { + std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c)) + }) + }) + .filter(|&(name, _)| looks_like_rust_object_file(name)); + for (name, child) in obj_files { + info!("adding bitcode from {}", name); + let path = tmp_path.path().join(name); + match save_as_file(child.data(&*archive_data).expect("corrupt rlib"), &path) { + Ok(()) => { + let buffer = ModuleBuffer::new(path); + let module = SerializedModule::Local(buffer); + upstream_modules.push((module, CString::new(name).unwrap())); + } + Err(e) => { + diag_handler.emit_err(e); + return Err(FatalError); + } + } + } + } + } + + Ok(LtoData { + //symbols_below_threshold, + upstream_modules, + tmp_path, + }) +} + +fn save_as_file(obj: &[u8], path: &Path) -> Result<(), LtoBitcodeFromRlib> { + fs::write(path, obj) + .map_err(|error| LtoBitcodeFromRlib { + gcc_err: format!("write object file to temp dir: {}", error) + }) +} + +/// Performs fat LTO by merging all modules into a single one and returning it +/// for further optimization. +pub(crate) fn run_fat( + cgcx: &CodegenContext, + modules: Vec>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, +) -> Result, FatalError> { + let diag_handler = cgcx.create_diag_handler(); + let lto_data = prepare_lto(cgcx, &diag_handler)?; + /*let symbols_below_threshold = + lto_data.symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::>();*/ + fat_lto(cgcx, &diag_handler, modules, cached_modules, lto_data.upstream_modules, lto_data.tmp_path, + //&symbols_below_threshold, + ) +} + +fn fat_lto(cgcx: &CodegenContext, _diag_handler: &Handler, modules: Vec>, cached_modules: Vec<(SerializedModule, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule, CString)>, tmp_path: TempDir, + //symbols_below_threshold: &[*const libc::c_char], +) -> Result, FatalError> { + let _timer = cgcx.prof.generic_activity("GCC_fat_lto_build_monolithic_module"); + info!("going for a fat lto"); + + // Sort out all our lists of incoming modules into two lists. + // + // * `serialized_modules` (also and argument to this function) contains all + // modules that are serialized in-memory. + // * `in_memory` contains modules which are already parsed and in-memory, + // such as from multi-CGU builds. + // + // All of `cached_modules` (cached from previous incremental builds) can + // immediately go onto the `serialized_modules` modules list and then we can + // split the `modules` array into these two lists. + let mut in_memory = Vec::new(); + serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { + info!("pushing cached module {:?}", wp.cgu_name); + (buffer, CString::new(wp.cgu_name).unwrap()) + })); + for module in modules { + match module { + FatLtoInput::InMemory(m) => in_memory.push(m), + FatLtoInput::Serialized { name, buffer } => { + info!("pushing serialized module {:?}", name); + let buffer = SerializedModule::Local(buffer); + serialized_modules.push((buffer, CString::new(name).unwrap())); + } + } + } + + // Find the "costliest" module and merge everything into that codegen unit. + // All the other modules will be serialized and reparsed into the new + // context, so this hopefully avoids serializing and parsing the largest + // codegen unit. + // + // Additionally use a regular module as the base here to ensure that various + // file copy operations in the backend work correctly. The only other kind + // of module here should be an allocator one, and if your crate is smaller + // than the allocator module then the size doesn't really matter anyway. + let costliest_module = in_memory + .iter() + .enumerate() + .filter(|&(_, module)| module.kind == ModuleKind::Regular) + .map(|(i, _module)| { + //let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) }; + // TODO(antoyo): compute the cost of a module if GCC allows this. + (0, i) + }) + .max(); + + // If we found a costliest module, we're good to go. Otherwise all our + // inputs were serialized which could happen in the case, for example, that + // all our inputs were incrementally reread from the cache and we're just + // re-executing the LTO passes. If that's the case deserialize the first + // module and create a linker with it. + let mut module: ModuleCodegen = match costliest_module { + Some((_cost, i)) => in_memory.remove(i), + None => { + unimplemented!("Incremental"); + /*assert!(!serialized_modules.is_empty(), "must have at least one serialized module"); + let (buffer, name) = serialized_modules.remove(0); + info!("no in-memory regular modules to choose from, parsing {:?}", name); + ModuleCodegen { + module_llvm: GccContext::parse(cgcx, &name, buffer.data(), diag_handler)?, + name: name.into_string().unwrap(), + kind: ModuleKind::Regular, + }*/ + } + }; + let mut serialized_bitcode = Vec::new(); + { + info!("using {:?} as a base module", module.name); + + // We cannot load and merge GCC contexts in memory like cg_llvm is doing. + // Instead, we combine the object files into a single object file. + for module in in_memory { + let path = tmp_path.path().to_path_buf().join(&module.name); + let path = path.to_str().expect("path"); + let context = &module.module_llvm.context; + let config = cgcx.config(module.kind); + // NOTE: we need to set the optimization level here in order for LTO to do its job. + context.set_optimization_level(to_gcc_opt_level(config.opt_level)); + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + context.compile_to_file(OutputKind::ObjectFile, path); + let buffer = ModuleBuffer::new(PathBuf::from(path)); + let llmod_id = CString::new(&module.name[..]).unwrap(); + serialized_modules.push((SerializedModule::Local(buffer), llmod_id)); + } + // Sort the modules to ensure we produce deterministic results. + serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1)); + + // We add the object files and save in should_combine_object_files that we should combine + // them into a single object file when compiling later. + for (bc_decoded, name) in serialized_modules { + let _timer = cgcx + .prof + .generic_activity_with_arg_recorder("GCC_fat_lto_link_module", |recorder| { + recorder.record_arg(format!("{:?}", name)) + }); + info!("linking {:?}", name); + match bc_decoded { + SerializedModule::Local(ref module_buffer) => { + module.module_llvm.should_combine_object_files = true; + module.module_llvm.context.add_driver_option(module_buffer.0.to_str().expect("path")); + }, + SerializedModule::FromRlib(_) => unimplemented!("from rlib"), + SerializedModule::FromUncompressedFile(_) => unimplemented!("from uncompressed file"), + } + serialized_bitcode.push(bc_decoded); + } + save_temp_bitcode(cgcx, &module, "lto.input"); + + // Internalize everything below threshold to help strip out more modules and such. + /*unsafe { + let ptr = symbols_below_threshold.as_ptr(); + llvm::LLVMRustRunRestrictionPass( + llmod, + ptr as *const *const libc::c_char, + symbols_below_threshold.len() as libc::size_t, + );*/ + save_temp_bitcode(cgcx, &module, "lto.after-restriction"); + //} + } + + // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead + // of now. + module.module_llvm.temp_dir = Some(tmp_path); + + Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: serialized_bitcode }) +} + +pub struct ModuleBuffer(PathBuf); + +impl ModuleBuffer { + pub fn new(path: PathBuf) -> ModuleBuffer { + ModuleBuffer(path) + } +} + +impl ModuleBufferMethods for ModuleBuffer { + fn data(&self) -> &[u8] { + unimplemented!("data not needed for GCC codegen"); + } +} diff --git a/compiler/rustc_codegen_gcc/src/back/mod.rs b/compiler/rustc_codegen_gcc/src/back/mod.rs index d692799d7..10187eab0 100644 --- a/compiler/rustc_codegen_gcc/src/back/mod.rs +++ b/compiler/rustc_codegen_gcc/src/back/mod.rs @@ -1 +1,2 @@ +pub mod lto; pub mod write; diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs index 5f54ac4eb..04772d770 100644 --- a/compiler/rustc_codegen_gcc/src/back/write.rs +++ b/compiler/rustc_codegen_gcc/src/back/write.rs @@ -2,27 +2,71 @@ use std::{env, fs}; use gccjit::OutputKind; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen}; -use rustc_codegen_ssa::back::write::{CodegenContext, EmitObj, ModuleConfig}; +use rustc_codegen_ssa::back::link::ensure_removed; +use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig}; use rustc_errors::Handler; +use rustc_fs_util::link_or_copy; use rustc_session::config::OutputType; use rustc_span::fatal_error::FatalError; use rustc_target::spec::SplitDebuginfo; use crate::{GccCodegenBackend, GccContext}; +use crate::errors::CopyBitcode; -pub(crate) unsafe fn codegen(cgcx: &CodegenContext, _diag_handler: &Handler, module: ModuleCodegen, config: &ModuleConfig) -> Result { - let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name); +pub(crate) unsafe fn codegen(cgcx: &CodegenContext, diag_handler: &Handler, module: ModuleCodegen, config: &ModuleConfig) -> Result { + let _timer = cgcx.prof.generic_activity_with_arg("GCC_module_codegen", &*module.name); { let context = &module.module_llvm.context; let module_name = module.name.clone(); + + let should_combine_object_files = module.module_llvm.should_combine_object_files; + let module_name = Some(&module_name[..]); - let _bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); + // NOTE: Only generate object files with GIMPLE when this environment variable is set for + // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit). + let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1"); + + let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name); - if config.bitcode_needed() { + if config.bitcode_needed() && fat_lto { + let _timer = cgcx + .prof + .generic_activity_with_arg("GCC_module_codegen_make_bitcode", &*module.name); + // TODO(antoyo) + /*if let Some(bitcode_filename) = bc_out.file_name() { + cgcx.prof.artifact_size( + "llvm_bitcode", + bitcode_filename.to_string_lossy(), + data.len() as u64, + ); + }*/ + + if config.emit_bc || config.emit_obj == EmitObj::Bitcode { + let _timer = cgcx + .prof + .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name); + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str")); + } + + if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) { + let _timer = cgcx + .prof + .generic_activity_with_arg("GCC_module_codegen_embed_bitcode", &*module.name); + // TODO(antoyo): maybe we should call embed_bitcode to have the proper iOS fixes? + //embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data); + + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + context.add_command_line_option("-ffat-lto-objects"); + // TODO(antoyo): Send -plugin/usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/liblto_plugin.so to linker (this should be done when specifying the appropriate rustc cli argument). + context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str")); + } } if config.emit_ir { @@ -32,7 +76,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext, _diag_han if config.emit_asm { let _timer = cgcx .prof - .generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name); + .generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name); let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name); context.compile_to_file(OutputKind::Assembler, path.to_str().expect("path to str")); } @@ -41,7 +85,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext, _diag_han EmitObj::ObjectCode(_) => { let _timer = cgcx .prof - .generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name); + .generic_activity_with_arg("GCC_module_codegen_emit_obj", &*module.name); if env::var("CG_GCCJIT_DUMP_MODULE_NAMES").as_deref() == Ok("1") { println!("Module {}", module.name); } @@ -60,11 +104,36 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext, _diag_han context.set_debug_info(true); context.dump_to_file(path, true); } - context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str")); + if should_combine_object_files && fat_lto { + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + + context.add_driver_option("-Wl,-r"); + // NOTE: we need -nostdlib, otherwise, we get the following error: + // /usr/bin/ld: cannot find -lgcc_s: No such file or directory + context.add_driver_option("-nostdlib"); + // NOTE: without -fuse-linker-plugin, we get the following error: + // lto1: internal compiler error: decompressed stream: Destination buffer is too small + context.add_driver_option("-fuse-linker-plugin"); + + // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files together in another .o. + context.compile_to_file(OutputKind::Executable, obj_out.to_str().expect("path to str")); + } + else { + context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str")); + } } EmitObj::Bitcode => { - // TODO(antoyo) + debug!("copying bitcode {:?} to obj {:?}", bc_out, obj_out); + if let Err(err) = link_or_copy(&bc_out, &obj_out) { + diag_handler.emit_err(CopyBitcode { err }); + } + + if !config.emit_bc { + debug!("removing_bitcode {:?}", bc_out); + ensure_removed(diag_handler, &bc_out); + } } EmitObj::None => {} @@ -82,3 +151,18 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext, _diag_han pub(crate) fn link(_cgcx: &CodegenContext, _diag_handler: &Handler, mut _modules: Vec>) -> Result, FatalError> { unimplemented!(); } + +pub(crate) fn save_temp_bitcode(cgcx: &CodegenContext, _module: &ModuleCodegen, _name: &str) { + if !cgcx.save_temps { + return; + } + unimplemented!(); + /*unsafe { + let ext = format!("{}.bc", name); + let cgu = Some(&module.name[..]); + let path = cgcx.output_filenames.temp_path_ext(&ext, cgu); + let cstr = path_to_c_string(&path); + let llmod = module.module_llvm.llmod(); + llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr()); + }*/ +} -- cgit v1.2.3