diff options
Diffstat (limited to 'build/moz.configure/lto-pgo.configure')
-rw-r--r-- | build/moz.configure/lto-pgo.configure | 437 |
1 files changed, 437 insertions, 0 deletions
diff --git a/build/moz.configure/lto-pgo.configure b/build/moz.configure/lto-pgo.configure new file mode 100644 index 0000000000..ab6a527fe4 --- /dev/null +++ b/build/moz.configure/lto-pgo.configure @@ -0,0 +1,437 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# PGO +# ============================================================== +llvm_profdata = check_prog( + "LLVM_PROFDATA", ["llvm-profdata"], allow_missing=True, paths=clang_search_path +) + + +@depends_if(llvm_profdata) +@checking("whether llvm-profdata supports 'order' subcommand") +def llvm_profdata_order(profdata): + retcode, _, _ = get_cmd_output(profdata, "order", "--help") + return retcode == 0 + + +option( + "--enable-profile-generate", + env="MOZ_PROFILE_GENERATE", + nargs="?", + choices=("cross",), + help="Build a PGO instrumented binary", +) + +imply_option("MOZ_PGO", depends_if("--enable-profile-generate")(lambda _: True)) + +set_config( + "MOZ_PROFILE_GENERATE", depends_if("--enable-profile-generate")(lambda _: True) +) + +set_define( + "MOZ_PROFILE_GENERATE", depends_if("--enable-profile-generate")(lambda _: True) +) + +add_old_configure_assignment( + "MOZ_PROFILE_GENERATE", 1, when="--enable-profile-generate" +) + +option( + "--enable-profile-use", + env="MOZ_PROFILE_USE", + nargs="?", + choices=("cross",), + help="Use a generated profile during the build", +) + +option( + "--with-pgo-profile-path", + help="Path to the directory with unmerged profile data to use during the build" + ", or to a merged profdata file", + nargs=1, +) + +imply_option("MOZ_PGO", depends_if("--enable-profile-use")(lambda _: True)) + +set_config("MOZ_PROFILE_USE", depends_if("--enable-profile-use")(lambda _: True)) + + +@depends( + "--with-pgo-profile-path", + "--enable-profile-use", + llvm_profdata, + build_environment, +) +@imports("os") +def pgo_profile_path(path, pgo_use, profdata, build_env): + topobjdir = build_env.topobjdir + if topobjdir.endswith("/js/src"): + topobjdir = topobjdir[:-7] + + if not path: + return os.path.join(topobjdir, "instrumented", "merged.profdata") + if path and not pgo_use: + die("Pass --enable-profile-use to use --with-pgo-profile-path.") + if path and not profdata: + die("LLVM_PROFDATA must be set to process the pgo profile.") + if not os.path.isfile(path[0]): + die("Argument to --with-pgo-profile-path must be a file.") + if not os.path.isabs(path[0]): + die("Argument to --with-pgo-profile-path must be an absolute path.") + return path[0] + + +set_config("PGO_PROFILE_PATH", pgo_profile_path) + + +@depends( + "--enable-profile-use", + pgo_profile_path, + llvm_profdata, + llvm_profdata_order, + build_environment, +) +def orderfile_path(profile_use, path, profdata, profdata_order, build_env): + if not profile_use: + return None + + if not profdata_order: + return None + + topobjdir = build_env.topobjdir + + orderfile = os.path.join(topobjdir, "orderfile.txt") + check_cmd_output(profdata, "order", path, "-o", orderfile) + return orderfile + + +pgo_temporal = c_compiler.try_compile( + flags=["-fprofile-generate", "-mllvm", "-pgo-temporal-instrumentation"], + check_msg="whether the C compiler supports temporal instrumentation", + when="--enable-profile-generate", +) + + +@depends( + c_compiler, + select_linker, + target, + pgo_profile_path, + orderfile_path, + target_is_windows, + pgo_temporal, +) +@imports("multiprocessing") +def pgo_flags( + compiler, linker, target, profdata, orderfile, target_is_windows, pgo_temporal +): + if compiler.type == "gcc": + return namespace( + gen_cflags=["-fprofile-generate"], + gen_ldflags=["-fprofile-generate"], + use_cflags=["-fprofile-use", "-fprofile-correction", "-Wcoverage-mismatch"], + use_ldflags=["-fprofile-use"], + ) + + if compiler.type in ("clang-cl", "clang"): + prefix = "" + if compiler.type == "clang-cl": + prefix = "/clang:" + gen_ldflags = None + else: + gen_ldflags = ["-fprofile-generate"] + + use_ldflags = [] + if orderfile: + if compiler.type == "clang-cl": + use_ldflags += ["-ORDER:@" + orderfile] + elif linker.KIND == "ld64" or (linker.KIND == "lld" and target.os == "OSX"): + use_ldflags += ["-Wl,-order_file", orderfile] + elif linker.KIND == "lld": + use_ldflags += [ + "-Wl,--symbol-ordering-file", + orderfile, + "-Wl,--no-warn-symbol-ordering", + ] + + if use_ldflags: + log.info("Activating PGO-based orderfile") + + gen_cflags = [prefix + "-fprofile-generate"] + + if pgo_temporal: + gen_cflags += ["-mllvm", "-pgo-temporal-instrumentation"] + + if target_is_windows: + # native llvm-profdata.exe on Windows can't read profile data + # if name compression is enabled (which cross-compiling enables + # by default) + gen_cflags += ["-mllvm", "-enable-name-compression=false"] + + return namespace( + gen_cflags=gen_cflags, + gen_ldflags=gen_ldflags, + use_cflags=[ + prefix + "-fprofile-use=%s" % profdata, + # Some error messages about mismatched profile data + # come in via -Wbackend-plugin, so disable those too. + "-Wno-error=backend-plugin", + ], + use_ldflags=use_ldflags, + ) + + +set_config("PROFILE_GEN_CFLAGS", pgo_flags.gen_cflags) +set_config("PROFILE_GEN_LDFLAGS", pgo_flags.gen_ldflags) +set_config("PROFILE_USE_CFLAGS", pgo_flags.use_cflags) +set_config("PROFILE_USE_LDFLAGS", pgo_flags.use_ldflags) + +option( + "--with-pgo-jarlog", + help="Use the provided jarlog file when packaging during a profile-use " "build", + nargs=1, +) + +set_config("PGO_JARLOG_PATH", depends_if("--with-pgo-jarlog")(lambda p: p)) + + +@depends("MOZ_PGO", "--enable-profile-use", "--enable-profile-generate", c_compiler) +def moz_pgo_rust(pgo, profile_use, profile_generate, c_compiler): + if not pgo: + return + + # Enabling PGO through MOZ_PGO only and not --enable* flags. + if not profile_use and not profile_generate: + return + + if profile_use and profile_generate: + die("Cannot build with --enable-profile-use and --enable-profile-generate.") + + want_cross = (len(profile_use) and profile_use[0] == "cross") or ( + len(profile_generate) and profile_generate[0] == "cross" + ) + + if not want_cross: + return + + if c_compiler.type == "gcc": + die("Cannot use cross-language PGO with GCC.") + + return True + + +set_config("MOZ_PGO_RUST", moz_pgo_rust) + +# LTO +# ============================================================== + +option( + "--enable-lto", + env="MOZ_LTO", + nargs="*", + choices=("full", "thin", "cross"), + help="Enable LTO", +) + +option( + env="MOZ_LD64_KNOWN_GOOD", + nargs=1, + help="Indicate that ld64 is free of symbol aliasing bugs.", +) + +imply_option("MOZ_LD64_KNOWN_GOOD", depends_if("MOZ_AUTOMATION")(lambda _: True)) + + +@depends( + "--enable-lto", + c_compiler, + select_linker, + "MOZ_LD64_KNOWN_GOOD", + target, + "--enable-profile-generate", + pass_manager.enabled, + "--enable-profile-use", + "MOZ_AUTOMATION", +) +@imports("multiprocessing") +def lto( + values, + c_compiler, + select_linker, + ld64_known_good, + target, + instrumented_build, + pass_manager, + pgo_build, + moz_automation, +): + cflags = [] + ldflags = [] + enabled = None + rust_lto = False + + if not values: + return + + # Sanitize LTO modes. + if "full" in values and "thin" in values: + die("incompatible --enable-lto choices 'full' and 'thin'") + + # If a value was given to --enable-lto, use that. Otherwise, make the lto + # mode explicit, using full with gcc, and full or thin with clang depending + # on the performance benefit. + # Defaulting to full LTO is costly in terms of compilation time, so we only + # default to it if MOZ_AUTOMATION and PGO are on, and for some platforms. + # Based on speedometer3 scores, full lto + pgo is beneficial for Linux and + # Windows for x86_64 targets. + if values == () or values == ("cross",): + if c_compiler.type == "gcc": + values += ("full",) + elif ( + pgo_build + and moz_automation + and target.os in ("WINNT", "GNU") + and target.cpu == "x86_64" + ): + values += ("full",) + else: + values += ("thin",) + + if instrumented_build: + log.warning("Disabling LTO because --enable-profile-generate is specified") + return + + if c_compiler.type == "gcc": + if "cross" in values: + die("Cross-language LTO is not supported with GCC.") + if "thin" in values: + die( + "gcc does not support thin LTO. Use `--enable-lto` " + "to enable full LTO for gcc." + ) + + if ( + target.kernel == "Darwin" + and target.os == "OSX" + and "cross" in values + and select_linker.KIND == "ld64" + and not ld64_known_good + ): + die( + "The Mac linker is known to have a bug that affects cross-language " + "LTO. If you know that your linker is free from this bug, please " + "set the environment variable `MOZ_LD64_KNOWN_GOOD=1` and re-run " + "configure." + ) + + if c_compiler.type == "clang": + if "full" in values: + cflags.append("-flto") + ldflags.append("-flto") + else: + cflags.append("-flto=thin") + ldflags.append("-flto=thin") + + if target.os == "Android" and "cross" in values: + # Work around https://github.com/rust-lang/rust/issues/90088 + # by enabling the highest level of SSE the rust targets default + # to. + # https://github.com/rust-lang/rust/blob/bdfcb88e8b6203ccb46a2fb6649979b773efc8ac/compiler/rustc_target/src/spec/i686_linux_android.rs#L13 + # https://github.com/rust-lang/rust/blob/8d1083e319841624f64400e1524805a40d725439/compiler/rustc_target/src/spec/x86_64_linux_android.rs#L7 + if target.cpu == "x86": + ldflags.append("-Wl,-plugin-opt=-mattr=+ssse3") + elif target.cpu == "x86_64": + ldflags.append("-Wl,-plugin-opt=-mattr=+sse4.2") + elif c_compiler.type == "clang-cl": + if "full" in values: + cflags.append("-flto") + else: + cflags.append("-flto=thin") + # With clang-cl, -flto can only be used with -c or -fuse-ld=lld. + # AC_TRY_LINKs during configure don't have -c, so pass -fuse-ld=lld. + cflags.append("-fuse-ld=lld") + + # Explicitly set the CPU to optimize for so the linker doesn't + # choose a poor default. Rust compilation by default uses the + # pentium4 CPU on x86: + # + # https://github.com/rust-lang/rust/blob/049a49b91151a88c95fa0d62a53fd0a0ac2c3af9/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs#L5 + # + # which specifically supports "long" (multi-byte) nops. See + # https://bugzilla.mozilla.org/show_bug.cgi?id=1568450#c8 for details. + # + # The pentium4 seems like kind of a weird CPU to optimize for, but + # it seems to have worked out OK thus far. LLVM does not seem to + # specifically schedule code for the pentium4's deep pipeline, so + # that probably contributes to it being an OK default for our + # purposes. + if target.cpu == "x86": + ldflags.append("-mllvm:-mcpu=pentium4") + # This is also the CPU that Rust uses. The LLVM source code + # recommends this as the "generic 64-bit specific x86 processor model": + # + # https://github.com/llvm/llvm-project/blob/e7694f34ab6a12b8bb480cbfcb396d0a64fe965f/llvm/lib/Target/X86/X86.td#L1165-L1187 + if target.cpu == "x86_64": + ldflags.append("-mllvm:-mcpu=x86-64") + # We do not need special flags for arm64. Hooray for fixed-length + # instruction sets. + else: + num_cores = multiprocessing.cpu_count() + cflags.append("-flto") + cflags.append("-flifetime-dse=1") + + ldflags.append("-flto=%s" % num_cores) + ldflags.append("-flifetime-dse=1") + + # Tell LTO not to inline functions above a certain size, to mitigate + # binary size growth while still getting good performance. + # (For hot functions, PGO will put a multiplier on this limit.) + if target.os == "WINNT": + ldflags.append("-mllvm:-import-instr-limit=10") + elif target.os == "OSX": + ldflags.append("-Wl,-mllvm,-import-instr-limit=10") + elif c_compiler.type == "clang": + ldflags.append("-Wl,-plugin-opt=-import-instr-limit=10") + + # If we're using the new pass manager, we can also enable the new PM + # during LTO. Further we can use the resulting size savings to increase + # the import limit in hot functions. + if pass_manager: + if target.os == "WINNT": + if c_compiler.version >= "12.0.0" and c_compiler.version < "13.0.0": + ldflags.append("-opt:ltonewpassmanager") + if c_compiler.version >= "12.0.0": + ldflags.append("-mllvm:-import-hot-multiplier=30") + elif target.os == "OSX": + ldflags.append("-Wl,-mllvm,-import-hot-multiplier=30") + else: + if c_compiler.version < "13.0.0": + ldflags.append("-Wl,-plugin-opt=new-pass-manager") + ldflags.append("-Wl,-plugin-opt=-import-hot-multiplier=30") + + # Pick Rust LTO mode in case of cross lTO. Thin is the default. + if "cross" in values: + rust_lto = "full" if "full" in values else "thin" + else: + rust_lto = "" + + return namespace( + enabled=True, + cflags=cflags, + ldflags=ldflags, + rust_lto=rust_lto, + ) + + +add_old_configure_assignment("MOZ_LTO", lto.enabled) +set_config("MOZ_LTO", lto.enabled) +set_define("MOZ_LTO", lto.enabled) +set_config("MOZ_LTO_CFLAGS", lto.cflags) +set_config("MOZ_LTO_LDFLAGS", lto.ldflags) +set_config("MOZ_LTO_RUST_CROSS", lto.rust_lto) +add_old_configure_assignment("MOZ_LTO_CFLAGS", lto.cflags) +add_old_configure_assignment("MOZ_LTO_LDFLAGS", lto.ldflags) |