diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /build/build-clang | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
54 files changed, 10765 insertions, 0 deletions
diff --git a/build/build-clang/1stage.json b/build/build-clang/1stage.json new file mode 100644 index 0000000000..4633a3e93e --- /dev/null +++ b/build/build-clang/1stage.json @@ -0,0 +1,4 @@ +{ + "stages": "1", + "targets": "X86;ARM;AArch64;WebAssembly" +} diff --git a/build/build-clang/2stages.json b/build/build-clang/2stages.json new file mode 100644 index 0000000000..e34226758d --- /dev/null +++ b/build/build-clang/2stages.json @@ -0,0 +1,3 @@ +{ + "stages": "2" +} diff --git a/build/build-clang/4stages-pgo.json b/build/build-clang/4stages-pgo.json new file mode 100644 index 0000000000..7f9e17c140 --- /dev/null +++ b/build/build-clang/4stages-pgo.json @@ -0,0 +1,5 @@ +{ + "stages": "4", + "targets": "X86;ARM;AArch64;WebAssembly", + "pgo": true +} diff --git a/build/build-clang/README b/build/build-clang/README new file mode 100644 index 0000000000..56adfd827d --- /dev/null +++ b/build/build-clang/README @@ -0,0 +1,55 @@ +build-clang.py +============== + +A script to build clang from source. + +``` +usage: build-clang.py [-h] -c CONFIG [--clean] + +optional arguments: + -h, --help show this help message and exit + -c CONFIG, --config CONFIG + Clang configuration file + --clean Clean the build directory +``` + +Pre-requisites +-------------- +* Working build toolchain. +* git +* CMake +* Ninja +* Python 2.7 and 3 + +Please use the latest available CMake for your platform to avoid surprises. + +Config file format +------------------ + +build-clang.py accepts a JSON config format with the following fields: + +* stages: Use 1, 2, 3 or 4 to select different compiler stages. The default is 2. +* cc: Path to the bootsraping C Compiler. +* cxx: Path to the bootsraping C++ Compiler. +* as: Path to the assembler tool. +* ar: Path to the library archiver tool. +* ranlib: Path to the ranlib tool (optional). +* libtool: Path to the libtool tool (optional). +* ld: Path to the linker. +* patches: Optional list of patches to apply. +* build_type: The type of build to make. Supported types: Release, Debug, RelWithDebInfo or MinSizeRel. +* targets: The targets supported by the final stage LLVM/clang. +* build_clang_tidy: Whether to build clang-tidy with the Mozilla checks imported. The default is false. +* osx_cross_compile: Whether to invoke CMake for OS X cross compile builds. +* assertions: Whether to enable LLVM assertions. The default is false. +* pgo: Whether to build with PGO (requires stages == 4). The default is false. + +The revisions are defined in taskcluster/ci/fetch/toolchains.yml. They are usually commit sha1s corresponding to upstream tags. + +Environment Variables +--------------------- + +The following environment variables are used for cross-compile builds targeting OS X on Linux. + +* CROSS_CCTOOLS_PATH: Path to the cctools directory where the cross compiler toolchain is located. +* CROSS_SYSROOT: Path to the OS X SDK directory for cross compile builds. diff --git a/build/build-clang/Remove-FlushViewOfFile-when-unmaping-gcda-files.patch b/build/build-clang/Remove-FlushViewOfFile-when-unmaping-gcda-files.patch new file mode 100644 index 0000000000..a3ea2d75f9 --- /dev/null +++ b/build/build-clang/Remove-FlushViewOfFile-when-unmaping-gcda-files.patch @@ -0,0 +1,31 @@ +From 78a6bcfed4b73f13b9973afd69b76067dd4a5dde Mon Sep 17 00:00:00 2001 +From: Calixte Denizet <calixte.denizet@gmail.com> +Date: Mon, 4 Oct 2021 11:07:56 +0200 +Subject: [PATCH] Remove FlushViewOfFile when unmaping gcda files - it can + causes bad performances with slow disks; - MS docs say that it's mainly + useful in case of hard failures (OS crash, electrical failure, ...): so it's + useless to call this function when ccov builds run on CI. + +--- + compiler-rt/lib/profile/GCDAProfiling.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c +index 4293e8f7b5bf..83650d33c95d 100644 +--- a/compiler-rt/lib/profile/GCDAProfiling.c ++++ b/compiler-rt/lib/profile/GCDAProfiling.c +@@ -286,11 +286,6 @@ static int map_file() { + + static void unmap_file() { + #if defined(_WIN32) +- if (!FlushViewOfFile(write_buffer, file_size)) { +- fprintf(stderr, "profiling: %s: cannot flush mapped view: %lu\n", filename, +- GetLastError()); +- } +- + if (!UnmapViewOfFile(write_buffer)) { + fprintf(stderr, "profiling: %s: cannot unmap mapped view: %lu\n", filename, + GetLastError()); +-- +2.33.0 + diff --git a/build/build-clang/android-mangling-error_clang_12.patch b/build/build-clang/android-mangling-error_clang_12.patch new file mode 100644 index 0000000000..315756d30b --- /dev/null +++ b/build/build-clang/android-mangling-error_clang_12.patch @@ -0,0 +1,24 @@ +Workaround segfault in clang's mangling code that is tickled when +attempting to mangle the declaration: + std:__ndk1::__find_detail::__find_exactly_one_checked::__matches +in the <tuple> header in the Android NDK. +This codepath is exercised by MozsearchIndexer.cpp (the searchfox +indexer) when indexing on Android. See also +https://bugs.llvm.org/show_bug.cgi?id=40747 + +diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp +index 4420f6a2c1c3..39792e6b7350 100644 +--- a/clang/lib/AST/ItaniumMangle.cpp ++++ b/clang/lib/AST/ItaniumMangle.cpp +@@ -3954,6 +3954,11 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, + // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need + // to be preserved. + recurse: ++ if (!E) { ++ Out << "MOZ_WE_HACKED_AROUND_BUG_1500941"; ++ return; ++ } ++ + switch (E->getStmtClass()) { + case Expr::NoStmtClass: + #define ABSTRACT_STMT(Type) diff --git a/build/build-clang/bug47258-extract-symbols-mbcs.patch b/build/build-clang/bug47258-extract-symbols-mbcs.patch new file mode 100644 index 0000000000..69a95df072 --- /dev/null +++ b/build/build-clang/bug47258-extract-symbols-mbcs.patch @@ -0,0 +1,13 @@ +diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py +index 43f603963a2..01fe10d36f0 100755 +--- a/llvm/utils/extract_symbols.py ++++ b/llvm/utils/extract_symbols.py +@@ -32,7 +32,7 @@ import argparse + def dumpbin_get_symbols(lib): + process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1, + stdout=subprocess.PIPE, stdin=subprocess.PIPE, +- universal_newlines=True) ++ universal_newlines=True, encoding='mbcs') + process.stdin.close() + for line in process.stdout: + # Look for external symbols that are defined in some section diff --git a/build/build-clang/build-clang.py b/build/build-clang/build-clang.py new file mode 100755 index 0000000000..5345b0a3cb --- /dev/null +++ b/build/build-clang/build-clang.py @@ -0,0 +1,841 @@ +#!/usr/bin/python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Only necessary for flake8 to be happy... +import argparse +import errno +import fnmatch +import glob +import json +import os +import os.path +import platform +import re +import shutil +import subprocess +import sys +import tarfile +from contextlib import contextmanager +from shutil import which + +import zstandard + + +def is_llvm_toolchain(cc, cxx): + return "clang" in cc and "clang" in cxx + + +def check_run(args): + print(" ".join(args), file=sys.stderr, flush=True) + if args[0] == "cmake": + # CMake `message(STATUS)` messages, as appearing in failed source code + # compiles, appear on stdout, so we only capture that. + p = subprocess.Popen(args, stdout=subprocess.PIPE) + lines = [] + for line in p.stdout: + lines.append(line) + sys.stdout.write(line.decode()) + sys.stdout.flush() + r = p.wait() + if r != 0 and os.environ.get("UPLOAD_DIR"): + cmake_output_re = re.compile(b'See also "(.*/CMakeOutput.log)"') + cmake_error_re = re.compile(b'See also "(.*/CMakeError.log)"') + + def find_first_match(re): + for l in lines: + match = re.search(l) + if match: + return match + + output_match = find_first_match(cmake_output_re) + error_match = find_first_match(cmake_error_re) + + upload_dir = os.environ["UPLOAD_DIR"].encode("utf-8") + if output_match or error_match: + mkdir_p(upload_dir) + if output_match: + shutil.copy2(output_match.group(1), upload_dir) + if error_match: + shutil.copy2(error_match.group(1), upload_dir) + else: + r = subprocess.call(args) + assert r == 0 + + +def run_in(path, args): + with chdir(path): + check_run(args) + + +@contextmanager +def chdir(path): + d = os.getcwd() + print('cd "%s"' % path, file=sys.stderr) + os.chdir(path) + try: + yield + finally: + print('cd "%s"' % d, file=sys.stderr) + os.chdir(d) + + +def patch(patch, srcdir): + patch = os.path.realpath(patch) + check_run(["patch", "-d", srcdir, "-p1", "-i", patch, "--fuzz=0", "-s"]) + + +def import_clang_tidy(source_dir, build_clang_tidy_alpha, build_clang_tidy_external): + clang_plugin_path = os.path.join(os.path.dirname(sys.argv[0]), "..", "clang-plugin") + clang_tidy_path = os.path.join(source_dir, "clang-tools-extra/clang-tidy") + sys.path.append(clang_plugin_path) + from import_mozilla_checks import do_import + + import_options = { + "alpha": build_clang_tidy_alpha, + "external": build_clang_tidy_external, + } + do_import(clang_plugin_path, clang_tidy_path, import_options) + + +def build_package(package_build_dir, cmake_args): + if not os.path.exists(package_build_dir): + os.mkdir(package_build_dir) + # If CMake has already been run, it may have been run with different + # arguments, so we need to re-run it. Make sure the cached copy of the + # previous CMake run is cleared before running it again. + if os.path.exists(package_build_dir + "/CMakeCache.txt"): + os.remove(package_build_dir + "/CMakeCache.txt") + if os.path.exists(package_build_dir + "/CMakeFiles"): + shutil.rmtree(package_build_dir + "/CMakeFiles") + + run_in(package_build_dir, ["cmake"] + cmake_args) + run_in(package_build_dir, ["ninja", "install", "-v"]) + + +@contextmanager +def updated_env(env): + old_env = os.environ.copy() + os.environ.update(env) + yield + os.environ.clear() + os.environ.update(old_env) + + +def build_tar_package(name, base, directory): + name = os.path.realpath(name) + print("tarring {} from {}/{}".format(name, base, directory), file=sys.stderr) + assert name.endswith(".tar.zst") + + cctx = zstandard.ZstdCompressor() + with open(name, "wb") as f, cctx.stream_writer(f) as z: + with tarfile.open(mode="w|", fileobj=z) as tf: + with chdir(base): + tf.add(directory) + + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST or not os.path.isdir(path): + raise + + +def delete(path): + if os.path.isdir(path): + shutil.rmtree(path) + else: + try: + os.unlink(path) + except Exception: + pass + + +def install_import_library(build_dir, clang_dir): + shutil.copy2( + os.path.join(build_dir, "lib", "clang.lib"), os.path.join(clang_dir, "lib") + ) + + +def is_darwin(): + return platform.system() == "Darwin" + + +def is_linux(): + return platform.system() == "Linux" + + +def is_windows(): + return platform.system() == "Windows" + + +def build_one_stage( + cc, + cxx, + asm, + ar, + ranlib, + libtool, + ldflags, + src_dir, + stage_dir, + package_name, + osx_cross_compile, + build_type, + assertions, + targets, + is_final_stage=False, + profile=None, +): + if not os.path.exists(stage_dir): + os.mkdir(stage_dir) + + build_dir = stage_dir + "/build" + inst_dir = stage_dir + "/" + package_name + + # cmake doesn't deal well with backslashes in paths. + def slashify_path(path): + return path.replace("\\", "/") + + def cmake_base_args(cc, cxx, asm, ar, ranlib, libtool, ldflags, inst_dir): + machine_targets = targets if is_final_stage and targets else "X86" + + cmake_args = [ + "-GNinja", + "-DCMAKE_C_COMPILER=%s" % slashify_path(cc[0]), + "-DCMAKE_CXX_COMPILER=%s" % slashify_path(cxx[0]), + "-DCMAKE_ASM_COMPILER=%s" % slashify_path(asm[0]), + "-DCMAKE_AR=%s" % slashify_path(ar), + "-DCMAKE_C_FLAGS=%s" % " ".join(cc[1:]), + "-DCMAKE_CXX_FLAGS=%s" % " ".join(cxx[1:]), + "-DCMAKE_ASM_FLAGS=%s" % " ".join(asm[1:]), + "-DCMAKE_EXE_LINKER_FLAGS=%s" % " ".join(ldflags), + "-DCMAKE_SHARED_LINKER_FLAGS=%s" % " ".join(ldflags), + "-DCMAKE_BUILD_TYPE=%s" % build_type, + "-DCMAKE_INSTALL_PREFIX=%s" % inst_dir, + "-DLLVM_TARGETS_TO_BUILD=%s" % machine_targets, + "-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF", + "-DLLVM_ENABLE_ASSERTIONS=%s" % ("ON" if assertions else "OFF"), + "-DLLVM_ENABLE_BINDINGS=OFF", + "-DLLVM_ENABLE_CURL=OFF", + "-DLLVM_INCLUDE_TESTS=OFF", + ] + if is_llvm_toolchain(cc[0], cxx[0]): + cmake_args += ["-DLLVM_ENABLE_LLD=ON"] + + if "TASK_ID" in os.environ: + cmake_args += [ + "-DCLANG_REPOSITORY_STRING=taskcluster-%s" % os.environ["TASK_ID"], + ] + projects = ["clang", "lld"] + if is_final_stage: + projects.append("clang-tools-extra") + else: + cmake_args.append("-DLLVM_TOOL_LLI_BUILD=OFF") + + cmake_args.append("-DLLVM_ENABLE_PROJECTS=%s" % ";".join(projects)) + + # There is no libxml2 on Windows except if we build one ourselves. + # libxml2 is only necessary for llvm-mt, but Windows can just use the + # native MT tool. + if not is_windows() and is_final_stage: + cmake_args += ["-DLLVM_ENABLE_LIBXML2=FORCE_ON"] + if is_linux() and not osx_cross_compile and is_final_stage: + sysroot = os.path.join(os.environ.get("MOZ_FETCHES_DIR", ""), "sysroot") + if os.path.exists(sysroot): + cmake_args += ["-DLLVM_BINUTILS_INCDIR=/usr/include"] + cmake_args += ["-DCMAKE_SYSROOT=%s" % sysroot] + # Work around the LLVM build system not building the i386 compiler-rt + # because it doesn't allow to use a sysroot for that during the cmake + # checks. + cmake_args += ["-DCAN_TARGET_i386=1"] + if is_windows(): + cmake_args.insert(-1, "-DLLVM_EXPORT_SYMBOLS_FOR_PLUGINS=ON") + cmake_args.insert(-1, "-DLLVM_USE_CRT_RELEASE=MT") + else: + # libllvm as a shared library is not supported on Windows + cmake_args += ["-DLLVM_LINK_LLVM_DYLIB=ON"] + if ranlib is not None: + cmake_args += ["-DCMAKE_RANLIB=%s" % slashify_path(ranlib)] + if libtool is not None: + cmake_args += ["-DCMAKE_LIBTOOL=%s" % slashify_path(libtool)] + if osx_cross_compile: + arch = "arm64" if os.environ.get("OSX_ARCH") == "arm64" else "x86_64" + target_cpu = ( + "aarch64" if os.environ.get("OSX_ARCH") == "arm64" else "x86_64" + ) + cmake_args += [ + "-DCMAKE_SYSTEM_NAME=Darwin", + "-DCMAKE_SYSTEM_VERSION=%s" % os.environ["MACOSX_DEPLOYMENT_TARGET"], + "-DCMAKE_OSX_SYSROOT=%s" % slashify_path(os.getenv("CROSS_SYSROOT")), + "-DCMAKE_FIND_ROOT_PATH=%s" % slashify_path(os.getenv("CROSS_SYSROOT")), + "-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER", + "-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY", + "-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY", + "-DCMAKE_MACOSX_RPATH=ON", + "-DCMAKE_OSX_ARCHITECTURES=%s" % arch, + "-DDARWIN_osx_ARCHS=%s" % arch, + "-DDARWIN_osx_SYSROOT=%s" % slashify_path(os.getenv("CROSS_SYSROOT")), + "-DLLVM_DEFAULT_TARGET_TRIPLE=%s-apple-darwin" % target_cpu, + "-DCMAKE_C_COMPILER_TARGET=%s-apple-darwin" % target_cpu, + "-DCMAKE_CXX_COMPILER_TARGET=%s-apple-darwin" % target_cpu, + "-DCMAKE_ASM_COMPILER_TARGET=%s-apple-darwin" % target_cpu, + ] + if os.environ.get("OSX_ARCH") == "arm64": + cmake_args += [ + "-DDARWIN_osx_BUILTIN_ARCHS=arm64", + ] + # Starting in LLVM 11 (which requires SDK 10.12) the build tries to + # detect the SDK version by calling xcrun. Cross-compiles don't have + # an xcrun, so we have to set the version explicitly. + cmake_args += [ + "-DDARWIN_macosx_OVERRIDE_SDK_VERSION=%s" + % os.environ["MACOSX_DEPLOYMENT_TARGET"], + ] + if profile == "gen": + # Per https://releases.llvm.org/10.0.0/docs/HowToBuildWithPGO.html + cmake_args += [ + "-DLLVM_BUILD_INSTRUMENTED=IR", + "-DLLVM_BUILD_RUNTIME=No", + ] + elif profile: + cmake_args += [ + "-DLLVM_PROFDATA_FILE=%s" % profile, + ] + return cmake_args + + cmake_args = [] + cmake_args += cmake_base_args(cc, cxx, asm, ar, ranlib, libtool, ldflags, inst_dir) + cmake_args += [src_dir] + build_package(build_dir, cmake_args) + + # For some reasons the import library clang.lib of clang.exe is not + # installed, so we copy it by ourselves. + if is_windows() and is_final_stage: + install_import_library(build_dir, inst_dir) + + +# Return the absolute path of a build tool. We first look to see if the +# variable is defined in the config file, and if so we make sure it's an +# absolute path to an existing tool, otherwise we look for a program in +# $PATH named "key". +# +# This expects the name of the key in the config file to match the name of +# the tool in the default toolchain on the system (for example, "ld" on Unix +# and "link" on Windows). +def get_tool(config, key): + f = None + if key in config: + f = config[key].format(**os.environ) + if os.path.isabs(f): + if not os.path.exists(f): + raise ValueError("%s must point to an existing path" % key) + return f + + # Assume that we have the name of some program that should be on PATH. + tool = which(f) if f else which(key) + if not tool: + raise ValueError("%s not found on PATH" % (f or key)) + return tool + + +# This function is intended to be called on the final build directory when +# building clang-tidy. Also clang-format binaries are included that can be used +# in conjunction with clang-tidy. +# As a separate binary we also ship clangd for the language server protocol that +# can be used as a plugin in `vscode`. +# Its job is to remove all of the files which won't be used for clang-tidy or +# clang-format to reduce the download size. Currently when this function +# finishes its job, it will leave final_dir with a layout like this: +# +# clang/ +# bin/ +# clang-apply-replacements +# clang-format +# clang-tidy +# clangd +# run-clang-tidy +# include/ +# * (nothing will be deleted here) +# lib/ +# clang/ +# 4.0.0/ +# include/ +# * (nothing will be deleted here) +# share/ +# clang/ +# clang-format-diff.py +# clang-tidy-diff.py +# run-clang-tidy.py +def prune_final_dir_for_clang_tidy(final_dir, osx_cross_compile): + # Make sure we only have what we expect. + dirs = [ + "bin", + "include", + "lib", + "lib32", + "libexec", + "msbuild-bin", + "share", + "tools", + ] + if is_linux(): + dirs.append("x86_64-unknown-linux-gnu") + for f in glob.glob("%s/*" % final_dir): + if os.path.basename(f) not in dirs: + raise Exception("Found unknown file %s in the final directory" % f) + if not os.path.isdir(f): + raise Exception("Expected %s to be a directory" % f) + + kept_binaries = [ + "clang-apply-replacements", + "clang-format", + "clang-tidy", + "clangd", + "clang-query", + "run-clang-tidy", + ] + re_clang_tidy = re.compile(r"^(" + "|".join(kept_binaries) + r")(\.exe)?$", re.I) + for f in glob.glob("%s/bin/*" % final_dir): + if re_clang_tidy.search(os.path.basename(f)) is None: + delete(f) + + # Keep include/ intact. + + # Remove the target-specific files. + if is_linux(): + if os.path.exists(os.path.join(final_dir, "x86_64-unknown-linux-gnu")): + shutil.rmtree(os.path.join(final_dir, "x86_64-unknown-linux-gnu")) + + # In lib/, only keep lib/clang/N.M.O/include and the LLVM shared library. + re_ver_num = re.compile(r"^\d+\.\d+\.\d+$", re.I) + for f in glob.glob("%s/lib/*" % final_dir): + name = os.path.basename(f) + if name == "clang": + continue + if osx_cross_compile and name in ["libLLVM.dylib", "libclang-cpp.dylib"]: + continue + if is_linux() and ( + fnmatch.fnmatch(name, "libLLVM*.so") + or fnmatch.fnmatch(name, "libclang-cpp.so*") + ): + continue + delete(f) + for f in glob.glob("%s/lib/clang/*" % final_dir): + if re_ver_num.search(os.path.basename(f)) is None: + delete(f) + for f in glob.glob("%s/lib/clang/*/*" % final_dir): + if os.path.basename(f) != "include": + delete(f) + + # Completely remove libexec/, msbuild-bin and tools, if it exists. + shutil.rmtree(os.path.join(final_dir, "libexec")) + for d in ("msbuild-bin", "tools"): + d = os.path.join(final_dir, d) + if os.path.exists(d): + shutil.rmtree(d) + + # In share/, only keep share/clang/*tidy* + re_clang_tidy = re.compile(r"format|tidy", re.I) + for f in glob.glob("%s/share/*" % final_dir): + if os.path.basename(f) != "clang": + delete(f) + for f in glob.glob("%s/share/clang/*" % final_dir): + if re_clang_tidy.search(os.path.basename(f)) is None: + delete(f) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-c", + "--config", + action="append", + required=True, + type=argparse.FileType("r"), + help="Clang configuration file", + ) + parser.add_argument( + "--clean", required=False, action="store_true", help="Clean the build directory" + ) + parser.add_argument( + "--skip-tar", + required=False, + action="store_true", + help="Skip tar packaging stage", + ) + parser.add_argument( + "--skip-patch", + required=False, + action="store_true", + help="Do not patch source", + ) + + args = parser.parse_args() + + if not os.path.exists("llvm/README.txt"): + raise Exception( + "The script must be run from the root directory of the llvm-project tree" + ) + source_dir = os.getcwd() + build_dir = source_dir + "/build" + + if args.clean: + shutil.rmtree(build_dir) + os.sys.exit(0) + + llvm_source_dir = source_dir + "/llvm" + + exe_ext = "" + if is_windows(): + exe_ext = ".exe" + + cc_name = "clang" + cxx_name = "clang++" + if is_windows(): + cc_name = "clang-cl" + cxx_name = "clang-cl" + + config = {} + # Merge all the configs we got from the command line. + for c in args.config: + this_config_dir = os.path.dirname(c.name) + this_config = json.load(c) + patches = this_config.get("patches") + if patches: + this_config["patches"] = [os.path.join(this_config_dir, p) for p in patches] + for key, value in this_config.items(): + old_value = config.get(key) + if old_value is None: + config[key] = value + elif value is None: + if key in config: + del config[key] + elif type(old_value) != type(value): + raise Exception( + "{} is overriding `{}` with a value of the wrong type".format( + c.name, key + ) + ) + elif isinstance(old_value, list): + for v in value: + if v not in old_value: + old_value.append(v) + elif isinstance(old_value, dict): + raise Exception("{} is setting `{}` to a dict?".format(c.name, key)) + else: + config[key] = value + + stages = 2 + if "stages" in config: + stages = int(config["stages"]) + if stages not in (1, 2, 3, 4): + raise ValueError("We only know how to build 1, 2, 3, or 4 stages.") + skip_stages = 0 + if "skip_stages" in config: + # The assumption here is that the compiler given in `cc` and other configs + # is the result of the last skip stage, built somewhere else. + skip_stages = int(config["skip_stages"]) + if skip_stages >= stages: + raise ValueError("Cannot skip more stages than are built.") + pgo = False + if "pgo" in config: + pgo = config["pgo"] + if pgo not in (True, False): + raise ValueError("Only boolean values are accepted for pgo.") + build_type = "Release" + if "build_type" in config: + build_type = config["build_type"] + if build_type not in ("Release", "Debug", "RelWithDebInfo", "MinSizeRel"): + raise ValueError( + "We only know how to do Release, Debug, RelWithDebInfo or " + "MinSizeRel builds" + ) + targets = config.get("targets") + build_clang_tidy = False + if "build_clang_tidy" in config: + build_clang_tidy = config["build_clang_tidy"] + if build_clang_tidy not in (True, False): + raise ValueError("Only boolean values are accepted for build_clang_tidy.") + build_clang_tidy_alpha = False + # check for build_clang_tidy_alpha only if build_clang_tidy is true + if build_clang_tidy and "build_clang_tidy_alpha" in config: + build_clang_tidy_alpha = config["build_clang_tidy_alpha"] + if build_clang_tidy_alpha not in (True, False): + raise ValueError( + "Only boolean values are accepted for build_clang_tidy_alpha." + ) + build_clang_tidy_external = False + # check for build_clang_tidy_external only if build_clang_tidy is true + if build_clang_tidy and "build_clang_tidy_external" in config: + build_clang_tidy_external = config["build_clang_tidy_external"] + if build_clang_tidy_external not in (True, False): + raise ValueError( + "Only boolean values are accepted for build_clang_tidy_external." + ) + osx_cross_compile = False + if "osx_cross_compile" in config: + osx_cross_compile = config["osx_cross_compile"] + if osx_cross_compile not in (True, False): + raise ValueError("Only boolean values are accepted for osx_cross_compile.") + if osx_cross_compile and not is_linux(): + raise ValueError("osx_cross_compile can only be used on Linux.") + assertions = False + if "assertions" in config: + assertions = config["assertions"] + if assertions not in (True, False): + raise ValueError("Only boolean values are accepted for assertions.") + + if is_darwin() or osx_cross_compile: + os.environ["MACOSX_DEPLOYMENT_TARGET"] = ( + "11.0" if os.environ.get("OSX_ARCH") == "arm64" else "10.12" + ) + + cc = get_tool(config, "cc") + cxx = get_tool(config, "cxx") + asm = get_tool(config, "ml" if is_windows() else "as") + # Not using lld here as default here because it's not in PATH. But clang + # knows how to find it when they are installed alongside each others. + ar = get_tool(config, "lib" if is_windows() else "ar") + ranlib = None if is_windows() else get_tool(config, "ranlib") + libtool = None + if "libtool" in config: + libtool = get_tool(config, "libtool") + + if not os.path.exists(source_dir): + os.makedirs(source_dir) + + if not args.skip_patch: + for p in config.get("patches", []): + patch(p, source_dir) + + package_name = "clang" + if build_clang_tidy: + package_name = "clang-tidy" + if not args.skip_patch: + import_clang_tidy( + source_dir, build_clang_tidy_alpha, build_clang_tidy_external + ) + + if not os.path.exists(build_dir): + os.makedirs(build_dir) + + stage1_dir = build_dir + "/stage1" + stage1_inst_dir = stage1_dir + "/" + package_name + + final_stage_dir = stage1_dir + + if is_darwin(): + extra_cflags = [] + extra_cxxflags = [] + extra_cflags2 = [] + extra_cxxflags2 = [] + extra_asmflags = [] + extra_ldflags = [] + elif is_linux(): + extra_cflags = [] + extra_cxxflags = [] + extra_cflags2 = ["-fPIC"] + # Silence clang's warnings about arguments not being used in compilation. + extra_cxxflags2 = [ + "-fPIC", + "-Qunused-arguments", + ] + extra_asmflags = [] + # Avoid libLLVM internal function calls going through the PLT. + extra_ldflags = ["-Wl,-Bsymbolic-functions"] + # For whatever reason, LLVM's build system will set things up to turn + # on -ffunction-sections and -fdata-sections, but won't turn on the + # corresponding option to strip unused sections. We do it explicitly + # here. LLVM's build system is also picky about turning on ICF, so + # we do that explicitly here, too. + + # It's unfortunately required to specify the linker used here because + # the linker flags are used in LLVM's configure step before + # -DLLVM_ENABLE_LLD is actually processed. + if is_llvm_toolchain(cc, cxx): + extra_ldflags += ["-fuse-ld=lld", "-Wl,--icf=safe"] + extra_ldflags += ["-Wl,--gc-sections"] + elif is_windows(): + extra_cflags = [] + extra_cxxflags = [] + # clang-cl would like to figure out what it's supposed to be emulating + # by looking at an MSVC install, but we don't really have that here. + # Force things on based on WinMsvc.cmake. + # Ideally, we'd just use WinMsvc.cmake as a toolchain file, but it only + # really works for cross-compiles, which this is not. + with open(os.path.join(llvm_source_dir, "cmake/platforms/WinMsvc.cmake")) as f: + compat = [ + item + for line in f + for item in line.split() + if "-fms-compatibility-version=" in item + ][0] + extra_cflags2 = [compat] + extra_cxxflags2 = [compat] + extra_asmflags = [] + extra_ldflags = [] + + if osx_cross_compile: + # undo the damage done in the is_linux() block above, and also simulate + # the is_darwin() block above. + extra_cflags = [] + extra_cxxflags = [] + extra_cxxflags2 = [] + + extra_flags = [ + "-mlinker-version=137", + "-B", + "%s/bin" % os.getenv("CROSS_CCTOOLS_PATH"), + "-isysroot", + os.getenv("CROSS_SYSROOT"), + # technically the sysroot flag there should be enough to deduce this, + # but clang needs some help to figure this out. + "-I%s/usr/include" % os.getenv("CROSS_SYSROOT"), + "-iframework", + "%s/System/Library/Frameworks" % os.getenv("CROSS_SYSROOT"), + ] + extra_cflags += extra_flags + extra_cxxflags += extra_flags + extra_cflags2 += extra_flags + extra_cxxflags2 += extra_flags + extra_asmflags += extra_flags + extra_ldflags = [ + "-Wl,-syslibroot,%s" % os.getenv("CROSS_SYSROOT"), + "-Wl,-dead_strip", + ] + + upload_dir = os.getenv("UPLOAD_DIR") + if assertions and upload_dir: + extra_cflags2 += ["-fcrash-diagnostics-dir=%s" % upload_dir] + extra_cxxflags2 += ["-fcrash-diagnostics-dir=%s" % upload_dir] + + if skip_stages < 1: + build_one_stage( + [cc] + extra_cflags, + [cxx] + extra_cxxflags, + [asm] + extra_asmflags, + ar, + ranlib, + libtool, + extra_ldflags, + llvm_source_dir, + stage1_dir, + package_name, + osx_cross_compile, + build_type, + assertions, + targets, + is_final_stage=(stages == 1), + ) + + if stages >= 2 and skip_stages < 2: + stage2_dir = build_dir + "/stage2" + stage2_inst_dir = stage2_dir + "/" + package_name + final_stage_dir = stage2_dir + if skip_stages < 1: + cc = stage1_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + cxx = stage1_inst_dir + "/bin/%s%s" % (cxx_name, exe_ext) + asm = stage1_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + build_one_stage( + [cc] + extra_cflags2, + [cxx] + extra_cxxflags2, + [asm] + extra_asmflags, + ar, + ranlib, + libtool, + extra_ldflags, + llvm_source_dir, + stage2_dir, + package_name, + osx_cross_compile, + build_type, + assertions, + targets, + is_final_stage=(stages == 2), + profile="gen" if pgo else None, + ) + + if stages >= 3 and skip_stages < 3: + stage3_dir = build_dir + "/stage3" + stage3_inst_dir = stage3_dir + "/" + package_name + final_stage_dir = stage3_dir + if skip_stages < 2: + cc = stage2_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + cxx = stage2_inst_dir + "/bin/%s%s" % (cxx_name, exe_ext) + asm = stage2_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + build_one_stage( + [cc] + extra_cflags2, + [cxx] + extra_cxxflags2, + [asm] + extra_asmflags, + ar, + ranlib, + libtool, + extra_ldflags, + llvm_source_dir, + stage3_dir, + package_name, + osx_cross_compile, + build_type, + assertions, + targets, + (stages == 3), + ) + if pgo: + llvm_profdata = stage2_inst_dir + "/bin/llvm-profdata%s" % exe_ext + merge_cmd = [llvm_profdata, "merge", "-o", "merged.profdata"] + profraw_files = glob.glob( + os.path.join(stage2_dir, "build", "profiles", "*.profraw") + ) + run_in(stage3_dir, merge_cmd + profraw_files) + if stages == 3: + mkdir_p(upload_dir) + shutil.copy2(os.path.join(stage3_dir, "merged.profdata"), upload_dir) + return + + if stages >= 4 and skip_stages < 4: + stage4_dir = build_dir + "/stage4" + final_stage_dir = stage4_dir + profile = None + if pgo: + if skip_stages == 3: + profile_dir = os.environ.get("MOZ_FETCHES_DIR", "") + else: + profile_dir = stage3_dir + profile = os.path.join(profile_dir, "merged.profdata") + if skip_stages < 3: + cc = stage3_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + cxx = stage3_inst_dir + "/bin/%s%s" % (cxx_name, exe_ext) + asm = stage3_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + build_one_stage( + [cc] + extra_cflags2, + [cxx] + extra_cxxflags2, + [asm] + extra_asmflags, + ar, + ranlib, + libtool, + extra_ldflags, + llvm_source_dir, + stage4_dir, + package_name, + osx_cross_compile, + build_type, + assertions, + targets, + (stages == 4), + profile=profile, + ) + + if build_clang_tidy: + prune_final_dir_for_clang_tidy( + os.path.join(final_stage_dir, package_name), osx_cross_compile + ) + + if not args.skip_tar: + build_tar_package("%s.tar.zst" % package_name, final_stage_dir, package_name) + + +if __name__ == "__main__": + main() diff --git a/build/build-clang/clang-14.json b/build/build-clang/clang-14.json new file mode 100644 index 0000000000..6fbe063392 --- /dev/null +++ b/build/build-clang/clang-14.json @@ -0,0 +1,17 @@ +{ + "patches": [ + "find_symbolizer_linux_clang_10.patch", + "android-mangling-error_clang_12.patch", + "unpoison-thread-stacks_clang_10.patch", + "downgrade-mangling-error_clang_12.patch", + "bug47258-extract-symbols-mbcs.patch", + "Remove-FlushViewOfFile-when-unmaping-gcda-files.patch", + "fuzzing_ccov_build_clang_12.patch", + "win64-no-symlink.patch", + "llvmorg-15-init-16512-g4b1e3d193706.patch", + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "revert-llvmorg-14-init-11890-gf86deb18cab6.patch", + "win64-ret-null-on-commitment-limit_clang_14.patch", + "compiler-rt-rss-limit-heap-profile.patch" + ] +} diff --git a/build/build-clang/clang-15.json b/build/build-clang/clang-15.json new file mode 100644 index 0000000000..853ef3d32a --- /dev/null +++ b/build/build-clang/clang-15.json @@ -0,0 +1,23 @@ +{ + "patches": [ + "find_symbolizer_linux_clang_15.patch", + "android-mangling-error_clang_12.patch", + "unpoison-thread-stacks_clang_10.patch", + "downgrade-mangling-error_clang_12.patch", + "bug47258-extract-symbols-mbcs.patch", + "fuzzing_ccov_build_clang_12.patch", + "win64-no-symlink_clang_16.patch", + "revert-llvmorg-15-init-17171-g8bb4451a651a.patch", + "revert-llvmorg-15-init-13446-g7524fe962e47.patch", + "revert-llvmorg-15-init-11205-gcead4eceb01b.patch", + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "revert-llvmorg-14-init-11890-gf86deb18cab6.patch", + "win64-ret-null-on-commitment-limit_clang_14.patch", + "llvmorg-16-init-7778-g232e0a011e8c.patch", + "llvmorg-16-init-10850-gff111a997f1b.patch", + "llvmorg-16-init-14003-g78c033b541f0.patch", + "llvmorg-16-init-14750-ga903ecb4a26d.patch", + "llvmorg-16-init-15447-g8d3ab9dfc4d4.patch", + "compiler-rt-rss-limit-heap-profile.patch" + ] +} diff --git a/build/build-clang/clang-5.0.json b/build/build-clang/clang-5.0.json new file mode 100644 index 0000000000..ab4b180317 --- /dev/null +++ b/build/build-clang/clang-5.0.json @@ -0,0 +1,8 @@ +{ + "cc": "/usr/bin/gcc", + "cxx": "/usr/bin/g++", + "as": "/usr/bin/gcc", + "patches": [ + "profile-g4a10504e1f70c.patch" + ] +} diff --git a/build/build-clang/clang-tidy-ci.patch b/build/build-clang/clang-tidy-ci.patch new file mode 100644 index 0000000000..6c31752136 --- /dev/null +++ b/build/build-clang/clang-tidy-ci.patch @@ -0,0 +1,34 @@ +diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp +index 7de313ad4da6..697f98c362d1 100644 +--- a/clang-tools-extra/clang-tidy/ClangTidy.cpp ++++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp +@@ -432,6 +432,7 @@ ClangTidyASTConsumerFactory::createASTConsumer( + + for (auto &Check : Checks) { + Check->registerMatchers(&*Finder); ++ Check->registerPPCallbacks(Compiler); + Check->registerPPCallbacks(*SM, PP, ModuleExpanderPP); + } + +diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h +index 9b41e5836de7..d8938b8fe05e 100644 +--- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h ++++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h +@@ -20,6 +20,7 @@ + + namespace clang { + ++class CompilerInstance; + class SourceManager; + + namespace tidy { +@@ -69,6 +70,9 @@ public: + return true; + } + ++ /// This has been deprecated in clang 9 - needed by mozilla-must-override ++ virtual void registerPPCallbacks(CompilerInstance &Compiler) {} ++ + /// Override this to register ``PPCallbacks`` in the preprocessor. + /// + /// This should be used for clang-tidy checks that analyze preprocessor- diff --git a/build/build-clang/clang-tidy-external-linux64.json b/build/build-clang/clang-tidy-external-linux64.json new file mode 100644 index 0000000000..897911d3e3 --- /dev/null +++ b/build/build-clang/clang-tidy-external-linux64.json @@ -0,0 +1,11 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "patches": [ + "clang-tidy-ci.patch" + ], + "build_clang_tidy_external": true +} diff --git a/build/build-clang/clang-tidy-linux64.json b/build/build-clang/clang-tidy-linux64.json new file mode 100644 index 0000000000..53dee2120d --- /dev/null +++ b/build/build-clang/clang-tidy-linux64.json @@ -0,0 +1,11 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "patches": [ + "clang_include_cleaner.patch", + "clang-tidy-ci.patch" + ] +} diff --git a/build/build-clang/clang-tidy-macosx64.json b/build/build-clang/clang-tidy-macosx64.json new file mode 100644 index 0000000000..ac21a121fc --- /dev/null +++ b/build/build-clang/clang-tidy-macosx64.json @@ -0,0 +1,16 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "osx_cross_compile": true, + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "ar": "{MOZ_FETCHES_DIR}/cctools/bin/x86_64-apple-darwin-ar", + "ranlib": "{MOZ_FETCHES_DIR}/cctools/bin/x86_64-apple-darwin-ranlib", + "libtool": "{MOZ_FETCHES_DIR}/cctools/bin/x86_64-apple-darwin-libtool", + "ld": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "patches": [ + "clang_include_cleaner.patch", + "clang-tidy-ci.patch" + ] +} diff --git a/build/build-clang/clang-tidy-win64.json b/build/build-clang/clang-tidy-win64.json new file mode 100644 index 0000000000..38a017bc24 --- /dev/null +++ b/build/build-clang/clang-tidy-win64.json @@ -0,0 +1,11 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "cc": "cl.exe", + "cxx": "cl.exe", + "ml": "ml64.exe", + "patches": [ + "clang_include_cleaner.patch", + "clang-tidy-ci.patch" + ] +} diff --git a/build/build-clang/clang-trunk.json b/build/build-clang/clang-trunk.json new file mode 100644 index 0000000000..4e85b43216 --- /dev/null +++ b/build/build-clang/clang-trunk.json @@ -0,0 +1,19 @@ +{ + "patches": [ + "find_symbolizer_linux_clang_15.patch", + "android-mangling-error_clang_12.patch", + "unpoison-thread-stacks_clang_10.patch", + "downgrade-mangling-error_clang_12.patch", + "bug47258-extract-symbols-mbcs.patch", + "fuzzing_ccov_build_clang_12.patch", + "win64-no-symlink_clang_16.patch", + "revert-llvmorg-16-init-9324-g01859da84bad.patch", + "revert-llvmorg-16-init-7598-g54bfd0484615.patch", + "revert-llvmorg-15-init-13446-g7524fe962e47.patch", + "revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch", + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch", + "win64-ret-null-on-commitment-limit_clang_14.patch", + "compiler-rt-rss-limit-heap-profile.patch" + ] +} diff --git a/build/build-clang/clang_include_cleaner.patch b/build/build-clang/clang_include_cleaner.patch new file mode 100644 index 0000000000..0bc3b5b019 --- /dev/null +++ b/build/build-clang/clang_include_cleaner.patch @@ -0,0 +1,2235 @@ +Ported from clangd, this still can be improved over time, but it can be landed. +This was based on the work from https://bit.ly/3TkV2N1 + + The utility makes the assumption that all header are self contained! + It only checkes Decls from the main translation file, where SourceLocarion is the passed cpp file. + It builds a list with all of the includes from the translation unit. + It matches all of the Decls from the main translation units with definitions from the included header files and builds a list with used header files. + All of the includes that are not part of the matched used header files are considered to be unused. Of course this is correct if the first assumption if followed by the coding guide, where all of the header are self contained. Since the mozilla code base doesn't follow this approach false positives might appear where the is the following situation: + +FOO.cpp + +#include <A> +#Include <B> + +If header A defines a symbol that is used by header B and B doesn't include A nor +it has symbols defined that are used by FOO.cpp then B it will be marked as potentially to be removed +by the tool. +This is the limitation determined by header that are not self contained. + +The limitation presented above can be fixed in the future with extra work, but it's very time expensive +during the runtime of the checker. + +diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt +index 6a3f741721ee..ff17c8e8472a 100644 +--- a/clang-tools-extra/CMakeLists.txt ++++ b/clang-tools-extra/CMakeLists.txt +@@ -16,6 +16,7 @@ endif() + add_subdirectory(clang-apply-replacements) + add_subdirectory(clang-reorder-fields) + add_subdirectory(modularize) ++add_subdirectory(include-cleaner) + add_subdirectory(clang-tidy) + + add_subdirectory(clang-change-namespace) +@@ -23,7 +24,6 @@ add_subdirectory(clang-doc) + add_subdirectory(clang-include-fixer) + add_subdirectory(clang-move) + add_subdirectory(clang-query) +-add_subdirectory(include-cleaner) + add_subdirectory(pp-trace) + add_subdirectory(pseudo) + add_subdirectory(tool-template) +diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt +index 8a953eeea275..f2edc509acaf 100644 +--- a/clang-tools-extra/clang-tidy/CMakeLists.txt ++++ b/clang-tools-extra/clang-tidy/CMakeLists.txt +@@ -50,6 +50,7 @@ endif() + + # Checks. + # If you add a check, also add it to ClangTidyForceLinker.h in this directory. ++add_subdirectory(alpha) + add_subdirectory(android) + add_subdirectory(abseil) + add_subdirectory(altera) +@@ -77,6 +78,7 @@ add_subdirectory(portability) + add_subdirectory(readability) + add_subdirectory(zircon) + set(ALL_CLANG_TIDY_CHECKS ++ clangTidyAlphaModule + clangTidyAndroidModule + clangTidyAbseilModule + clangTidyAlteraModule +diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +index 2691d90fa521..2fa064cff22a 100644 +--- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h ++++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +@@ -20,6 +20,11 @@ extern volatile int AbseilModuleAnchorSource; + static int LLVM_ATTRIBUTE_UNUSED AbseilModuleAnchorDestination = + AbseilModuleAnchorSource; + ++// This anchor is used to force the linker to link the AlphaModule. ++extern volatile int AlphaModuleAnchorSource; ++static int LLVM_ATTRIBUTE_UNUSED AlphaModuleAnchorDestination = ++ AlphaModuleAnchorSource; ++ + // This anchor is used to force the linker to link the AlteraModule. + extern volatile int AlteraModuleAnchorSource; + static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination = +diff --git a/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp b/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp +new file mode 100644 +index 000000000000..b598a36cebf7 +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp +@@ -0,0 +1,38 @@ ++//===--- AlphaTidyModule.cpp - clang-tidy ----------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "../ClangTidy.h" ++#include "../ClangTidyModule.h" ++#include "../ClangTidyModuleRegistry.h" ++#include "UnusedIncludesCheck.h" ++ ++ ++namespace clang { ++namespace tidy { ++namespace alpha { ++ ++class AlphaModule : public ClangTidyModule { ++public: ++ void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { ++ ++ CheckFactories.registerCheck<UnusedIncludesCheck>("alpha-unused-includes"); ++ } ++}; ++ ++} // namespace alpha ++ ++// Register the AlphaTidyModule using this statically initialized variable. ++static ClangTidyModuleRegistry::Add<alpha::AlphaModule> ++ X("alpha-module", "Adds alpha lint checks."); ++ ++// This anchor is used to force the linker to link in the generated object file ++// and thus register the AlphaModule. ++volatile int AlphaModuleAnchorSource = 0; ++ ++} // namespace tidy ++} // namespace clang +diff --git a/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt b/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt +new file mode 100644 +index 000000000000..b50576868645 +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt +@@ -0,0 +1,32 @@ ++include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include-cleaner/include) ++ ++set(LLVM_LINK_COMPONENTS ++ Support ++ ) ++ ++add_clang_library(clangTidyAlphaModule ++ ++ AlphaTidyModule.cpp ++ UnusedIncludesCheck.cpp ++ ++ LINK_LIBS ++ clangAnalysis ++ clangIncludeCleaner ++ clangTidy ++ clangTidyUtils ++ ++ DEPENDS ++ omp_gen ++ ) ++ ++clang_target_link_libraries(clangTidyAlphaModule ++ PRIVATE ++ clangAnalysis ++ clangAST ++ clangASTMatchers ++ clangBasic ++ clangIncludeCleaner ++ clangLex ++ clangSerialization ++ clangTooling ++ ) +diff --git a/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp +new file mode 100644 +index 000000000000..0d6a6bf7a367 +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp +@@ -0,0 +1,76 @@ ++//===--- UnusedIncludesCheck.cpp - clang-tidy------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "UnusedIncludesCheck.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Hooks.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/LLVM.h" ++#include "clang/Basic/SourceLocation.h" ++#include "clang/Lex/Preprocessor.h" ++ ++using namespace clang::ast_matchers; ++ ++namespace clang { ++namespace tidy { ++namespace alpha { ++ ++UnusedIncludesCheck::UnusedIncludesCheck(StringRef Name, ++ ClangTidyContext *Context) ++ : ClangTidyCheck(Name, Context) {} ++ ++void UnusedIncludesCheck::registerPPCallbacks(const SourceManager &SM, ++ Preprocessor *PP, ++ Preprocessor *) { ++ Ctx = std::make_unique<include_cleaner::AnalysisContext>( ++ include_cleaner::Policy{}, *PP); ++ RecordedPP = std::make_unique<include_cleaner::RecordedPP>(); ++ PP->addPPCallbacks(RecordedPP->record(*Ctx)); ++} ++ ++void UnusedIncludesCheck::registerMatchers(MatchFinder *Finder) { ++ Finder->addMatcher( ++ translationUnitDecl(forEach(decl(isExpansionInMainFile()).bind("top"))), ++ this); ++} ++ ++void UnusedIncludesCheck::check(const MatchFinder::MatchResult &Result) { ++ Top.push_back(const_cast<Decl *>(Result.Nodes.getNodeAs<Decl>("top"))); ++} ++ ++void UnusedIncludesCheck::onEndOfTranslationUnit() { ++ llvm::DenseSet<const include_cleaner::RecordedPP::Include *> Used; ++ llvm::DenseSet<include_cleaner::Header> Seen; ++ include_cleaner::walkUsed( ++ *Ctx, Top, RecordedPP->MacroReferences, ++ [&](SourceLocation Loc, include_cleaner::Symbol Sym, ++ llvm::ArrayRef<include_cleaner::Header> Headers) { ++ for (const auto &Header : Headers) { ++ if (!Seen.insert(Header).second) ++ continue; ++ const auto& HeadersToInsert = RecordedPP->Includes.match(Header); ++ Used.insert(HeadersToInsert.begin(), HeadersToInsert.end()); ++ } ++ }); ++ for (const auto &I : RecordedPP->Includes.all()) { ++ if (!Used.contains(&I)) { ++ const auto &SM = Ctx->sourceManager(); ++ FileID FID = SM.getFileID(I.Location); ++ diag(I.Location, "there is a high probability that include is unused") ++ << FixItHint::CreateRemoval(CharSourceRange::getCharRange( ++ SM.translateLineCol(FID, I.Line, 1), ++ SM.translateLineCol(FID, I.Line + 1, 1))); ++ } ++ } ++} ++ ++UnusedIncludesCheck::~UnusedIncludesCheck() = default; ++ ++} // namespace alpha ++} // namespace tidy ++} // namespace clang +diff --git a/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h +new file mode 100644 +index 000000000000..f67c46e6cc3e +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h +@@ -0,0 +1,42 @@ ++//===--- UnusedIncludesCheck.h - clang-tidy----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H ++#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H ++ ++#include "../ClangTidyCheck.h" ++ ++namespace clang { ++namespace include_cleaner { ++class AnalysisContext; ++struct RecordedPP; ++} // namespace include_cleaner ++namespace tidy { ++namespace alpha { ++ ++class UnusedIncludesCheck : public ClangTidyCheck { ++public: ++ UnusedIncludesCheck(StringRef Name, ClangTidyContext *Context); ++ ~UnusedIncludesCheck(); ++ void registerPPCallbacks(const SourceManager &SM, Preprocessor *, ++ Preprocessor *) override; ++ void registerMatchers(ast_matchers::MatchFinder *Finder) override; ++ void check(const ast_matchers::MatchFinder::MatchResult &Result) override; ++ void onEndOfTranslationUnit() override; ++ ++private: ++ std::unique_ptr<include_cleaner::AnalysisContext> Ctx; ++ std::unique_ptr<include_cleaner::RecordedPP> RecordedPP; ++ std::vector<Decl *> Top; ++}; ++ ++} // namespace misc ++} // namespace tidy ++} // namespace clang ++ ++#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H +diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt +index de8f087a52a5..14f605b1efaf 100644 +--- a/clang-tools-extra/clangd/CMakeLists.txt ++++ b/clang-tools-extra/clangd/CMakeLists.txt +@@ -2,6 +2,8 @@ + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + include_directories(${CMAKE_CURRENT_BINARY_DIR}) + ++include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include-cleaner/include) ++ + add_subdirectory(support) + + # Configure the Features.inc file. +@@ -153,6 +155,7 @@ clang_target_link_libraries(clangDaemon + clangDriver + clangFormat + clangFrontend ++ clangIncludeCleaner + clangIndex + clangLex + clangSema +diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp +index 26eb2574195d..a3cbc8894f6d 100644 +--- a/clang-tools-extra/clangd/Hover.cpp ++++ b/clang-tools-extra/clangd/Hover.cpp +@@ -12,9 +12,11 @@ + #include "CodeCompletionStrings.h" + #include "Config.h" + #include "FindTarget.h" ++#include "IncludeCleaner.h" + #include "ParsedAST.h" + #include "Selection.h" + #include "SourceCode.h" ++#include "clang-include-cleaner/Analysis.h" + #include "index/SymbolCollector.h" + #include "support/Markup.h" + #include "clang/AST/ASTContext.h" +@@ -985,6 +987,23 @@ llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos, + // FIXME: We don't have a fitting value for Kind. + HI.Definition = + URIForFile::canonicalize(Inc.Resolved, *MainFilePath).file().str(); ++ ++ // FIXME: share code, macros too... ++ include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, ++ AST.getPreprocessor()); ++ std::vector<std::string> Provides; ++ include_cleaner::walkUsed( ++ Ctx, AST.getLocalTopLevelDecls(), /*Macros=*/{}, ++ [&](SourceLocation Loc, include_cleaner::Symbol S, ++ llvm::ArrayRef<include_cleaner::Header> Headers) { ++ for (const auto &H : Headers) ++ if (match(H, Inc, AST.getIncludeStructure())) ++ Provides.push_back(S.name()); ++ }); ++ llvm::sort(Provides); ++ Provides.erase(std::unique(Provides.begin(), Provides.end()), ++ Provides.end()); ++ HI.Documentation = "provides " + llvm::join(Provides, ", "); + HI.DefinitionLanguage = ""; + return HI; + } +diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp +index e5b5187e030c..3c0ba06316ac 100644 +--- a/clang-tools-extra/clangd/IncludeCleaner.cpp ++++ b/clang-tools-extra/clangd/IncludeCleaner.cpp +@@ -12,6 +12,8 @@ + #include "ParsedAST.h" + #include "Protocol.h" + #include "SourceCode.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Types.h" + #include "index/CanonicalIncludes.h" + #include "support/Logger.h" + #include "support/Trace.h" +@@ -40,181 +42,6 @@ void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; } + + namespace { + +-/// Crawler traverses the AST and feeds in the locations of (sometimes +-/// implicitly) used symbols into \p Result. +-class ReferencedLocationCrawler +- : public RecursiveASTVisitor<ReferencedLocationCrawler> { +-public: +- ReferencedLocationCrawler(ReferencedLocations &Result, +- const SourceManager &SM) +- : Result(Result), SM(SM) {} +- +- bool VisitDeclRefExpr(DeclRefExpr *DRE) { +- add(DRE->getDecl()); +- add(DRE->getFoundDecl()); +- return true; +- } +- +- bool VisitMemberExpr(MemberExpr *ME) { +- add(ME->getMemberDecl()); +- add(ME->getFoundDecl().getDecl()); +- return true; +- } +- +- bool VisitTagType(TagType *TT) { +- add(TT->getDecl()); +- return true; +- } +- +- bool VisitFunctionDecl(FunctionDecl *FD) { +- // Function definition will require redeclarations to be included. +- if (FD->isThisDeclarationADefinition()) +- add(FD); +- return true; +- } +- +- bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { +- add(CCE->getConstructor()); +- return true; +- } +- +- bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { +- // Using templateName case is handled by the override TraverseTemplateName. +- if (TST->getTemplateName().getKind() == TemplateName::UsingTemplate) +- return true; +- add(TST->getAsCXXRecordDecl()); // Specialization +- return true; +- } +- +- // There is no VisitTemplateName in RAV, thus we override the Traverse version +- // to handle the Using TemplateName case. +- bool TraverseTemplateName(TemplateName TN) { +- VisitTemplateName(TN); +- return Base::TraverseTemplateName(TN); +- } +- // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName! +- bool VisitTemplateName(TemplateName TN) { +- if (const auto *USD = TN.getAsUsingShadowDecl()) { +- add(USD); +- return true; +- } +- add(TN.getAsTemplateDecl()); // Primary template. +- return true; +- } +- +- bool VisitUsingType(UsingType *UT) { +- add(UT->getFoundDecl()); +- return true; +- } +- +- bool VisitTypedefType(TypedefType *TT) { +- add(TT->getDecl()); +- return true; +- } +- +- // Consider types of any subexpression used, even if the type is not named. +- // This is helpful in getFoo().bar(), where Foo must be complete. +- // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to +- // consider types "used" when they are not directly spelled in code. +- bool VisitExpr(Expr *E) { +- TraverseType(E->getType()); +- return true; +- } +- +- bool TraverseType(QualType T) { +- if (isNew(T.getTypePtrOrNull())) // don't care about quals +- Base::TraverseType(T); +- return true; +- } +- +- bool VisitUsingDecl(UsingDecl *D) { +- for (const auto *Shadow : D->shadows()) +- add(Shadow->getTargetDecl()); +- return true; +- } +- +- // Enums may be usefully forward-declared as *complete* types by specifying +- // an underlying type. In this case, the definition should see the declaration +- // so they can be checked for compatibility. +- bool VisitEnumDecl(EnumDecl *D) { +- if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) +- add(D); +- return true; +- } +- +- // When the overload is not resolved yet, mark all candidates as used. +- bool VisitOverloadExpr(OverloadExpr *E) { +- for (const auto *ResolutionDecl : E->decls()) +- add(ResolutionDecl); +- return true; +- } +- +-private: +- using Base = RecursiveASTVisitor<ReferencedLocationCrawler>; +- +- void add(const Decl *D) { +- if (!D || !isNew(D->getCanonicalDecl())) +- return; +- if (auto SS = StdRecognizer(D)) { +- Result.Stdlib.insert(*SS); +- return; +- } +- // Special case RecordDecls, as it is common for them to be forward +- // declared multiple times. The most common cases are: +- // - Definition available in TU, only mark that one as usage. The rest is +- // likely to be unnecessary. This might result in false positives when an +- // internal definition is visible. +- // - There's a forward declaration in the main file, no need for other +- // redecls. +- if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) { +- if (const auto *Definition = RD->getDefinition()) { +- Result.User.insert(Definition->getLocation()); +- return; +- } +- if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) +- return; +- } +- for (const Decl *Redecl : D->redecls()) +- Result.User.insert(Redecl->getLocation()); +- } +- +- bool isNew(const void *P) { return P && Visited.insert(P).second; } +- +- ReferencedLocations &Result; +- llvm::DenseSet<const void *> Visited; +- const SourceManager &SM; +- tooling::stdlib::Recognizer StdRecognizer; +-}; +- +-// Given a set of referenced FileIDs, determines all the potentially-referenced +-// files and macros by traversing expansion/spelling locations of macro IDs. +-// This is used to map the referenced SourceLocations onto real files. +-struct ReferencedFilesBuilder { +- ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} +- llvm::DenseSet<FileID> Files; +- llvm::DenseSet<FileID> Macros; +- const SourceManager &SM; +- +- void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } +- +- void add(FileID FID, SourceLocation Loc) { +- if (FID.isInvalid()) +- return; +- assert(SM.isInFileID(Loc, FID)); +- if (Loc.isFileID()) { +- Files.insert(FID); +- return; +- } +- // Don't process the same macro FID twice. +- if (!Macros.insert(FID).second) +- return; +- const auto &Exp = SM.getSLocEntry(FID).getExpansion(); +- add(Exp.getSpellingLoc()); +- add(Exp.getExpansionLocStart()); +- add(Exp.getExpansionLocEnd()); +- } +-}; +- + // Returns the range starting at '#' and ending at EOL. Escaped newlines are not + // handled. + clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { +@@ -231,10 +58,10 @@ clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { + + // Finds locations of macros referenced from within the main file. That includes + // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. +-void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, +- const syntax::TokenBuffer *Tokens, +- ReferencedLocations &Result) { ++std::vector<include_cleaner::SymbolReference> ++findReferencedMacros(ParsedAST &AST, include_cleaner::AnalysisContext &Ctx) { + trace::Span Tracer("IncludeCleaner::findReferencedMacros"); ++ std::vector<include_cleaner::SymbolReference> Result; + // FIXME(kirillbobyrev): The macros from the main file are collected in + // ParsedAST's MainFileMacros. However, we can't use it here because it + // doesn't handle macro references that were not expanded, e.g. in macro +@@ -244,15 +71,19 @@ void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, + // this mechanism (as opposed to iterating through all tokens) will improve + // the performance of findReferencedMacros and also improve other features + // relying on MainFileMacros. +- for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) { +- auto Macro = locateMacroAt(Tok, PP); ++ for (const syntax::Token &Tok : ++ AST.getTokens().spelledTokens(AST.getSourceManager().getMainFileID())) { ++ auto Macro = locateMacroAt(Tok, AST.getPreprocessor()); + if (!Macro) + continue; + auto Loc = Macro->Info->getDefinitionLoc(); + if (Loc.isValid()) +- Result.User.insert(Loc); +- // FIXME: support stdlib macros ++ Result.push_back(include_cleaner::SymbolReference{ ++ Tok.location(), ++ Ctx.macro(AST.getPreprocessor().getIdentifierInfo(Macro->Name), ++ Loc)}); + } ++ return Result; + } + + static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, +@@ -296,110 +127,8 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, + } + return true; + } +- +-// In case symbols are coming from non self-contained header, we need to find +-// its first includer that is self-contained. This is the header users can +-// include, so it will be responsible for bringing the symbols from given +-// header into the scope. +-FileID headerResponsible(FileID ID, const SourceManager &SM, +- const IncludeStructure &Includes) { +- // Unroll the chain of non self-contained headers until we find the one that +- // can be included. +- for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); +- FE = SM.getFileEntryForID(ID)) { +- // If FE is nullptr, we consider it to be the responsible header. +- if (!FE) +- break; +- auto HID = Includes.getID(FE); +- assert(HID && "We're iterating over headers already existing in " +- "IncludeStructure"); +- if (Includes.isSelfContained(*HID)) +- break; +- // The header is not self-contained: put the responsibility for its symbols +- // on its includer. +- ID = SM.getFileID(SM.getIncludeLoc(ID)); +- } +- return ID; +-} +- + } // namespace + +-ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, +- const syntax::TokenBuffer *Tokens) { +- trace::Span Tracer("IncludeCleaner::findReferencedLocations"); +- ReferencedLocations Result; +- const auto &SM = Ctx.getSourceManager(); +- ReferencedLocationCrawler Crawler(Result, SM); +- Crawler.TraverseAST(Ctx); +- if (Tokens) +- findReferencedMacros(SM, PP, Tokens, Result); +- return Result; +-} +- +-ReferencedLocations findReferencedLocations(ParsedAST &AST) { +- return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(), +- &AST.getTokens()); +-} +- +-ReferencedFiles findReferencedFiles( +- const ReferencedLocations &Locs, const SourceManager &SM, +- llvm::function_ref<FileID(FileID)> HeaderResponsible, +- llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader) { +- std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()}; +- llvm::sort(Sorted); // Group by FileID. +- ReferencedFilesBuilder Builder(SM); +- for (auto It = Sorted.begin(); It < Sorted.end();) { +- FileID FID = SM.getFileID(*It); +- Builder.add(FID, *It); +- // Cheaply skip over all the other locations from the same FileID. +- // This avoids lots of redundant Loc->File lookups for the same file. +- do +- ++It; +- while (It != Sorted.end() && SM.isInFileID(*It, FID)); +- } +- +- // If a header is not self-contained, we consider its symbols a logical part +- // of the including file. Therefore, mark the parents of all used +- // non-self-contained FileIDs as used. Perform this on FileIDs rather than +- // HeaderIDs, as each inclusion of a non-self-contained file is distinct. +- llvm::DenseSet<FileID> UserFiles; +- llvm::StringSet<> PublicHeaders; +- for (FileID ID : Builder.Files) { +- UserFiles.insert(HeaderResponsible(ID)); +- if (auto PublicHeader = UmbrellaHeader(ID)) { +- PublicHeaders.insert(*PublicHeader); +- } +- } +- +- llvm::DenseSet<tooling::stdlib::Header> StdlibFiles; +- for (const auto &Symbol : Locs.Stdlib) +- for (const auto &Header : Symbol.headers()) +- StdlibFiles.insert(Header); +- +- return {std::move(UserFiles), std::move(StdlibFiles), +- std::move(PublicHeaders)}; +-} +- +-ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, +- const IncludeStructure &Includes, +- const CanonicalIncludes &CanonIncludes, +- const SourceManager &SM) { +- return findReferencedFiles( +- Locs, SM, +- [&SM, &Includes](FileID ID) { +- return headerResponsible(ID, SM, Includes); +- }, +- [&SM, &CanonIncludes](FileID ID) -> Optional<StringRef> { +- auto Entry = SM.getFileEntryRefForID(ID); +- if (!Entry) +- return llvm::None; +- auto PublicHeader = CanonIncludes.mapHeader(*Entry); +- if (PublicHeader.empty()) +- return llvm::None; +- return PublicHeader; +- }); +-} +- + std::vector<const Inclusion *> + getUnused(ParsedAST &AST, + const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles, +@@ -426,51 +155,50 @@ getUnused(ParsedAST &AST, + return Unused; + } + +-#ifndef NDEBUG +-// Is FID a <built-in>, <scratch space> etc? +-static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { +- const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); +- return FI.getName().startswith("<"); +-} +-#endif +- +-llvm::DenseSet<IncludeStructure::HeaderID> +-translateToHeaderIDs(const ReferencedFiles &Files, +- const IncludeStructure &Includes, +- const SourceManager &SM) { +- trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); +- llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs; +- TranslatedHeaderIDs.reserve(Files.User.size()); +- for (FileID FID : Files.User) { +- const FileEntry *FE = SM.getFileEntryForID(FID); +- if (!FE) { +- assert(isSpecialBuffer(FID, SM)); +- continue; +- } +- const auto File = Includes.getID(FE); +- assert(File); +- TranslatedHeaderIDs.insert(*File); +- } +- for (tooling::stdlib::Header StdlibUsed : Files.Stdlib) +- for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) +- TranslatedHeaderIDs.insert(HID); +- return TranslatedHeaderIDs; ++bool match(const include_cleaner::Header &H, const Inclusion &I, ++ const IncludeStructure &S) { ++ switch (H.kind()) { ++ case include_cleaner::Header::Physical: ++ if (auto HID = S.getID(H.getPhysical())) ++ if (static_cast<unsigned>(*HID) == I.HeaderID) ++ return true; ++ break; ++ case include_cleaner::Header::StandardLibrary: ++ return I.Written == H.getStandardLibrary().name(); ++ case include_cleaner::Header::Verbatim: ++ return llvm::StringRef(I.Written).trim("\"<>") == H.getVerbatimSpelling(); ++ case include_cleaner::Header::Builtin: ++ case include_cleaner::Header::MainFile: ++ break; ++ } ++ return false; + } + + std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) { +- const auto &SM = AST.getSourceManager(); +- +- auto Refs = findReferencedLocations(AST); +- auto ReferencedFiles = +- findReferencedFiles(Refs, AST.getIncludeStructure(), +- AST.getCanonicalIncludes(), AST.getSourceManager()); +- auto ReferencedHeaders = +- translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM); +- return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas); ++ include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, ++ AST.getPreprocessor()); ++ llvm::DenseSet<const Inclusion *> Used; ++ include_cleaner::walkUsed( ++ Ctx, AST.getLocalTopLevelDecls(), ++ /*MacroRefs=*/findReferencedMacros(AST, Ctx), ++ [&](SourceLocation Loc, include_cleaner::Symbol Sym, ++ llvm::ArrayRef<include_cleaner::Header> Headers) { ++ for (const auto &I : AST.getIncludeStructure().MainFileIncludes) ++ for (const auto &H : Headers) ++ if (match(H, I, AST.getIncludeStructure())) ++ Used.insert(&I); ++ }); ++ std::vector<const Inclusion *> Unused; ++ const Config &Cfg = Config::current(); ++ for (const auto &I : AST.getIncludeStructure().MainFileIncludes) { ++ if (!Used.contains(&I) && mayConsiderUnused(I, AST, Cfg)) ++ Unused.push_back(&I); ++ } ++ return Unused; + } + +-std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST, +- llvm::StringRef Code) { ++auto issueUnusedIncludesDiagnostics(ParsedAST &AST, ++ llvm::StringRef Code) -> std::vector<Diag> { + const Config &Cfg = Config::current(); + if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict || + Cfg.Diagnostics.SuppressAll || +diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h +index 4ce31baaa067..c858a60c5db7 100644 +--- a/clang-tools-extra/clangd/IncludeCleaner.h ++++ b/clang-tools-extra/clangd/IncludeCleaner.h +@@ -23,6 +23,7 @@ + #include "index/CanonicalIncludes.h" + #include "clang/Basic/SourceLocation.h" + #include "clang/Tooling/Inclusions/StandardLibrary.h" ++#include "clang-include-cleaner/Types.h" + #include "llvm/ADT/DenseSet.h" + #include "llvm/ADT/STLFunctionalExtras.h" + #include "llvm/ADT/StringSet.h" +@@ -100,6 +101,10 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST); + std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST, + llvm::StringRef Code); + ++// Does an include-cleaner header spec match a clangd recorded inclusion? ++bool match(const include_cleaner::Header &H, const Inclusion &I, ++ const IncludeStructure &S); ++ + /// Affects whether standard library includes should be considered for + /// removal. This is off by default for now due to implementation limitations: + /// - macros are not tracked +diff --git a/clang-tools-extra/include-cleaner/CMakeLists.txt b/clang-tools-extra/include-cleaner/CMakeLists.txt +index 0550b02f603b..325186879a47 100644 +--- a/clang-tools-extra/include-cleaner/CMakeLists.txt ++++ b/clang-tools-extra/include-cleaner/CMakeLists.txt +@@ -1,4 +1,8 @@ ++include_directories(include) ++include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) + add_subdirectory(lib) ++add_subdirectory(tool) ++ + if(CLANG_INCLUDE_TESTS) + add_subdirectory(test) + add_subdirectory(unittests) +diff --git a/clang-tools-extra/include-cleaner/README.md b/clang-tools-extra/include-cleaner/README.md +deleted file mode 100644 +index e69de29bb2d1..000000000000 +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +new file mode 100644 +index 000000000000..4e5cc8d03814 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +@@ -0,0 +1,77 @@ ++//===--- Analysis.h - Analyze used files --------------------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_ANALYSIS_H ++#define CLANG_INCLUDE_CLEANER_ANALYSIS_H ++ ++#include "clang-include-cleaner/Policy.h" ++#include "clang-include-cleaner/Types.h" ++ ++namespace clang { ++namespace include_cleaner { ++class Cache; ++ ++// Bundles the policy, compiler state, and caches for one include-cleaner run. ++// (This is needed everywhere, but shouldn't be used to propagate state around!) ++class AnalysisContext { ++public: ++ AnalysisContext(const Policy &, const Preprocessor &); ++ AnalysisContext(AnalysisContext &&) = delete; ++ AnalysisContext &operator=(AnalysisContext &&) = delete; ++ ~AnalysisContext(); ++ ++ const Policy &policy() const { return P; } ++ ++ const SourceManager &sourceManager() const { return *SM; } ++ const Preprocessor &preprocessor() const { return *PP; } ++ ++ // Only for internal use (the Cache class definition is not exposed). ++ // This allows us to reuse e.g. mappings from symbols to their locations. ++ Cache &cache() { return *C; } ++ // FIXME: does this need to be public? ++ Symbol macro(const IdentifierInfo *, SourceLocation); ++ ++private: ++ Policy P; ++ const SourceManager *SM; ++ const Preprocessor *PP; ++ std::unique_ptr<Cache> C; ++}; ++ ++// A UsedSymbolVisitor is a callback invoked for each symbol reference seen. ++// ++// References occur at a particular location, refer to a single symbol, and ++// that symbol may be provided by any of several headers. ++// ++// The first element of ProvidedBy is the *preferred* header, e.g. to insert. ++using UsedSymbolVisitor = ++ llvm::function_ref<void(SourceLocation UsedAt, Symbol UsedSymbol, ++ llvm::ArrayRef<Header> ProvidedBy)>; ++ ++// Find and report all references to symbols in a region of code. ++// ++// The AST traversal is rooted at ASTRoots - typically top-level declarations ++// of a single source file. MacroRefs are additional recorded references to ++// macros, which do not appear in the AST. ++// ++// This is the main entrypoint of the include-cleaner library, and can be used: ++// - to diagnose missing includes: a referenced symbol is provided by ++// headers which don't match any #include in the main file ++// - to diagnose unused includes: an #include in the main file does not match ++// the headers for any referenced symbol ++// ++// Mapping between Header and #include directives is not provided here, but see ++// RecordedPP::Includes::match() in Hooks.h. ++void walkUsed(AnalysisContext &, llvm::ArrayRef<Decl *> ASTRoots, ++ llvm::ArrayRef<SymbolReference> MacroRefs, ++ UsedSymbolVisitor Callback); ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h +new file mode 100644 +index 000000000000..39e11653b210 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h +@@ -0,0 +1,87 @@ ++//===--- Hooks.h - Record compiler events -------------------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Where Analysis.h analyzes AST nodes and recorded preprocessor events, this ++// file defines ways to capture AST and preprocessor information from a parse. ++// ++// These are the simplest way to connect include-cleaner logic to the parser, ++// but other ways are possible (for example clangd records includes separately). ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_HOOKS_H ++#define CLANG_INCLUDE_CLEANER_HOOKS_H ++ ++#include "Analysis.h" ++#include "Types.h" ++#include "clang/Basic/FileEntry.h" ++#include "clang/Basic/SourceLocation.h" ++#include "llvm/ADT/DenseMap.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringMap.h" ++#include "llvm/ADT/StringRef.h" ++#include <vector> ++ ++namespace clang { ++class FileEntry; ++class PPCallbacks; ++namespace include_cleaner { ++class PPRecorder; ++ ++// Contains recorded preprocessor events relevant to include-cleaner. ++struct RecordedPP { ++ // The callback (when installed into clang) tracks macros/includes in this. ++ std::unique_ptr<PPCallbacks> record(AnalysisContext &Ctx); ++ // FIXME: probably also want a comment handler to capture IWYU pragmas. ++ ++ // Describes where macros were used from the main file. ++ std::vector<SymbolReference> MacroReferences; ++ ++ // A single #include directive from the main file. ++ struct Include { ++ llvm::StringRef Spelled; // e.g. vector ++ const FileEntry *Resolved; // e.g. /path/to/c++/v1/vector ++ SourceLocation Location; // of hash in #include <vector> ++ unsigned Line; // 1-based line number for #include ++ }; ++ // The set of includes recorded from the main file. ++ class RecordedIncludes { ++ public: ++ // All #includes seen, in the order they appear. ++ llvm::ArrayRef<Include> all() const { return All; } ++ // Determine #includes that match a header (that provides a used symbol). ++ // ++ // Matching is based on the type of Header specified: ++ // - for a physical file like /path/to/foo.h, we check Resolved ++ // - for a logical file like <vector>, we check Spelled ++ llvm::SmallVector<const Include *> match(Header H) const; ++ ++ private: ++ std::vector<Include> All; ++ llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling; ++ llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile; ++ friend PPRecorder; ++ } Includes; ++}; ++ ++// Contains recorded parser events relevant to include-cleaner. ++struct RecordedAST { ++ // The consumer (when installed into clang) tracks declarations in this. ++ std::unique_ptr<ASTConsumer> record(AnalysisContext &Ctx); ++ ++ // The set of declarations written at file scope inside the main file. ++ // ++ // These are the roots of the subtrees that should be traversed to find uses. ++ // (Traversing the TranslationUnitDecl would find uses inside headers!) ++ std::vector<Decl *> TopLevelDecls; ++}; ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h +new file mode 100644 +index 000000000000..142887b85529 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h +@@ -0,0 +1,35 @@ ++//===--- Policy.h - Tuning what is considered used ----------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_POLICY_H ++#define CLANG_INCLUDE_CLEANER_POLICY_H ++ ++namespace clang { ++namespace include_cleaner { ++ ++// Provides some fine-tuning of include-cleaner's choices about what is used. ++// ++// Changing the policy serves two purposes: ++// - marking more things used reduces the false-positives for "unused include", ++// while marking fewer things improves "missing include" in the same way. ++// - different coding styles may make different decisions about which includes ++// are required. ++struct Policy { ++ // Does construction count as use of the type, when the type is not named? ++ // e.g. printVector({x, y, z}); - is std::vector used? ++ bool Construction = false; ++ // Is member access tracked as a reference? ++ bool Members = false; ++ // Are operator calls tracked as references? ++ bool Operators = false; ++}; ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +new file mode 100644 +index 000000000000..2a91473b926e +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +@@ -0,0 +1,219 @@ ++//===--- Types.h - Data structures for used-symbol analysis -------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Find referenced files is mostly a matter of translating: ++// AST Node => declaration => source location => file ++// ++// clang has types for these (DynTypedNode, Decl, SourceLocation, FileID), but ++// there are special cases: macros are not declarations, the concrete file where ++// a standard library symbol was defined doesn't matter, etc. ++// ++// We define some slightly more abstract sum types to handle these cases while ++// keeping the API clean. For example, Symbol is Decl+DefinedMacro. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_TYPES_H ++#define CLANG_INCLUDE_CLEANER_TYPES_H ++ ++#include "clang/AST/DeclBase.h" ++#include "clang/Tooling/Inclusions/StandardLibrary.h" ++#include "llvm/ADT/BitmaskEnum.h" ++#include "llvm/ADT/PointerSumType.h" ++ ++namespace clang { ++class IdentifierInfo; ++class MacroDirective; ++namespace include_cleaner { ++ ++// Identifies a macro, along with a particular definition of it. ++// We generally consider redefined macros to be different symbols. ++struct DefinedMacro { ++ const IdentifierInfo *Name; ++ const SourceLocation Definition; ++}; ++ ++// A Symbol is an entity that can be referenced. ++// It is either a declaration (NamedDecl) or a macro (DefinedMacro). ++class Symbol { ++public: ++ enum Kind { ++ Macro, ++ Declaration, ++ }; ++ Symbol(NamedDecl *ND) : Target(ND) {} ++ Symbol(const DefinedMacro *M) : Target(M) {} ++ ++ std::string name() const; ++ std::string nodeName() const; ++ Kind kind() const { return Target.is<NamedDecl *>() ? Declaration : Macro; } ++ ++ NamedDecl *getDeclaration() const { return Target.get<NamedDecl *>(); } ++ const DefinedMacro *getMacro() const { ++ return Target.get<const DefinedMacro *>(); ++ } ++ ++private: ++ llvm::PointerUnion<const DefinedMacro *, NamedDecl *> Target; ++}; ++ ++// A usage of a Symbol seen in our source code. ++struct SymbolReference { ++ // The point in the code where the reference occurred. ++ // We could track the DynTypedNode we found it in if it's important. ++ SourceLocation Location; ++ Symbol Target; ++}; ++ ++// A Location is a place where a symbol can be provided. ++// It is either a physical part of the TU (SourceLocation) or a logical location ++// in the standard library (stdlib::Symbol). ++class Location { ++public: ++ enum Kind : uint8_t { ++ Physical, ++ StandardLibrary, ++ }; ++ ++ Location(SourceLocation S) : K(Physical), SrcLoc(S) {} ++ Location(tooling::stdlib::Symbol S) : K(StandardLibrary), StdlibSym(S) {} ++ ++ std::string name(const SourceManager &SM) const; ++ Kind kind() const { return K; } ++ ++ SourceLocation getPhysical() const { ++ assert(kind() == Physical); ++ return SrcLoc; ++ }; ++ tooling::stdlib::Symbol getStandardLibrary() const { ++ assert(kind() == StandardLibrary); ++ return StdlibSym; ++ }; ++ ++private: ++ Kind K; ++ union { ++ SourceLocation SrcLoc; ++ tooling::stdlib::Symbol StdlibSym; ++ }; ++}; ++ ++// A Header is an includable file that can provide access to Locations. ++// It is either a physical file (FileEntry), a logical location in the standard ++// library (stdlib::Header), or a verbatim header spelling (StringRef). ++class Header { ++public: ++ enum Kind : uint8_t { ++ Physical, ++ StandardLibrary, ++ Verbatim, ++ Builtin, ++ MainFile, ++ }; ++ ++ Header(const FileEntry *FE) : K(Physical), PhysicalFile(FE) {} ++ Header(tooling::stdlib::Header H) : K(StandardLibrary), StdlibHeader(H) {} ++ Header(const char *V) : K(Verbatim), VerbatimSpelling(V) {} ++ static Header builtin() { return Header{Builtin}; }; ++ static Header mainFile() { return Header{MainFile}; }; ++ ++ std::string name() const; ++ Kind kind() const { return K; } ++ ++ const FileEntry *getPhysical() const { ++ assert(kind() == Physical); ++ return PhysicalFile; ++ }; ++ tooling::stdlib::Header getStandardLibrary() const { ++ assert(kind() == StandardLibrary); ++ return StdlibHeader; ++ }; ++ llvm::StringRef getVerbatimSpelling() const { ++ assert(kind() == Verbatim); ++ return VerbatimSpelling; ++ }; ++ ++private: ++ Header(Kind K) : K(K) {} ++ ++ Kind K; ++ union { ++ const FileEntry *PhysicalFile; ++ tooling::stdlib::Header StdlibHeader; ++ const char *VerbatimSpelling; ++ }; ++ ++ friend bool operator==(const Header &L, const Header &R) { ++ if (L.kind() != R.kind()) ++ return false; ++ switch (L.kind()) { ++ case Physical: ++ return L.getPhysical() == R.getPhysical(); ++ case StandardLibrary: ++ return L.getStandardLibrary() == R.getStandardLibrary(); ++ case Verbatim: ++ return L.getVerbatimSpelling() == R.getVerbatimSpelling(); ++ case Builtin: ++ case MainFile: ++ return true; // no payload ++ } ++ llvm_unreachable("unhandled Header kind"); ++ } ++ ++ friend bool operator<(const Header &L, const Header &R) { ++ if (L.kind() != R.kind()) ++ return L.kind() < R.kind(); ++ switch (L.kind()) { ++ case Physical: ++ return L.getPhysical() == R.getPhysical(); ++ case StandardLibrary: ++ return L.getStandardLibrary() < R.getStandardLibrary(); ++ case Verbatim: ++ return L.getVerbatimSpelling() < R.getVerbatimSpelling(); ++ case Builtin: ++ case MainFile: ++ return false; // no payload ++ } ++ llvm_unreachable("unhandled Header kind"); ++ } ++ ++ friend llvm::hash_code hash_value(const Header &H) { ++ switch (H.K) { ++ case Header::Physical: ++ return llvm::hash_combine(H.K, H.getPhysical()); ++ case Header::StandardLibrary: ++ // FIXME: make StdlibHeader hashable instead. ++ return llvm::hash_combine(H.K, H.getStandardLibrary().name()); ++ case Header::Verbatim: ++ return llvm::hash_combine(H.K, llvm::StringRef(H.VerbatimSpelling)); ++ case Header::Builtin: ++ case Header::MainFile: ++ return llvm::hash_value(H.K); ++ } ++ } ++}; ++ ++template <typename T> struct DefaultDenseMapInfo { ++ static T isEqual(const T &L, const T &R) { return L == R; } ++ static unsigned getHashValue(const T &V) { return hash_value(V); } ++}; ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++namespace llvm { ++template <> struct DenseMapInfo<clang::include_cleaner::Header> { ++ using Header = clang::include_cleaner::Header; ++ static Header getTombstoneKey() { return Header("__tombstone__"); } ++ static Header getEmptyKey() { return Header("__empty__"); } ++ static bool isEqual(const Header &L, const Header &R) { return L == R; } ++ static unsigned getHashValue(const Header &V) { return hash_value(V); } ++}; ++} // namespace llvm ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp +new file mode 100644 +index 000000000000..5ac0008b07e8 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp +@@ -0,0 +1,101 @@ ++//===--- Analysis.cpp - Analyze used files --------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Analysis.h" ++#include "AnalysisInternal.h" ++#include "clang/Lex/Preprocessor.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++AnalysisContext::AnalysisContext(const Policy &P, const Preprocessor &PP) ++ : P(P), SM(&PP.getSourceManager()), PP(&PP), C(std::make_unique<Cache>()) {} ++AnalysisContext::~AnalysisContext() = default; ++ ++static bool prefer(AnalysisContext &Ctx, Hint L, Hint R) { ++ return std::make_tuple(bool(L & Hint::NameMatch), bool(L & Hint::Complete)) > ++ std::make_tuple(bool(R & Hint::NameMatch), bool(R & Hint::Complete)); ++} ++ ++// Is this hint actually useful? ++static void addNameMatchHint(const IdentifierInfo *II, ++ llvm::SmallVector<Hinted<Header>> &H) { ++ if (!II) ++ return; ++ for (auto &HH : H) ++ if (HH->kind() == Header::Physical && ++ II->getName().equals_insensitive(HH->getPhysical()->getName())) ++ HH.Hint |= Hint::NameMatch; ++} ++ ++static llvm::SmallVector<Header> ++rank(AnalysisContext &Ctx, llvm::SmallVector<Hinted<Header>> &Candidates) { ++ // Sort by Header, so we can deduplicate (and combine flags). ++ llvm::stable_sort(Candidates, ++ [&](const Hinted<Header> &L, const Hinted<Header> &R) { ++ return *L < *R; ++ }); ++ // Like unique(), but merge hints. ++ auto *Write = Candidates.begin(); ++ for (auto *Read = Candidates.begin(); Read != Candidates.end(); ++Write) { ++ *Write = *Read; ++ for (++Read; Read != Candidates.end() && Read->Value == Write->Value; ++ ++Read) ++ Write->Hint |= Read->Hint; ++ } ++ Candidates.erase(Write, Candidates.end()); ++ // Now sort by hints. ++ llvm::stable_sort(Candidates, ++ [&](const Hinted<Header> &L, const Hinted<Header> &R) { ++ return prefer(Ctx, L.Hint, R.Hint); ++ }); ++ // Drop hints to return clean result list. ++ llvm::SmallVector<Header> Result; ++ for (const auto &H : Candidates) ++ Result.push_back(*H); ++ return Result; ++} ++ ++template <typename T> void addHint(Hint H, T &Items) { ++ for (auto &Item : Items) ++ Item.Hint |= H; ++} ++ ++void walkUsed(AnalysisContext &Ctx, llvm::ArrayRef<Decl *> ASTRoots, ++ llvm::ArrayRef<SymbolReference> MacroRefs, ++ UsedSymbolVisitor Callback) { ++ for (Decl *Root : ASTRoots) { ++ walkAST(Ctx, *Root, [&](SourceLocation RefLoc, Hinted<NamedDecl &> ND) { ++ auto Locations = locateDecl(Ctx, *ND); ++ llvm::SmallVector<Hinted<Header>> Headers; ++ for (const auto &Loc : Locations) { ++ auto LocHeaders = includableHeader(Ctx, *Loc); ++ addHint(Loc.Hint, LocHeaders); ++ Headers.append(std::move(LocHeaders)); ++ } ++ addHint(ND.Hint, Headers); ++ addNameMatchHint(ND.Value.getDeclName().getAsIdentifierInfo(), Headers); ++ Callback(RefLoc, &ND.Value, rank(Ctx, Headers)); ++ }); ++ } ++ for (const SymbolReference &MacroRef : MacroRefs) { ++ assert(MacroRef.Target.kind() == Symbol::Macro); ++ auto Loc = locateMacro(Ctx, *MacroRef.Target.getMacro()); ++ auto Headers = includableHeader(Ctx, *Loc); ++ addHint(Loc.Hint, Headers); ++ addNameMatchHint(MacroRef.Target.getMacro()->Name, Headers); ++ Callback(MacroRef.Location, MacroRef.Target, rank(Ctx, Headers)); ++ } ++} ++ ++Symbol AnalysisContext::macro(const IdentifierInfo *II, SourceLocation Loc) { ++ return cache().macro(II, Loc); ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h +index 8b0c73fe7997..31b1ad8039d8 100644 +--- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h ++++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h +@@ -21,6 +21,95 @@ + #ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H + #define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H + ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Types.h" ++#include "clang/Tooling/Inclusions/StandardLibrary.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++// FIXME: Right now we cache nothing, this is just used as an arena for macros. ++// Verify we're burning time in repeated analysis and cache partial operations. ++class Cache { ++public: ++ Symbol macro(const IdentifierInfo *Name, const SourceLocation Def) { ++ auto &DMS = DefinedMacros[Name->getName()]; ++ // Linear search. We probably only saw ~1 definition of each macro name. ++ for (const DefinedMacro &DM : DMS) ++ if (DM.Definition == Def) ++ return &DM; ++ DMS.push_back(DefinedMacro{Name, Def}); ++ return &DMS.back(); ++ } ++ ++ tooling::stdlib::Recognizer StdlibRecognizer; ++ ++private: ++ llvm::StringMap<llvm::SmallVector<DefinedMacro>> DefinedMacros; ++}; ++ ++enum class Hint : uint16_t { ++ None = 0, ++ Complete = 1, // Provides a complete definition that is often needed. ++ // e.g. classes, templates. ++ NameMatch = 1, // Header name matches the symbol name. ++ LLVM_MARK_AS_BITMASK_ENUM(Hint::Complete) ++}; ++LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); ++ ++template <typename T> struct Hinted { ++ Hinted(T Value, Hint H = Hint::None) : Value(Value), Hint(H) {} ++ T Value; ++ include_cleaner::Hint Hint; ++ ++ T &operator*() { return Value; } ++ const T &operator*() const { return Value; } ++ std::remove_reference_t<T> *operator->() { return &Value; } ++ const std::remove_reference_t<T> *operator->() const { return &Value; } ++}; ++ ++// Traverses a subtree of the AST, reporting declarations referenced. ++void walkAST(AnalysisContext &, Decl &Root, ++ llvm::function_ref<void(SourceLocation, Hinted<NamedDecl &>)>); ++ ++// Finds the locations where a declaration is provided. ++llvm::SmallVector<Hinted<Location>> locateDecl(AnalysisContext &, ++ const NamedDecl &); ++ ++// Finds the locations where a macro is provided. ++Hinted<Location> locateMacro(AnalysisContext &, const DefinedMacro &); ++ ++// Finds the headers that provide a location. ++llvm::SmallVector<Hinted<Header>> includableHeader(AnalysisContext &, ++ const Location &); ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif ++//===--- AnalysisInternal.h - Analysis building blocks ------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides smaller, testable pieces of the used-header analysis. ++// We find the headers by chaining together several mappings. ++// ++// AST => AST node => Symbol => Location => Header ++// / ++// Macro expansion => ++// ++// The individual steps are declared here. ++// (AST => AST Node => Symbol is one API to avoid materializing DynTypedNodes). ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H ++#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H ++ + #include "clang/Basic/SourceLocation.h" + #include "llvm/ADT/STLFunctionalExtras.h" + +diff --git a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +index 5e2807332f94..25d66b4f30df 100644 +--- a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt ++++ b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +@@ -1,10 +1,15 @@ + set(LLVM_LINK_COMPONENTS Support) + + add_clang_library(clangIncludeCleaner ++ Analysis.cpp ++ Headers.cpp ++ Hooks.cpp ++ Locations.cpp ++ Types.cpp + WalkAST.cpp + + LINK_LIBS + clangBasic ++ clangLex + clangAST + ) +- +diff --git a/clang-tools-extra/include-cleaner/lib/Headers.cpp b/clang-tools-extra/include-cleaner/lib/Headers.cpp +new file mode 100644 +index 000000000000..f41bbe4c59c8 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Headers.cpp +@@ -0,0 +1,46 @@ ++//===--- Headers.cpp - Find headers that provide locations ----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "AnalysisInternal.h" ++#include "clang/Basic/SourceManager.h" ++#include "clang/Lex/Preprocessor.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++llvm::SmallVector<Hinted<Header>> includableHeader(AnalysisContext &Ctx, ++ const Location &Loc) { ++ switch (Loc.kind()) { ++ case Location::Physical: { ++ FileID FID = Ctx.sourceManager().getFileID( ++ Ctx.sourceManager().getExpansionLoc(Loc.getPhysical())); ++ if (FID == Ctx.sourceManager().getMainFileID()) ++ return {Header::mainFile()}; ++ if (FID == Ctx.preprocessor().getPredefinesFileID()) ++ return {Header::builtin()}; ++ // FIXME: if the file is not self-contained, find its umbrella header: ++ // - files that lack header guards (e.g. *.def) ++ // - IWYU private pragmas (and maybe export?) ++ // - #pragma clang include_instead ++ // - headers containing "#error ... include" clangd isDontIncludeMeHeader ++ // - apple framework header layout ++ if (auto *FE = Ctx.sourceManager().getFileEntryForID(FID)) ++ return {{FE}}; ++ return {}; ++ } ++ case Location::StandardLibrary: ++ // FIXME: some symbols are provided by multiple stdlib headers: ++ // - for historical reasons, like size_t ++ // - some headers are guaranteed to include others (<initializer_list>) ++ // - ::printf is de facto provided by cstdio and stdio.h, etc ++ return {{Loc.getStandardLibrary().header()}}; ++ } ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/Hooks.cpp b/clang-tools-extra/include-cleaner/lib/Hooks.cpp +new file mode 100644 +index 000000000000..decb83110c65 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Hooks.cpp +@@ -0,0 +1,166 @@ ++//===--- Hooks.cpp - Record events from the compiler --------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Hooks.h" ++#include "AnalysisInternal.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang/AST/ASTConsumer.h" ++#include "clang/AST/DeclCXX.h" ++#include "clang/AST/DeclGroup.h" ++#include "clang/AST/DeclObjC.h" ++#include "clang/Lex/MacroInfo.h" ++#include "clang/Lex/PPCallbacks.h" ++#include "clang/Lex/Preprocessor.h" ++#include "clang/Lex/Token.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++class PPRecorder : public PPCallbacks { ++public: ++ PPRecorder(AnalysisContext &Ctx, RecordedPP &Recorded) ++ : Ctx(Ctx), Recorded(Recorded) {} ++ ++ virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, ++ SrcMgr::CharacteristicKind FileType, ++ FileID PrevFID) override { ++ Active = Ctx.sourceManager().isWrittenInMainFile(Loc); ++ } ++ ++ void InclusionDirective(SourceLocation Hash, const Token &IncludeTok, ++ StringRef SpelledFilename, bool IsAngled, ++ CharSourceRange FilenameRange, Optional<FileEntryRef> File, ++ StringRef SearchPath, StringRef RelativePath, ++ const Module *, SrcMgr::CharacteristicKind) override { ++ if (!Active) ++ return; ++ ++ unsigned Index = Recorded.Includes.All.size(); ++ Recorded.Includes.All.emplace_back(); ++ RecordedPP::Include &I = Recorded.Includes.All.back(); ++ const auto *const RawFile = &(*File).getFileEntry(); ++ I.Location = Hash; ++ I.Resolved = RawFile; ++ I.Line = Ctx.sourceManager().getSpellingLineNumber(Hash); ++ auto BySpellingIt = ++ Recorded.Includes.BySpelling.try_emplace(SpelledFilename).first; ++ I.Spelled = BySpellingIt->first(); ++ ++ BySpellingIt->second.push_back(Index); ++ Recorded.Includes.ByFile[RawFile].push_back(Index); ++ } ++ ++ void MacroExpands(const Token &MacroName, const MacroDefinition &MD, ++ SourceRange Range, const MacroArgs *Args) override { ++ if (!Active) ++ return; ++ recordMacroRef(MacroName, *MD.getMacroInfo()); ++ } ++ ++ void MacroDefined(const Token &MacroName, const MacroDirective *MD) override { ++ if (!Active) ++ return; ++ ++ const auto *MI = MD->getMacroInfo(); ++ // The tokens of a macro definition could refer to a macro. ++ // Formally this reference isn't resolved until this macro is expanded, ++ // but we want to treat it as a reference anyway. ++ for (const auto &Tok : MI->tokens()) { ++ auto *II = Tok.getIdentifierInfo(); ++ // Could this token be a reference to a macro? (Not param to this macro). ++ if (!II || !II->hadMacroDefinition() || ++ llvm::is_contained(MI->params(), II)) ++ continue; ++ if (const MacroInfo *MI = Ctx.preprocessor().getMacroInfo(II)) ++ recordMacroRef(Tok, *MI); ++ } ++ } ++ ++private: ++ void recordMacroRef(const Token &Tok, const MacroInfo &MI) { ++ if (MI.isBuiltinMacro()) ++ return; // __FILE__ is not a reference. ++ Recorded.MacroReferences.push_back(SymbolReference{ ++ Tok.getLocation(), ++ Ctx.cache().macro(Tok.getIdentifierInfo(), MI.getDefinitionLoc())}); ++ } ++ ++ bool Active = false; ++ AnalysisContext &Ctx; ++ RecordedPP &Recorded; ++}; ++ ++llvm::SmallVector<const RecordedPP::Include *> ++RecordedPP::RecordedIncludes::match(Header H) const { ++ llvm::SmallVector<const Include *> Result; ++ switch (H.kind()) { ++ case Header::Physical: ++ for (unsigned I : ByFile.lookup(H.getPhysical())) ++ Result.push_back(&All[I]); ++ break; ++ case Header::StandardLibrary: ++ for (unsigned I : ++ BySpelling.lookup(H.getStandardLibrary().name().trim("<>"))) ++ Result.push_back(&All[I]); ++ break; ++ case Header::Verbatim: ++ for (unsigned I : BySpelling.lookup(H.getVerbatimSpelling())) ++ Result.push_back(&All[I]); ++ break; ++ case Header::Builtin: ++ case Header::MainFile: ++ break; ++ } ++ llvm::sort(Result); ++ Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); ++ return Result; ++} ++ ++class ASTRecorder : public ASTConsumer { ++public: ++ ASTRecorder(AnalysisContext &Ctx, RecordedAST &Recorded) ++ : Ctx(Ctx), Recorded(Recorded) {} ++ ++ bool HandleTopLevelDecl(DeclGroupRef DG) override { ++ for (Decl *D : DG) { ++ if (!Ctx.sourceManager().isWrittenInMainFile( ++ Ctx.sourceManager().getExpansionLoc(D->getLocation()))) ++ continue; ++ if (const auto *T = llvm::dyn_cast<FunctionDecl>(D)) ++ if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) ++ continue; ++ if (const auto *T = llvm::dyn_cast<CXXRecordDecl>(D)) ++ if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) ++ continue; ++ if (const auto *T = llvm::dyn_cast<VarDecl>(D)) ++ if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) ++ continue; ++ // ObjCMethodDecl are not actually top-level! ++ if (isa<ObjCMethodDecl>(D)) ++ continue; ++ ++ Recorded.TopLevelDecls.push_back(D); ++ } ++ return true; ++ } ++ ++private: ++ AnalysisContext &Ctx; ++ RecordedAST &Recorded; ++}; ++ ++std::unique_ptr<PPCallbacks> RecordedPP::record(AnalysisContext &Ctx) { ++ return std::make_unique<PPRecorder>(Ctx, *this); ++} ++ ++std::unique_ptr<ASTConsumer> RecordedAST::record(AnalysisContext &Ctx) { ++ return std::make_unique<ASTRecorder>(Ctx, *this); ++} ++ ++} // namespace include_cleaner ++} // namespace clang +\ No newline at end of file +diff --git a/clang-tools-extra/include-cleaner/lib/Locations.cpp b/clang-tools-extra/include-cleaner/lib/Locations.cpp +new file mode 100644 +index 000000000000..7e23c56c1dfc +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Locations.cpp +@@ -0,0 +1,60 @@ ++//===--- Locations.cpp - Find the locations that provide symbols ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "AnalysisInternal.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Types.h" ++#include "clang/AST/Decl.h" ++#include "clang/AST/DeclBase.h" ++#include "clang/AST/DeclTemplate.h" ++#include "clang/Basic/SourceLocation.h" ++#include "llvm/ADT/SmallVector.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++Hint declHint(const NamedDecl &D) { ++ Hint H = Hint::None; ++ if (auto *TD = llvm::dyn_cast<TagDecl>(&D)) ++ if (TD->isThisDeclarationADefinition()) ++ H |= Hint::Complete; ++ if (auto *CTD = llvm::dyn_cast<ClassTemplateDecl>(&D)) ++ if (CTD->isThisDeclarationADefinition()) ++ H |= Hint::Complete; ++ // A function template being defined is similar to a class being defined. ++ if (auto *FTD = llvm::dyn_cast<FunctionTemplateDecl>(&D)) ++ if (FTD->isThisDeclarationADefinition()) ++ H |= Hint::Complete; ++ return H; ++} ++ ++llvm::SmallVector<Hinted<Location>> locateDecl(AnalysisContext &Ctx, ++ const NamedDecl &ND) { ++ if (auto StdlibSym = Ctx.cache().StdlibRecognizer(&ND)) ++ return {{*StdlibSym}}; ++ ++ llvm::SmallVector<Hinted<Location>> Result; ++ // Is accepting all the redecls too naive? ++ for (const Decl *RD : ND.redecls()) { ++ // `friend X` is not an interesting location for X unless it's acting as a ++ // forward-declaration. ++ if (RD->getFriendObjectKind() == Decl::FOK_Declared) ++ continue; ++ SourceLocation Loc = RD->getLocation(); ++ if (Loc.isValid()) ++ Result.push_back({Loc, declHint(*cast<NamedDecl>(RD))}); ++ } ++ return Result; ++} ++ ++Hinted<Location> locateMacro(AnalysisContext &Ctx, const DefinedMacro &M) { ++ return {M.Definition}; ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp +new file mode 100644 +index 000000000000..6b79c603a70d +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Types.cpp +@@ -0,0 +1,61 @@ ++//===--- Types.cpp - Data structures for used-symbol analysis -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Types.h" ++#include "clang/AST/Decl.h" ++#include "clang/Basic/FileEntry.h" ++#include "clang/Basic/IdentifierTable.h" ++#include "clang/Tooling/Inclusions/StandardLibrary.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++std::string Symbol::name() const { ++ switch (kind()) { ++ case Macro: ++ return getMacro()->Name->getName().str(); ++ case Declaration: ++ return getDeclaration()->getNameAsString(); ++ } ++ llvm_unreachable("Unhandled Symbol kind"); ++} ++ ++std::string Symbol::nodeName() const { ++ if (kind() == Macro) ++ return "macro"; ++ return getDeclaration()->getDeclKindName(); ++} ++ ++std::string Location::name(const SourceManager &SM) const { ++ switch (K) { ++ case Physical: ++ return SrcLoc.printToString(SM); ++ case StandardLibrary: ++ return StdlibSym.name().str(); ++ } ++ llvm_unreachable("Unhandled Location kind"); ++} ++ ++std::string Header::name() const { ++ switch (K) { ++ case Physical: ++ return PhysicalFile->getName().str(); ++ case StandardLibrary: ++ return StdlibHeader.name().str(); ++ case Verbatim: ++ return VerbatimSpelling; ++ case Builtin: ++ return "<built-in>"; ++ case MainFile: ++ return "<main-file>"; ++ } ++ llvm_unreachable("Unhandled Header kind"); ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +index b7354fe300e0..02a27977005f 100644 +--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp ++++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +@@ -7,40 +7,132 @@ + //===----------------------------------------------------------------------===// + + #include "AnalysisInternal.h" ++#include "clang-include-cleaner/Analysis.h" + #include "clang/AST/RecursiveASTVisitor.h" ++#include "clang/Basic/SourceManager.h" ++#include "llvm/Support/SaveAndRestore.h" + + namespace clang { + namespace include_cleaner { + namespace { +-using DeclCallback = llvm::function_ref<void(SourceLocation, NamedDecl &)>; + ++using DeclCallback = ++ llvm::function_ref<void(SourceLocation, Hinted<NamedDecl &>)>; ++ ++// Traverses part of the AST, looking for references and reporting them. + class ASTWalker : public RecursiveASTVisitor<ASTWalker> { +- DeclCallback Callback; ++public: ++ ASTWalker(AnalysisContext &Ctx, DeclCallback Callback) ++ : Ctx(Ctx), Callback(Callback) {} + +- void report(SourceLocation Loc, NamedDecl *ND) { +- if (!ND || Loc.isInvalid()) +- return; +- Callback(Loc, *cast<NamedDecl>(ND->getCanonicalDecl())); ++ bool VisitDeclRefExpr(DeclRefExpr *E) { ++ if (!Ctx.policy().Operators) ++ if (auto *FD = E->getDecl()->getAsFunction()) ++ if (FD->isOverloadedOperator()) ++ return true; ++ report(E->getLocation(), E->getFoundDecl()); ++ return true; + } + +-public: +- ASTWalker(DeclCallback Callback) : Callback(Callback) {} ++ bool VisitMemberExpr(MemberExpr *ME) { ++ if (Ctx.policy().Members) ++ report(ME->getMemberLoc(), ME->getFoundDecl().getDecl()); ++ return true; ++ } ++ ++ bool VisitTagType(TagType *TT) { ++ report(LocationOfType, TT->getDecl()); ++ return true; ++ } ++ ++ bool VisitFunctionDecl(FunctionDecl *FD) { ++ // Count function definitions as a reference to their declarations. ++ if (FD->isThisDeclarationADefinition() && FD->getCanonicalDecl() != FD) ++ report(FD->getLocation(), FD->getCanonicalDecl()); ++ return true; ++ } ++ ++ bool VisitCXXConstructExpr(CXXConstructExpr *E) { ++ if (!Ctx.policy().Construction) ++ return true; ++ SaveAndRestore<SourceLocation> Loc(LocationOfType, E->getLocation()); ++ LocationOfType = E->getLocation(); ++ return TraverseType(E->getType()); ++ } ++ ++ // We handle TypeLocs by saving their loc and consuming it in Visit*Type(). ++ // ++ // Handling Visit*TypeLoc() directly would be simpler, but sometimes unwritten ++ // types count as references (e.g. implicit conversions, with no TypeLoc). ++ // Stashing the location and visiting the contained type lets us handle both ++ // cases in VisitTagType() etc. ++ bool TraverseTypeLoc(TypeLoc TL) { ++ SaveAndRestore<SourceLocation> Loc(LocationOfType, TL.getBeginLoc()); ++ // The base implementation calls: ++ // - Visit*TypeLoc() - does nothing ++ // - Visit*Type() - where we handle type references ++ // - TraverseTypeLoc for each lexically nested type. ++ return Base::TraverseTypeLoc(TL); ++ } + +- bool VisitTagTypeLoc(TagTypeLoc TTL) { +- report(TTL.getNameLoc(), TTL.getDecl()); ++ bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { ++ report(LocationOfType, ++ TST->getTemplateName().getAsTemplateDecl()); // Primary template. ++ report(LocationOfType, TST->getAsCXXRecordDecl()); // Specialization + return true; + } + +- bool VisitDeclRefExpr(DeclRefExpr *DRE) { +- report(DRE->getLocation(), DRE->getFoundDecl()); ++ bool VisitUsingType(UsingType *UT) { ++ report(LocationOfType, UT->getFoundDecl()); + return true; + } ++ ++ bool VisitTypedefType(TypedefType *TT) { ++ report(LocationOfType, TT->getDecl()); ++ return true; ++ } ++ ++ bool VisitUsingDecl(UsingDecl *UD) { ++ for (const auto *USD : UD->shadows()) ++ report(UD->getLocation(), USD->getTargetDecl()); ++ return true; ++ } ++ ++ bool VisitOverloadExpr(OverloadExpr *E) { ++ if (llvm::isa<UnresolvedMemberExpr>(E) && !Ctx.policy().Members) ++ return true; ++ for (auto *Candidate : E->decls()) ++ report(E->getExprLoc(), Candidate); ++ return true; ++ } ++ ++private: ++ void report(SourceLocation Loc, NamedDecl *ND) { ++ while (Loc.isMacroID()) { ++ auto DecLoc = Ctx.sourceManager().getDecomposedLoc(Loc); ++ const SrcMgr::ExpansionInfo &Expansion = ++ Ctx.sourceManager().getSLocEntry(DecLoc.first).getExpansion(); ++ if (!Expansion.isMacroArgExpansion()) ++ return; // Names within macro bodies are not considered references. ++ Loc = Expansion.getSpellingLoc().getLocWithOffset(DecLoc.second); ++ } ++ // FIXME: relevant ranking hints? ++ if (ND) ++ Callback(Loc, *cast<NamedDecl>(ND->getCanonicalDecl())); ++ } ++ ++ using Base = RecursiveASTVisitor; ++ ++ AnalysisContext &Ctx; ++ DeclCallback Callback; ++ ++ SourceLocation LocationOfType; + }; + + } // namespace + +-void walkAST(Decl &Root, DeclCallback Callback) { +- ASTWalker(Callback).TraverseDecl(&Root); ++void walkAST(AnalysisContext &Ctx, Decl &Root, DeclCallback Callback) { ++ ASTWalker(Ctx, Callback).TraverseDecl(&Root); + } + + } // namespace include_cleaner +diff --git a/clang-tools-extra/include-cleaner/tool/CMakeLists.txt b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt +new file mode 100644 +index 000000000000..f8f7c81c761b +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt +@@ -0,0 +1,17 @@ ++set(LLVM_LINK_COMPONENTS support) ++ ++add_clang_tool(clang-include-cleaner ++ ClangIncludeCleaner.cpp ++ ) ++ ++clang_target_link_libraries(clang-include-cleaner ++ PRIVATE ++ clangBasic ++ clangFrontend ++ clangTooling ++ ) ++ ++target_link_libraries(clang-include-cleaner ++ PRIVATE ++ clangIncludeCleaner ++ ) +\ No newline at end of file +diff --git a/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp +new file mode 100644 +index 000000000000..aad70eabdae9 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp +@@ -0,0 +1,187 @@ ++//===--- ClangIncludeCleaner.cpp - Standalone used-header analysis --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// clang-include-cleaner finds violations of include-what-you-use policy. ++// ++// It scans a file, finding referenced symbols and headers providing them. ++// - if a reference is satisfied only by indirect #include dependencies, ++// this violates the policy and direct #includes are suggested. ++// - if some #include directive doesn't satisfy any references, this violates ++// the policy (don't include what you don't use!) and removal is suggested. ++// ++// With the -satisfied flag, it will also explain things that were OK: ++// satisfied references and used #includes. ++// ++// This tool doesn't fix broken code where missing #includes prevent parsing, ++// try clang-include-fixer for this instead. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Hooks.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Frontend/CompilerInstance.h" ++#include "clang/Frontend/FrontendAction.h" ++#include "clang/Tooling/CommonOptionsParser.h" ++#include "clang/Tooling/Tooling.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/InitLLVM.h" ++ ++llvm::cl::OptionCategory OptionsCat{"clang-include-cleaner"}; ++llvm::cl::opt<bool> ShowSatisfied{ ++ "satisfied", ++ llvm::cl::cat(OptionsCat), ++ llvm::cl::desc( ++ "Show references whose header is included, and used includes"), ++ llvm::cl::init(false), ++}; ++llvm::cl::opt<bool> Recover{ ++ "recover", ++ llvm::cl::cat(OptionsCat), ++ llvm::cl::desc("Suppress further errors for the same header"), ++ llvm::cl::init(true), ++}; ++ ++namespace clang { ++namespace include_cleaner { ++namespace { ++ ++class Action : public clang::ASTFrontendAction { ++public: ++ bool BeginSourceFileAction(CompilerInstance &CI) override { ++ Diag = &CI.getDiagnostics(); ++ ID.emplace(Diag); ++ Ctx.emplace(Policy{}, CI.getPreprocessor()); ++ CI.getPreprocessor().addPPCallbacks(PP.record(*Ctx)); ++ return true; ++ } ++ ++ void EndSourceFile() override { ++ llvm::DenseSet<Header> Recovered; ++ llvm::DenseMap<const RecordedPP::Include *, Symbol> Used; ++ walkUsed(*Ctx, AST.TopLevelDecls, PP.MacroReferences, ++ [&](SourceLocation Loc, Symbol Sym, ArrayRef<Header> Headers) { ++ diagnoseReference(Loc, Sym, Headers, Recovered, Used); ++ }); ++ diagnoseIncludes(PP.Includes.all(), Used); ++ Ctx.reset(); ++ ++ ASTFrontendAction::EndSourceFile(); ++ } ++ ++ virtual std::unique_ptr<ASTConsumer> ++ CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { ++ return AST.record(*Ctx); ++ } ++ ++private: ++ // The diagnostics that we issue. ++ struct CustomDiagnosticIDs { ++ // References ++ unsigned Satisfied; ++ unsigned Unsatisfied; ++ unsigned NoHeader; ++ unsigned NoteHeader; ++ // #includes ++ unsigned Used; ++ unsigned Unused; ++ ++ CustomDiagnosticIDs(DiagnosticsEngine *D) { ++ auto SatisfiedLevel = ShowSatisfied ? DiagnosticsEngine::Remark ++ : DiagnosticsEngine::Ignored; ++ auto Error = DiagnosticsEngine::Error; ++ auto Note = DiagnosticsEngine::Note; ++ auto Warn = DiagnosticsEngine::Warning; ++ ++ Satisfied = D->getCustomDiagID(SatisfiedLevel, "%0 '%1' provided by %2"); ++ Unsatisfied = D->getCustomDiagID(Error, "no header included for %0 '%1'"); ++ NoHeader = D->getCustomDiagID(Warn, "unknown header provides %0 '%1'"); ++ NoteHeader = D->getCustomDiagID(Note, "provided by %0"); ++ Used = D->getCustomDiagID(SatisfiedLevel, "include provides %0 '%1'"); ++ Unused = D->getCustomDiagID(Error, "include is unused"); ++ } ++ }; ++ ++ void ++ diagnoseReference(SourceLocation Loc, Symbol Sym, ArrayRef<Header> Headers, ++ llvm::DenseSet<Header> &Recovered, ++ llvm::DenseMap<const RecordedPP::Include *, Symbol> &Used) { ++ bool Diagnosed = false; ++ for (const auto &H : Headers) { ++ if (H.kind() == Header::Builtin || H.kind() == Header::MainFile) { ++ if (!Diagnosed) { ++ Diag->Report(Loc, ID->Satisfied) ++ << Sym.nodeName() << Sym.name() << H.name(); ++ Diagnosed = true; ++ } ++ } ++ for (const auto *I : PP.Includes.match(H)) { ++ Used.try_emplace(I, Sym); ++ if (!Diagnosed) { ++ Diag->Report(Loc, ID->Satisfied) ++ << Sym.nodeName() << Sym.name() << I->Spelled; ++ Diagnosed = true; ++ } ++ } ++ } ++ if (Diagnosed) ++ return; ++ for (const auto &H : Headers) { ++ if (Recovered.contains(H)) { ++ Diag->Report(Loc, ID->Satisfied) ++ << Sym.nodeName() << Sym.name() << H.name(); ++ return; ++ } ++ } ++ Diag->Report(Loc, Headers.empty() ? ID->NoHeader : ID->Unsatisfied) ++ << Sym.nodeName() << Sym.name(); ++ for (const auto &H : Headers) { ++ Recovered.insert(H); ++ Diag->Report(ID->NoteHeader) << H.name(); ++ } ++ } ++ ++ void diagnoseIncludes( ++ ArrayRef<RecordedPP::Include> Includes, ++ const llvm::DenseMap<const RecordedPP::Include *, Symbol> &Used) { ++ for (const auto &I : Includes) { ++ auto It = Used.find(&I); ++ if (It == Used.end()) ++ Diag->Report(I.Location, ID->Unused); ++ else ++ Diag->Report(I.Location, ID->Used) ++ << It->second.nodeName() << It->second.name(); ++ } ++ } ++ ++ llvm::Optional<AnalysisContext> Ctx; ++ RecordedPP PP; ++ RecordedAST AST; ++ DiagnosticsEngine *Diag; ++ llvm::Optional<CustomDiagnosticIDs> ID; ++}; ++ ++} // namespace ++} // namespace include_cleaner ++} // namespace clang ++ ++int main(int Argc, const char **Argv) { ++ llvm::InitLLVM X(Argc, Argv); ++ auto OptionsParser = ++ clang::tooling::CommonOptionsParser::create(Argc, Argv, OptionsCat); ++ if (!OptionsParser) { ++ llvm::errs() << toString(OptionsParser.takeError()); ++ return 1; ++ } ++ ++ return clang::tooling::ClangTool(OptionsParser->getCompilations(), ++ OptionsParser->getSourcePathList()) ++ .run(clang::tooling::newFrontendActionFactory< ++ clang::include_cleaner::Action>() ++ .get()); ++} +diff --git a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h +index c6ce2780dae6..e94a7fb9304a 100644 +--- a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h ++++ b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h +@@ -49,6 +49,9 @@ private: + friend bool operator==(const Header &L, const Header &R) { + return L.ID == R.ID; + } ++ friend bool operator<(const Header &L, const Header &R) { ++ return L.ID < R.ID; ++ } + }; + + // A top-level standard library symbol, such as std::vector diff --git a/build/build-clang/compiler-rt-no-codesign.patch b/build/build-clang/compiler-rt-no-codesign.patch new file mode 100644 index 0000000000..99d3f7e992 --- /dev/null +++ b/build/build-clang/compiler-rt-no-codesign.patch @@ -0,0 +1,21 @@ +Disable codesign for macosx cross-compile toolchain. Codesign only works on OSX. + +Index: cmake/Modules/AddCompilerRT.cmake +=================================================================== +--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake ++++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake +@@ -321,14 +321,6 @@ + set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "") + set_target_properties(${libname} PROPERTIES IMPORT_SUFFIX ".lib") + endif() +- if(APPLE) +- # Ad-hoc sign the dylibs +- add_custom_command(TARGET ${libname} +- POST_BUILD +- COMMAND codesign --sign - $<TARGET_FILE:${libname}> +- WORKING_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR} +- ) +- endif() + endif() + + set(parent_target_arg) diff --git a/build/build-clang/compiler-rt-rss-limit-heap-profile.patch b/build/build-clang/compiler-rt-rss-limit-heap-profile.patch new file mode 100644 index 0000000000..f7dfdfcdae --- /dev/null +++ b/build/build-clang/compiler-rt-rss-limit-heap-profile.patch @@ -0,0 +1,49 @@ +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp +index 8fd398564280..b7c4820971bb 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp +@@ -29,6 +29,7 @@ void *BackgroundThread(void *arg) { + const uptr hard_rss_limit_mb = common_flags()->hard_rss_limit_mb; + const uptr soft_rss_limit_mb = common_flags()->soft_rss_limit_mb; + const bool heap_profile = common_flags()->heap_profile; ++ const bool rss_limit_heap_profile = common_flags()->rss_limit_heap_profile; + uptr prev_reported_rss = 0; + uptr prev_reported_stack_depot_size = 0; + bool reached_soft_rss_limit = false; +@@ -56,6 +57,10 @@ void *BackgroundThread(void *arg) { + Report("%s: hard rss limit exhausted (%zdMb vs %zdMb)\n", + SanitizerToolName, hard_rss_limit_mb, current_rss_mb); + DumpProcessMap(); ++ if (rss_limit_heap_profile) { ++ Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb); ++ __sanitizer_print_memory_profile(90, 20); ++ } + Die(); + } + if (soft_rss_limit_mb) { +@@ -63,6 +68,11 @@ void *BackgroundThread(void *arg) { + reached_soft_rss_limit = true; + Report("%s: soft rss limit exhausted (%zdMb vs %zdMb)\n", + SanitizerToolName, soft_rss_limit_mb, current_rss_mb); ++ if (rss_limit_heap_profile) { ++ Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb); ++ __sanitizer_print_memory_profile(90, 20); ++ rss_during_last_reported_profile = current_rss_mb; ++ } + SetRssLimitExceeded(true); + } else if (soft_rss_limit_mb >= current_rss_mb && + reached_soft_rss_limit) { +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +index 6148ae56067c..a0fbb8e14bd5 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +@@ -147,6 +147,9 @@ COMMON_FLAG(uptr, max_allocation_size_mb, 0, + "If non-zero, malloc/new calls larger than this size will return " + "nullptr (or crash if allocator_may_return_null=false).") + COMMON_FLAG(bool, heap_profile, false, "Experimental heap profiler, asan-only") ++COMMON_FLAG(bool, rss_limit_heap_profile, false, ++ "Experimental heap profiler (only when hard/soft rss limit " ++ "exceeded, asan-only") + COMMON_FLAG(s32, allocator_release_to_os_interval_ms, + ((bool)SANITIZER_FUCHSIA || (bool)SANITIZER_WINDOWS) ? -1 : 5000, + "Only affects a 64-bit allocator. If set, tries to release unused " diff --git a/build/build-clang/downgrade-mangling-error_clang_12.patch b/build/build-clang/downgrade-mangling-error_clang_12.patch new file mode 100644 index 0000000000..ad31306ff3 --- /dev/null +++ b/build/build-clang/downgrade-mangling-error_clang_12.patch @@ -0,0 +1,23 @@ +Downgrade unimplemented mangling diagnostic from error to note. +This codepath is exercised by MozsearchIndexer.cpp (the searchfox +indexer) when indexing on Windows. We can do without having the +unimplemented bits for now as long the compiler doesn't fail the +build. See also https://bugs.llvm.org/show_bug.cgi?id=39294 + +diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp +index 4420f6a2c1c3..4d9a6434d245 100644 +--- a/clang/lib/AST/ItaniumMangle.cpp ++++ b/clang/lib/AST/ItaniumMangle.cpp +@@ -4028,10 +4028,11 @@ recurse: + if (!NullOut) { + // As bad as this diagnostic is, it's better than crashing. + DiagnosticsEngine &Diags = Context.getDiags(); +- unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, ++ unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Remark, + "cannot yet mangle expression type %0"); + Diags.Report(E->getExprLoc(), DiagID) + << E->getStmtClassName() << E->getSourceRange(); ++ Out << "MOZ_WE_HACKED_AROUND_BUG_1418415"; + return; + } + break; diff --git a/build/build-clang/find_symbolizer_linux_clang_10.patch b/build/build-clang/find_symbolizer_linux_clang_10.patch new file mode 100644 index 0000000000..1ddb02024d --- /dev/null +++ b/build/build-clang/find_symbolizer_linux_clang_10.patch @@ -0,0 +1,58 @@ +We currently need this patch because ASan only searches PATH to find the +llvm-symbolizer binary to symbolize ASan traces. On testing machines, this +can be installed in PATH easily. However, for e.g. the ASan Nightly Project, +where we ship an ASan build, including llvm-symbolizer, to the user, we +cannot expect llvm-symbolizer to be on PATH. Instead, we should try to look +it up next to the binary. This patch implements the functionality for Linux +only until there is similar functionality provided upstream. + +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +index 79930d79425..cfb4f90c0d5 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +@@ -20,6 +20,10 @@ + #include "sanitizer_common.h" + #include "sanitizer_file.h" + ++#if SANITIZER_LINUX ++#include "sanitizer_posix.h" ++#endif ++ + namespace __sanitizer { + + void CatastrophicErrorWrite(const char *buffer, uptr length) { +@@ -194,6 +198,34 @@ char *FindPathToBinary(const char *name) { + if (*end == '\0') break; + beg = end + 1; + } ++ ++#if SANITIZER_LINUX ++ // If we cannot find the requested binary in PATH, we should try to locate ++ // it next to the binary, in case it is shipped with the build itself ++ // (e.g. llvm-symbolizer shipped with sanitizer build to symbolize on client. ++ if (internal_readlink("/proc/self/exe", buffer.data(), kMaxPathLength) < 0) ++ return nullptr; ++ ++ uptr buf_len = internal_strlen(buffer.data()); ++ ++ /* Avoid using dirname() here */ ++ while (buf_len > 0) { ++ if (buffer[buf_len - 1] == '/') ++ break; ++ buf_len--; ++ } ++ ++ if (!buf_len) ++ return nullptr; ++ ++ if (buf_len + name_len + 1 <= kMaxPathLength) { ++ internal_memcpy(&buffer[buf_len], name, name_len); ++ buffer[buf_len + name_len] = '\0'; ++ if (FileExists(buffer.data())) ++ return internal_strdup(buffer.data()); ++ } ++#endif ++ + return nullptr; + } + diff --git a/build/build-clang/find_symbolizer_linux_clang_15.patch b/build/build-clang/find_symbolizer_linux_clang_15.patch new file mode 100644 index 0000000000..63309e8f00 --- /dev/null +++ b/build/build-clang/find_symbolizer_linux_clang_15.patch @@ -0,0 +1,53 @@ +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +index 7ef499ce07b1..8fd682f943fe 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +@@ -21,6 +21,10 @@ + #include "sanitizer_file.h" + # include "sanitizer_interface_internal.h" + ++#if SANITIZER_LINUX ++#include "sanitizer_posix.h" ++#endif ++ + namespace __sanitizer { + + void CatastrophicErrorWrite(const char *buffer, uptr length) { +@@ -206,11 +210,35 @@ char *FindPathToBinary(const char *name) { + return internal_strdup(name); + } + ++ uptr name_len = internal_strlen(name); ++ InternalMmapVector<char> buffer(kMaxPathLength); ++ ++#if SANITIZER_LINUX ++ // If we cannot find the requested binary in PATH, we should try to locate ++ // it next to the binary, in case it is shipped with the build itself ++ // (e.g. llvm-symbolizer shipped with sanitizer build to symbolize on client. ++ if (internal_readlink("/proc/self/exe", buffer.data(), kMaxPathLength) >= 0) { ++ uptr buf_len = internal_strlen(buffer.data()); ++ ++ /* Avoid using dirname() here */ ++ while (buf_len > 0) { ++ if (buffer[buf_len - 1] == '/') ++ break; ++ buf_len--; ++ } ++ ++ if (buf_len && buf_len + name_len + 1 <= kMaxPathLength) { ++ internal_memcpy(&buffer[buf_len], name, name_len); ++ buffer[buf_len + name_len] = '\0'; ++ if (FileExists(buffer.data())) ++ return internal_strdup(buffer.data()); ++ } ++ } ++#endif ++ + const char *path = GetEnv("PATH"); + if (!path) + return nullptr; +- uptr name_len = internal_strlen(name); +- InternalMmapVector<char> buffer(kMaxPathLength); + const char *beg = path; + while (true) { + const char *end = internal_strchrnul(beg, kPathSeparator); diff --git a/build/build-clang/fuzzing_ccov_build_clang_12.patch b/build/build-clang/fuzzing_ccov_build_clang_12.patch new file mode 100644 index 0000000000..1b60a95b91 --- /dev/null +++ b/build/build-clang/fuzzing_ccov_build_clang_12.patch @@ -0,0 +1,27 @@ +From 98bf90ef5ea3dd848ce7d81a662eb7499d11c91c Mon Sep 17 00:00:00 2001 +From: Calixte Denizet <calixte.denizet@gmail.com> +Date: Fri, 16 Apr 2021 10:05:34 +0200 +Subject: [PATCH] [Gcov] Don't run global destructor in ccov builds when env + MOZ_FUZZING_CCOV is existing + +--- + compiler-rt/lib/profile/GCDAProfiling.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c +index 4293e8f7b5bf..6cda4bc7601f 100644 +--- a/compiler-rt/lib/profile/GCDAProfiling.c ++++ b/compiler-rt/lib/profile/GCDAProfiling.c +@@ -586,6 +586,9 @@ void llvm_writeout_files(void) { + __attribute__((destructor(100))) + #endif + static void llvm_writeout_and_clear(void) { ++ if (getenv("MOZ_FUZZING_CCOV")) { ++ return; ++ } + llvm_writeout_files(); + fn_list_remove(&writeout_fn_list); + } +-- +2.30.2 + diff --git a/build/build-clang/linux64.json b/build/build-clang/linux64.json new file mode 100644 index 0000000000..48690dbdaa --- /dev/null +++ b/build/build-clang/linux64.json @@ -0,0 +1,5 @@ +{ + "cc": "/usr/lib/llvm-13/bin/clang", + "cxx": "/usr/lib/llvm-13/bin/clang++", + "as": "/usr/lib/llvm-13/bin/clang" +} diff --git a/build/build-clang/llvmorg-15-init-16512-g4b1e3d193706.patch b/build/build-clang/llvmorg-15-init-16512-g4b1e3d193706.patch new file mode 100644 index 0000000000..5ddb6a52de --- /dev/null +++ b/build/build-clang/llvmorg-15-init-16512-g4b1e3d193706.patch @@ -0,0 +1,138 @@ +From 8482662676a4b6ef79a718c8c09943cb15241664 Mon Sep 17 00:00:00 2001 +From: Tom Stellard <tstellar@redhat.com> +Date: Tue, 21 Jun 2022 22:22:11 -0700 +Subject: [PATCH] [gold] Ignore bitcode from sections inside object files + +-fembed-bitcode will put bitcode into special sections within object +files, but this is not meant to be used by LTO, so the gold plugin +should ignore it. + +https://github.com/llvm/llvm-project/issues/47216 + +Reviewed By: tejohnson, MaskRay + +Differential Revision: https://reviews.llvm.org/D116995 +--- + llvm/docs/BitCodeFormat.rst | 3 ++- + llvm/docs/GoldPlugin.rst | 4 ++++ + .../tools/gold/X86/Inputs/bcsection-lib.ll | 6 +++++ + llvm/test/tools/gold/X86/Inputs/bcsection.s | 5 ++++ + llvm/test/tools/gold/X86/bcsection.ll | 23 +++++++++++++++---- + llvm/tools/gold/gold-plugin.cpp | 8 +++++++ + 6 files changed, 43 insertions(+), 6 deletions(-) + create mode 100644 llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll + +diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst +index 8e81a7daa459..df1f6915d7d5 100644 +--- a/llvm/docs/BitCodeFormat.rst ++++ b/llvm/docs/BitCodeFormat.rst +@@ -475,7 +475,8 @@ formats. This wrapper format is useful for accommodating LTO in compilation + pipelines where intermediate objects must be native object files which contain + metadata in other sections. + +-Not all tools support this format. ++Not all tools support this format. For example, lld and the gold plugin will ++ignore these sections when linking object files. + + .. _encoding of LLVM IR: + +diff --git a/llvm/docs/GoldPlugin.rst b/llvm/docs/GoldPlugin.rst +index ce310bc2cf3c..07d2fc203eba 100644 +--- a/llvm/docs/GoldPlugin.rst ++++ b/llvm/docs/GoldPlugin.rst +@@ -17,6 +17,10 @@ and above also supports LTO via plugins. However, usage of the LLVM + gold plugin with ld.bfd is not tested and therefore not officially + supported or recommended. + ++As of LLVM 15, the gold plugin will ignore bitcode from the ``.llvmbc`` ++section inside of ELF object files. However, LTO with bitcode files ++is still supported. ++ + .. _`gold linker`: http://sourceware.org/binutils + .. _`GCC LTO`: http://gcc.gnu.org/wiki/LinkTimeOptimization + .. _`gold plugin interface`: http://gcc.gnu.org/wiki/whopr/driver +diff --git a/llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll b/llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll +new file mode 100644 +index 000000000000..ef3557c19cdc +--- /dev/null ++++ b/llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll +@@ -0,0 +1,6 @@ ++declare void @elf_func() ++ ++define i32 @lib_func() { ++ call void @elf_func() ++ ret i32 0 ++} +diff --git a/llvm/test/tools/gold/X86/Inputs/bcsection.s b/llvm/test/tools/gold/X86/Inputs/bcsection.s +index ede1e5c532dd..c523612563b4 100644 +--- a/llvm/test/tools/gold/X86/Inputs/bcsection.s ++++ b/llvm/test/tools/gold/X86/Inputs/bcsection.s +@@ -1,2 +1,7 @@ ++.global elf_func ++ ++elf_func: ++ ret ++ + .section .llvmbc + .incbin "bcsection.bc" +diff --git a/llvm/test/tools/gold/X86/bcsection.ll b/llvm/test/tools/gold/X86/bcsection.ll +index 6d3481f8f966..09882d83fe91 100644 +--- a/llvm/test/tools/gold/X86/bcsection.ll ++++ b/llvm/test/tools/gold/X86/bcsection.ll +@@ -2,16 +2,29 @@ + ; RUN: llvm-as -o %t/bcsection.bc %s + + ; RUN: llvm-mc -I=%t -filetype=obj -triple=x86_64-unknown-unknown -o %t/bcsection.bco %p/Inputs/bcsection.s +-; RUN: llvm-nm --no-llvm-bc %t/bcsection.bco 2>&1 | FileCheck %s -check-prefix=NO-SYMBOLS +-; NO-SYMBOLS: no symbols ++; RUN: llc -filetype=obj -mtriple=x86_64-unknown-unknown -o %t/bcsection-lib.o %p/Inputs/bcsection-lib.ll + +-; RUN: %gold -r -o %t/bcsection.o -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext %t/bcsection.bco +-; RUN: llvm-nm --no-llvm-bc %t/bcsection.o | FileCheck %s ++; RUN: %gold -shared --no-undefined -o %t/bcsection.so -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext %t/bcsection.bco %t/bcsection-lib.o ++ ++; This test checks that the gold plugin does not attempt to use the bitcode ++; in the .llvmbc section for LTO. bcsection-lib.o calls a function that is ++; present the symbol table of bcsection.bco, but not included in the embedded ++; bitcode. If the linker were to use the bitcode, then the symbols in the ++; symbol table of bcsection.bco will be ignored and the link will fail. ++; ++; bcsection.bco: ++; .text: ++; elf_func ++; .llvmbc: ++; bitcode_func ++; ++; bcsection-lib.o: ++; calls elf_func() + + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-unknown" + + ; CHECK: main +-define i32 @main() { ++define i32 @bitcode_func() { + ret i32 0 + } +diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp +index 180c181368e3..294c7a3d6178 100644 +--- a/llvm/tools/gold/gold-plugin.cpp ++++ b/llvm/tools/gold/gold-plugin.cpp +@@ -540,6 +540,14 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, + BufferRef = Buffer->getMemBufferRef(); + } + ++ // Only use bitcode files for LTO. InputFile::create() will load bitcode ++ // from the .llvmbc section within a binary object, this bitcode is typically ++ // generated by -fembed-bitcode and is not to be used by LLVMgold.so for LTO. ++ if (identify_magic(BufferRef.getBuffer()) != file_magic::bitcode) { ++ *claimed = 0; ++ return LDPS_OK; ++ } ++ + *claimed = 1; + + Expected<std::unique_ptr<InputFile>> ObjOrErr = InputFile::create(BufferRef); +-- +2.37.1.1.g659da70093 + diff --git a/build/build-clang/llvmorg-16-init-10850-gff111a997f1b.patch b/build/build-clang/llvmorg-16-init-10850-gff111a997f1b.patch new file mode 100644 index 0000000000..136508a065 --- /dev/null +++ b/build/build-clang/llvmorg-16-init-10850-gff111a997f1b.patch @@ -0,0 +1,35 @@ +From ff111a997f1b40a531bc68b543542746d4b08a5f Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Wed, 16 Nov 2022 21:31:28 -0500 +Subject: [PATCH] [lld-macho] Increase slop to prevent thunk out of range + again. + +Building Firefox with -O0 on arm64 mac recently hit the +"FIXME: thunk range overrun" error on multiple occasions. + +Doubling or tripling slop was not sufficient in some cases, so +quadruple it. + +Reviewed By: #lld-macho, int3 + +Differential Revision: https://reviews.llvm.org/D138174 +--- + lld/MachO/ConcatOutputSection.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp +index e97f1e9b9ddd..cbd3a2492d25 100644 +--- a/lld/MachO/ConcatOutputSection.cpp ++++ b/lld/MachO/ConcatOutputSection.cpp +@@ -246,7 +246,7 @@ void TextOutputSection::finalize() { + // contains several branch instructions in succession, then the distance + // from the current position to the position where the thunks are inserted + // grows. So leave room for a bunch of thunks. +- unsigned slop = 256 * thunkSize; ++ unsigned slop = 1024 * thunkSize; + while (finalIdx < endIdx && addr + size + inputs[finalIdx]->getSize() < + isecVA + forwardBranchRange - slop) + finalizeOne(inputs[finalIdx++]); +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/llvmorg-16-init-14003-g78c033b541f0.patch b/build/build-clang/llvmorg-16-init-14003-g78c033b541f0.patch new file mode 100644 index 0000000000..fe9adf1387 --- /dev/null +++ b/build/build-clang/llvmorg-16-init-14003-g78c033b541f0.patch @@ -0,0 +1,111 @@ +From 78c033b541f00d12b7e1ba2a676b02e022c9beb1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Markus=20B=C3=B6ck?= <markus.boeck02@gmail.com> +Date: Mon, 12 Dec 2022 11:45:31 +0100 +Subject: [PATCH] [sanitizers][windows] Correctly override functions with + backward jmps + +To reproduce: Download and run the latest Firefox ASAN build (https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.mozilla-central.latest.firefox.win64-asan-opt/artifacts/public/build/target.zip) on Windows 11 (version 10.0.22621 Build 22621); it will crash on launch. Note that this doesn't seem to crash on another Windows 11 VM I've tried, so I'm not sure how reproducible it is across machines, but it reproduces on my machine every time. + +The problem seems to be that when overriding the memset function in OverrideFunctionWithRedirectJump(), the relative_offset is stored as a uptr. Per the Intel x64 instruction set reference (https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - warning: large PDF), on page 646 the jmp instruction (specifically the near jump flavors that start with E9, which are the ones the OverrideFunctionWithRedirectJump() considers) treats the offset as a signed displacement. This causes an incorrect value to be stored for REAL(memset) which points to uninitialized memory, and a crash the next time that gets called. + +The fix is to simply treat that offset as signed. I have also added a test case. + +Fixes https://github.com/llvm/llvm-project/issues/58846 + +Differential Revision: https://reviews.llvm.org/D137788 +--- + .../lib/interception/interception_win.cpp | 2 +- + .../tests/interception_win_test.cpp | 36 +++++++++++++++---- + 2 files changed, 30 insertions(+), 8 deletions(-) + +diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp +index d0db981d519c..faaa8ee15381 100644 +--- a/compiler-rt/lib/interception/interception_win.cpp ++++ b/compiler-rt/lib/interception/interception_win.cpp +@@ -738,7 +738,7 @@ bool OverrideFunctionWithRedirectJump( + return false; + + if (orig_old_func) { +- uptr relative_offset = *(u32*)(old_func + 1); ++ sptr relative_offset = *(s32 *)(old_func + 1); + uptr absolute_target = old_func + relative_offset + kJumpInstructionLength; + *orig_old_func = absolute_target; + } +diff --git a/compiler-rt/lib/interception/tests/interception_win_test.cpp b/compiler-rt/lib/interception/tests/interception_win_test.cpp +index 084a98602969..01b8d3132c37 100644 +--- a/compiler-rt/lib/interception/tests/interception_win_test.cpp ++++ b/compiler-rt/lib/interception/tests/interception_win_test.cpp +@@ -85,7 +85,16 @@ const u8 kIdentityCodeWithJump[] = { + 0xC3, // ret + }; + +-#else ++const u8 kIdentityCodeWithJumpBackwards[] = { ++ 0x89, 0xC8, // mov eax, ecx ++ 0xC3, // ret ++ 0xE9, 0xF8, 0xFF, 0xFF, ++ 0xFF, // jmp - 8 ++ 0xCC, 0xCC, 0xCC, 0xCC, ++}; ++const u8 kIdentityCodeWithJumpBackwardsOffset = 3; ++ ++# else + + const u8 kIdentityCodeWithPrologue[] = { + 0x55, // push ebp +@@ -134,7 +143,16 @@ const u8 kIdentityCodeWithJump[] = { + 0xC3, // ret + }; + +-#endif ++const u8 kIdentityCodeWithJumpBackwards[] = { ++ 0x8B, 0x44, 0x24, 0x04, // mov eax,dword ptr [esp + 4] ++ 0xC3, // ret ++ 0xE9, 0xF6, 0xFF, 0xFF, ++ 0xFF, // jmp - 10 ++ 0xCC, 0xCC, 0xCC, 0xCC, ++}; ++const u8 kIdentityCodeWithJumpBackwardsOffset = 5; ++ ++# endif + + const u8 kPatchableCode1[] = { + 0xB8, 0x4B, 0x00, 0x00, 0x00, // mov eax,4B +@@ -366,13 +384,14 @@ TEST(Interception, InternalGetProcAddress) { + EXPECT_NE(DbgPrint_adddress, isdigit_address); + } + +-template<class T> ++template <class T> + static void TestIdentityFunctionPatching( +- const T &code, +- TestOverrideFunction override, +- FunctionPrefixKind prefix_kind = FunctionPrefixNone) { ++ const T &code, TestOverrideFunction override, ++ FunctionPrefixKind prefix_kind = FunctionPrefixNone, ++ int function_start_offset = 0) { + uptr identity_address; + LoadActiveCode(code, &identity_address, prefix_kind); ++ identity_address += function_start_offset; + IdentityFunction identity = (IdentityFunction)identity_address; + + // Validate behavior before dynamic patching. +@@ -410,7 +429,7 @@ static void TestIdentityFunctionPatching( + TestOnlyReleaseTrampolineRegions(); + } + +-#if !SANITIZER_WINDOWS64 ++# if !SANITIZER_WINDOWS64 + TEST(Interception, OverrideFunctionWithDetour) { + TestOverrideFunction override = OverrideFunctionWithDetour; + FunctionPrefixKind prefix = FunctionPrefixDetour; +@@ -424,6 +443,9 @@ TEST(Interception, OverrideFunctionWithDetour) { + TEST(Interception, OverrideFunctionWithRedirectJump) { + TestOverrideFunction override = OverrideFunctionWithRedirectJump; + TestIdentityFunctionPatching(kIdentityCodeWithJump, override); ++ TestIdentityFunctionPatching(kIdentityCodeWithJumpBackwards, override, ++ FunctionPrefixNone, ++ kIdentityCodeWithJumpBackwardsOffset); + } + + TEST(Interception, OverrideFunctionWithHotPatch) { diff --git a/build/build-clang/llvmorg-16-init-14750-ga903ecb4a26d.patch b/build/build-clang/llvmorg-16-init-14750-ga903ecb4a26d.patch new file mode 100644 index 0000000000..2144459099 --- /dev/null +++ b/build/build-clang/llvmorg-16-init-14750-ga903ecb4a26d.patch @@ -0,0 +1,458 @@ +From f8f064133b8ed9cd8cead1ee416b7c44729184ae Mon Sep 17 00:00:00 2001 +From: Haowei Wu <haowei@google.com> +Date: Fri, 4 Nov 2022 16:40:10 -0700 +Subject: [PATCH] [vfs] Allow root paths relative to the vfsoverlay YAML file + +This change adds 'root-relative' option in vfsoverlay YAML file format +so the root patchs can be relative to the YAML file directory instead of +the current working directory. + +Differential Revision: https://reviews.llvm.org/D137473 +--- + .../VFS/Inputs/root-relative-overlay.yaml | 11 +++ + clang/test/VFS/relative-path.c | 7 ++ + llvm/include/llvm/Support/VirtualFileSystem.h | 56 ++++++++++-- + llvm/lib/Support/VirtualFileSystem.cpp | 85 ++++++++++++++++--- + .../Support/VirtualFileSystemTest.cpp | 75 ++++++++++++++-- + 5 files changed, 207 insertions(+), 27 deletions(-) + create mode 100644 clang/test/VFS/Inputs/root-relative-overlay.yaml + +diff --git a/clang/test/VFS/Inputs/root-relative-overlay.yaml b/clang/test/VFS/Inputs/root-relative-overlay.yaml +new file mode 100644 +index 000000000000..931c1b4b73c5 +--- /dev/null ++++ b/clang/test/VFS/Inputs/root-relative-overlay.yaml +@@ -0,0 +1,11 @@ ++{ ++ 'version': 0, ++ 'case-sensitive': false, ++ 'overlay-relative': OVERLAY_DIR, ++ 'root-relative': 'overlay-dir', ++ 'roots': [ ++ { 'name': 'not_real.h', 'type': 'file', ++ 'external-contents': 'EXTERNAL_DIR/actual_header.h' ++ }, ++ ] ++} +diff --git a/clang/test/VFS/relative-path.c b/clang/test/VFS/relative-path.c +index ab207d096848..ba5451136a0c 100644 +--- a/clang/test/VFS/relative-path.c ++++ b/clang/test/VFS/relative-path.c +@@ -3,6 +3,13 @@ + // RUN: sed -e "s@INPUT_DIR@%{/S:regex_replacement}/Inputs@g" -e "s@OUT_DIR@%{/t:regex_replacement}@g" %S/Inputs/vfsoverlay.yaml > %t.yaml + // RUN: %clang_cc1 -Werror -I . -ivfsoverlay %t.yaml -fsyntax-only %s + ++// RUN: cp %S/Inputs/actual_header.h %t/actual_header.h ++// RUN: sed -e "s@OVERLAY_DIR@true@g" -e "s@EXTERNAL_DIR@.@g" %S/Inputs/root-relative-overlay.yaml > %t/root-relative-overlay.yaml ++// RUN: %clang_cc1 -Werror -I %t -ivfsoverlay %t/root-relative-overlay.yaml -fsyntax-only %s ++ ++// RUN: sed -e "s@OVERLAY_DIR@false@g" -e "s@EXTERNAL_DIR@%{/t:regex_replacement}@g" %S/Inputs/root-relative-overlay.yaml > %t/root-relative-overlay2.yaml ++// RUN: %clang_cc1 -Werror -I %t -ivfsoverlay %t/root-relative-overlay2.yaml -fsyntax-only %s ++ + #include "not_real.h" + + void foo(void) { +diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h +index 6844a406f38c..671b610c93e3 100644 +--- a/llvm/include/llvm/Support/VirtualFileSystem.h ++++ b/llvm/include/llvm/Support/VirtualFileSystem.h +@@ -651,17 +651,28 @@ class RedirectingFileSystemParser; + /// \endverbatim + /// + /// The roots may be absolute or relative. If relative they will be made +-/// absolute against the current working directory. ++/// absolute against either current working directory or the directory where ++/// the Overlay YAML file is located, depending on the 'root-relative' ++/// configuration. + /// + /// All configuration options are optional. + /// 'case-sensitive': <boolean, default=(true for Posix, false for Windows)> + /// 'use-external-names': <boolean, default=true> ++/// 'root-relative': <string, one of 'cwd' or 'overlay-dir', default='cwd'> + /// 'overlay-relative': <boolean, default=false> + /// 'fallthrough': <boolean, default=true, deprecated - use 'redirecting-with' + /// instead> + /// 'redirecting-with': <string, one of 'fallthrough', 'fallback', or + /// 'redirect-only', default='fallthrough'> + /// ++/// To clarify, 'root-relative' option will prepend the current working ++/// directory, or the overlay directory to the 'roots->name' field only if ++/// 'roots->name' is a relative path. On the other hand, when 'overlay-relative' ++/// is set to 'true', external paths will always be prepended with the overlay ++/// directory, even if external paths are not relative paths. The ++/// 'root-relative' option has no interaction with the 'overlay-relative' ++/// option. ++/// + /// Virtual directories that list their contents are represented as + /// \verbatim + /// { +@@ -745,6 +756,15 @@ public: + RedirectOnly + }; + ++ /// The type of relative path used by Roots. ++ enum class RootRelativeKind { ++ /// The roots are relative to the current working directory. ++ CWD, ++ /// The roots are relative to the directory where the Overlay YAML file ++ // locates. ++ OverlayDir ++ }; ++ + /// A single file or directory in the VFS. + class Entry { + EntryKind Kind; +@@ -890,6 +910,21 @@ private: + ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath, + const Twine &OriginalPath) const; + ++ /// Make \a Path an absolute path. ++ /// ++ /// Makes \a Path absolute using the \a WorkingDir if it is not already. ++ /// ++ /// /absolute/path => /absolute/path ++ /// relative/../path => <WorkingDir>/relative/../path ++ /// ++ /// \param WorkingDir A path that will be used as the base Dir if \a Path ++ /// is not already absolute. ++ /// \param Path A path that is modified to be an absolute path. ++ /// \returns success if \a path has been made absolute, otherwise a ++ /// platform-specific error_code. ++ std::error_code makeAbsolute(StringRef WorkingDir, ++ SmallVectorImpl<char> &Path) const; ++ + // In a RedirectingFileSystem, keys can be specified in Posix or Windows + // style (or even a mixture of both), so this comparison helper allows + // slashes (representing a root) to match backslashes (and vice versa). Note +@@ -910,10 +945,11 @@ private: + /// The file system to use for external references. + IntrusiveRefCntPtr<FileSystem> ExternalFS; + +- /// If IsRelativeOverlay is set, this represents the directory +- /// path that should be prefixed to each 'external-contents' entry +- /// when reading from YAML files. +- std::string ExternalContentsPrefixDir; ++ /// This represents the directory path that the YAML file is located. ++ /// This will be prefixed to each 'external-contents' if IsRelativeOverlay ++ /// is set. This will also be prefixed to each 'roots->name' if RootRelative ++ /// is set to RootRelativeKind::OverlayDir and the path is relative. ++ std::string OverlayFileDir; + + /// @name Configuration + /// @{ +@@ -923,7 +959,7 @@ private: + /// Currently, case-insensitive matching only works correctly with ASCII. + bool CaseSensitive = is_style_posix(sys::path::Style::native); + +- /// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must ++ /// IsRelativeOverlay marks whether a OverlayFileDir path must + /// be prefixed in every 'external-contents' when reading from YAML files. + bool IsRelativeOverlay = false; + +@@ -934,6 +970,10 @@ private: + /// Determines the lookups to perform, as well as their order. See + /// \c RedirectKind for details. + RedirectKind Redirection = RedirectKind::Fallthrough; ++ ++ /// Determine the prefix directory if the roots are relative paths. See ++ /// \c RootRelativeKind for details. ++ RootRelativeKind RootRelative = RootRelativeKind::CWD; + /// @} + + RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS); +@@ -984,9 +1024,9 @@ public: + + directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override; + +- void setExternalContentsPrefixDir(StringRef PrefixDir); ++ void setOverlayFileDir(StringRef PrefixDir); + +- StringRef getExternalContentsPrefixDir() const; ++ StringRef getOverlayFileDir() const; + + /// Sets the redirection kind to \c Fallthrough if true or \c RedirectOnly + /// otherwise. Will removed in the future, use \c setRedirection instead. +diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp +index 97d63fff1069..656d565fb692 100644 +--- a/llvm/lib/Support/VirtualFileSystem.cpp ++++ b/llvm/lib/Support/VirtualFileSystem.cpp +@@ -1350,32 +1350,51 @@ std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path) + if (llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::posix) || + llvm::sys::path::is_absolute(Path, + llvm::sys::path::Style::windows_backslash)) ++ // This covers windows absolute path with forward slash as well, as the ++ // forward slashes are treated as path seperation in llvm::path ++ // regardless of what path::Style is used. + return {}; + + auto WorkingDir = getCurrentWorkingDirectory(); + if (!WorkingDir) + return WorkingDir.getError(); + ++ return makeAbsolute(WorkingDir.get(), Path); ++} ++ ++std::error_code ++RedirectingFileSystem::makeAbsolute(StringRef WorkingDir, ++ SmallVectorImpl<char> &Path) const { + // We can't use sys::fs::make_absolute because that assumes the path style + // is native and there is no way to override that. Since we know WorkingDir + // is absolute, we can use it to determine which style we actually have and + // append Path ourselves. ++ if (!WorkingDir.empty() && ++ !sys::path::is_absolute(WorkingDir, sys::path::Style::posix) && ++ !sys::path::is_absolute(WorkingDir, ++ sys::path::Style::windows_backslash)) { ++ return std::error_code(); ++ } + sys::path::Style style = sys::path::Style::windows_backslash; +- if (sys::path::is_absolute(WorkingDir.get(), sys::path::Style::posix)) { ++ if (sys::path::is_absolute(WorkingDir, sys::path::Style::posix)) { + style = sys::path::Style::posix; + } else { + // Distinguish between windows_backslash and windows_slash; getExistingStyle + // returns posix for a path with windows_slash. +- if (getExistingStyle(WorkingDir.get()) != +- sys::path::Style::windows_backslash) ++ if (getExistingStyle(WorkingDir) != sys::path::Style::windows_backslash) + style = sys::path::Style::windows_slash; + } + +- std::string Result = WorkingDir.get(); ++ std::string Result = std::string(WorkingDir); + StringRef Dir(Result); + if (!Dir.endswith(sys::path::get_separator(style))) { + Result += sys::path::get_separator(style); + } ++ // backslashes '\' are legit path charactors under POSIX. Windows APIs ++ // like CreateFile accepts forward slashes '/' as path ++ // separator (even when mixed with backslashes). Therefore, ++ // `Path` should be directly appended to `WorkingDir` without converting ++ // path separator. + Result.append(Path.data(), Path.size()); + Path.assign(Result.begin(), Result.end()); + +@@ -1482,12 +1501,12 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir, + return Combined; + } + +-void RedirectingFileSystem::setExternalContentsPrefixDir(StringRef PrefixDir) { +- ExternalContentsPrefixDir = PrefixDir.str(); ++void RedirectingFileSystem::setOverlayFileDir(StringRef Dir) { ++ OverlayFileDir = Dir.str(); + } + +-StringRef RedirectingFileSystem::getExternalContentsPrefixDir() const { +- return ExternalContentsPrefixDir; ++StringRef RedirectingFileSystem::getOverlayFileDir() const { ++ return OverlayFileDir; + } + + void RedirectingFileSystem::setFallthrough(bool Fallthrough) { +@@ -1621,6 +1640,20 @@ class llvm::vfs::RedirectingFileSystemParser { + return None; + } + ++ Optional<RedirectingFileSystem::RootRelativeKind> ++ parseRootRelativeKind(yaml::Node *N) { ++ SmallString<12> Storage; ++ StringRef Value; ++ if (!parseScalarString(N, Value, Storage)) ++ return None; ++ if (Value.equals_insensitive("cwd")) { ++ return RedirectingFileSystem::RootRelativeKind::CWD; ++ } else if (Value.equals_insensitive("overlay-dir")) { ++ return RedirectingFileSystem::RootRelativeKind::OverlayDir; ++ } ++ return None; ++ } ++ + struct KeyStatus { + bool Required; + bool Seen = false; +@@ -1828,7 +1861,7 @@ private: + + SmallString<256> FullPath; + if (FS->IsRelativeOverlay) { +- FullPath = FS->getExternalContentsPrefixDir(); ++ FullPath = FS->getOverlayFileDir(); + assert(!FullPath.empty() && + "External contents prefix directory must exist"); + llvm::sys::path::append(FullPath, Value); +@@ -1885,9 +1918,19 @@ private: + sys::path::Style::windows_backslash)) { + path_style = sys::path::Style::windows_backslash; + } else { +- // Relative VFS root entries are made absolute to the current working +- // directory, then we can determine the path style from that. +- auto EC = sys::fs::make_absolute(Name); ++ // Relative VFS root entries are made absolute to either the overlay ++ // directory, or the current working directory, then we can determine ++ // the path style from that. ++ std::error_code EC; ++ if (FS->RootRelative == ++ RedirectingFileSystem::RootRelativeKind::OverlayDir) { ++ StringRef FullPath = FS->getOverlayFileDir(); ++ assert(!FullPath.empty() && "Overlay file directory must exist"); ++ EC = FS->makeAbsolute(FullPath, Name); ++ Name = canonicalize(Name); ++ } else { ++ EC = sys::fs::make_absolute(Name); ++ } + if (EC) { + assert(NameValueNode && "Name presence should be checked earlier"); + error( +@@ -1899,6 +1942,12 @@ private: + ? sys::path::Style::posix + : sys::path::Style::windows_backslash; + } ++ // is::path::is_absolute(Name, sys::path::Style::windows_backslash) will ++ // return true even if `Name` is using forward slashes. Distinguish ++ // between windows_backslash and windows_slash. ++ if (path_style == sys::path::Style::windows_backslash && ++ getExistingStyle(Name) != sys::path::Style::windows_backslash) ++ path_style = sys::path::Style::windows_slash; + } + + // Remove trailing slash(es), being careful not to remove the root path +@@ -1962,6 +2011,7 @@ public: + KeyStatusPair("version", true), + KeyStatusPair("case-sensitive", false), + KeyStatusPair("use-external-names", false), ++ KeyStatusPair("root-relative", false), + KeyStatusPair("overlay-relative", false), + KeyStatusPair("fallthrough", false), + KeyStatusPair("redirecting-with", false), +@@ -2051,6 +2101,13 @@ public: + error(I.getValue(), "expected valid redirect kind"); + return false; + } ++ } else if (Key == "root-relative") { ++ if (auto Kind = parseRootRelativeKind(I.getValue())) { ++ FS->RootRelative = *Kind; ++ } else { ++ error(I.getValue(), "expected valid root-relative kind"); ++ return false; ++ } + } else { + llvm_unreachable("key missing from Keys"); + } +@@ -2100,13 +2157,13 @@ RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer, + // Example: + // -ivfsoverlay dummy.cache/vfs/vfs.yaml + // yields: +- // FS->ExternalContentsPrefixDir => /<absolute_path_to>/dummy.cache/vfs ++ // FS->OverlayFileDir => /<absolute_path_to>/dummy.cache/vfs + // + SmallString<256> OverlayAbsDir = sys::path::parent_path(YAMLFilePath); + std::error_code EC = llvm::sys::fs::make_absolute(OverlayAbsDir); + assert(!EC && "Overlay dir final path must be absolute"); + (void)EC; +- FS->setExternalContentsPrefixDir(OverlayAbsDir); ++ FS->setOverlayFileDir(OverlayAbsDir); + } + + if (!P.parse(Root, FS.get())) +diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp +index 1e300eec711d..6cefd1d02ba3 100644 +--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp ++++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp +@@ -1452,18 +1452,20 @@ public: + + std::unique_ptr<vfs::FileSystem> + getFromYAMLRawString(StringRef Content, +- IntrusiveRefCntPtr<vfs::FileSystem> ExternalFS) { ++ IntrusiveRefCntPtr<vfs::FileSystem> ExternalFS, ++ StringRef YAMLFilePath = "") { + std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(Content); +- return getVFSFromYAML(std::move(Buffer), CountingDiagHandler, "", this, +- ExternalFS); ++ return getVFSFromYAML(std::move(Buffer), CountingDiagHandler, YAMLFilePath, ++ this, ExternalFS); + } + + std::unique_ptr<vfs::FileSystem> getFromYAMLString( + StringRef Content, +- IntrusiveRefCntPtr<vfs::FileSystem> ExternalFS = new DummyFileSystem()) { ++ IntrusiveRefCntPtr<vfs::FileSystem> ExternalFS = new DummyFileSystem(), ++ StringRef YAMLFilePath = "") { + std::string VersionPlusContent("{\n 'version':0,\n"); + VersionPlusContent += Content.slice(Content.find('{') + 1, StringRef::npos); +- return getFromYAMLRawString(VersionPlusContent, ExternalFS); ++ return getFromYAMLRawString(VersionPlusContent, ExternalFS, YAMLFilePath); + } + + // This is intended as a "XFAIL" for windows hosts. +@@ -1849,6 +1851,69 @@ TEST_F(VFSFromYAMLTest, ReturnsExternalPathVFSHit) { + EXPECT_EQ(0, NumDiagnostics); + } + ++TEST_F(VFSFromYAMLTest, RootRelativeTest) { ++ IntrusiveRefCntPtr<DummyFileSystem> Lower(new DummyFileSystem()); ++ Lower->addDirectory("//root/foo/bar"); ++ Lower->addRegularFile("//root/foo/bar/a"); ++ IntrusiveRefCntPtr<vfs::FileSystem> FS = ++ getFromYAMLString("{\n" ++ " 'case-sensitive': false,\n" ++ " 'root-relative': 'overlay-dir',\n" ++ " 'roots': [\n" ++ " { 'name': 'b', 'type': 'file',\n" ++ " 'external-contents': '//root/foo/bar/a'\n" ++ " }\n" ++ " ]\n" ++ "}", ++ Lower, "//root/foo/bar/overlay"); ++ ++ ASSERT_NE(FS.get(), nullptr); ++ ErrorOr<vfs::Status> S = FS->status("//root/foo/bar/b"); ++ ASSERT_FALSE(S.getError()); ++ EXPECT_EQ("//root/foo/bar/a", S->getName()); ++ ++ // On Windows, with overlay-relative set to true, the relative ++ // path in external-contents field will be prepend by OverlayDir ++ // with native path separator, regardless of the actual path separator ++ // used in YAMLFilePath field. ++#ifndef _WIN32 ++ FS = getFromYAMLString("{\n" ++ " 'case-sensitive': false,\n" ++ " 'overlay-relative': true,\n" ++ " 'root-relative': 'overlay-dir',\n" ++ " 'roots': [\n" ++ " { 'name': 'b', 'type': 'file',\n" ++ " 'external-contents': 'a'\n" ++ " }\n" ++ " ]\n" ++ "}", ++ Lower, "//root/foo/bar/overlay"); ++ ASSERT_NE(FS.get(), nullptr); ++ S = FS->status("//root/foo/bar/b"); ++ ASSERT_FALSE(S.getError()); ++ EXPECT_EQ("//root/foo/bar/a", S->getName()); ++#else ++ IntrusiveRefCntPtr<DummyFileSystem> LowerWindows(new DummyFileSystem()); ++ LowerWindows->addDirectory("\\\\root\\foo\\bar"); ++ LowerWindows->addRegularFile("\\\\root\\foo\\bar\\a"); ++ FS = getFromYAMLString("{\n" ++ " 'case-sensitive': false,\n" ++ " 'overlay-relative': true,\n" ++ " 'root-relative': 'overlay-dir',\n" ++ " 'roots': [\n" ++ " { 'name': 'b', 'type': 'file',\n" ++ " 'external-contents': 'a'\n" ++ " }\n" ++ " ]\n" ++ "}", ++ LowerWindows, "\\\\root\\foo\\bar\\overlay"); ++ ASSERT_NE(FS.get(), nullptr); ++ S = FS->status("\\\\root\\foo\\bar\\b"); ++ ASSERT_FALSE(S.getError()); ++ EXPECT_EQ("\\\\root\\foo\\bar\\a", S->getName()); ++#endif ++} ++ + TEST_F(VFSFromYAMLTest, ReturnsInternalPathVFSHit) { + IntrusiveRefCntPtr<vfs::InMemoryFileSystem> BaseFS( + new vfs::InMemoryFileSystem); +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/llvmorg-16-init-15447-g8d3ab9dfc4d4.patch b/build/build-clang/llvmorg-16-init-15447-g8d3ab9dfc4d4.patch new file mode 100644 index 0000000000..76bbebdf72 --- /dev/null +++ b/build/build-clang/llvmorg-16-init-15447-g8d3ab9dfc4d4.patch @@ -0,0 +1,48 @@ +From 8d3ab9dfc4d49062e045d6d5db419975b81da7e4 Mon Sep 17 00:00:00 2001 +From: serge-sans-paille <sguelton@mozilla.com> +Date: Thu, 22 Dec 2022 10:48:37 +0100 +Subject: [PATCH] Properly support LLVM_ENABLE_LLD on Windows + +Currently, setting -DLLVM_ENABLE_LLD=ON on windows also requires setting +-DCMAKE_LINKER=lld-link.exe. This is both misleading and redundant. + +Fix this by trying to find llvm-link.exe when -DLLVM_ENABLE_LLD=ON is +set and CMAKE_LINKER is not, and aborting otherwise. + +Differential Revision: https://reviews.llvm.org/D140534 +--- + llvm/cmake/modules/HandleLLVMOptions.cmake | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index 7f141d93d4c2..4f4a64bc02eb 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -293,10 +293,23 @@ if( LLVM_ENABLE_LLD ) + if ( LLVM_USE_LINKER ) + message(FATAL_ERROR "LLVM_ENABLE_LLD and LLVM_USE_LINKER can't be set at the same time") + endif() ++ + # In case of MSVC cmake always invokes the linker directly, so the linker + # should be specified by CMAKE_LINKER cmake variable instead of by -fuse-ld + # compiler option. +- if ( NOT MSVC ) ++ if ( MSVC ) ++ if(NOT CMAKE_LINKER MATCHES "lld-link") ++ get_filename_component(CXX_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) ++ get_filename_component(C_COMPILER_DIR ${CMAKE_C_COMPILER} DIRECTORY) ++ find_program(LLD_LINK NAMES "lld-link" "lld-link.exe" HINTS ${CXX_COMPILER_DIR} ${C_COMPILER_DIR} DOC "lld linker") ++ if(NOT LLD_LINK) ++ message(FATAL_ERROR ++ "LLVM_ENABLE_LLD set, but cannot find lld-link. " ++ "Consider setting CMAKE_LINKER to lld-link path.") ++ endif() ++ set(CMAKE_LINKER ${LLD_LINK}) ++ endif() ++ else() + set(LLVM_USE_LINKER "lld") + endif() + endif() +-- +2.38.1 + diff --git a/build/build-clang/llvmorg-16-init-7778-g232e0a011e8c.patch b/build/build-clang/llvmorg-16-init-7778-g232e0a011e8c.patch new file mode 100644 index 0000000000..7719caeb99 --- /dev/null +++ b/build/build-clang/llvmorg-16-init-7778-g232e0a011e8c.patch @@ -0,0 +1,256 @@ +From e9b4278ca7a492d3710aeae324250189379e6d30 Mon Sep 17 00:00:00 2001 +From: serge-sans-paille <sguelton@redhat.com> +Date: Thu, 13 Oct 2022 10:26:41 +0200 +Subject: [PATCH] [lto] Do not try to internalize symbols with escaped name + +Because of LLVM mangling escape sequence (through '\01' prefix), it is possible +for a single symbols two have two different IR representations. + +For instance, consider @symbol and @"\01_symbol". On OSX, because of the system +mangling rules, these two IR names point are converted in the same final symbol +upon linkage. + +LTO doesn't model this behavior, which may result in symbols being incorrectly +internalized (if all reference use the escaping sequence while the definition +doesn't). + +The proper approach is probably to use the mangled name to compute GUID to +avoid the dual representation, but we can also avoid discarding symbols that are +bound to two different IR names. This is an approximation, but it's less +intrusive on the codebase. + +Fix #57864 + +Differential Revision: https://reviews.llvm.org/D135710 +--- + llvm/lib/LTO/LTO.cpp | 16 +++++++ + .../LTO/X86/hidden-escaped-symbols-alt.ll | 41 ++++++++++++++++++ + llvm/test/LTO/X86/hidden-escaped-symbols.ll | 41 ++++++++++++++++++ + .../ThinLTO/X86/hidden-escaped-symbols-alt.ll | 42 +++++++++++++++++++ + .../ThinLTO/X86/hidden-escaped-symbols.ll | 42 +++++++++++++++++++ + 5 files changed, 182 insertions(+) + create mode 100644 llvm/test/LTO/X86/hidden-escaped-symbols-alt.ll + create mode 100644 llvm/test/LTO/X86/hidden-escaped-symbols.ll + create mode 100644 llvm/test/ThinLTO/X86/hidden-escaped-symbols-alt.ll + create mode 100644 llvm/test/ThinLTO/X86/hidden-escaped-symbols.ll + +diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp +index 0f78c4ebd5ca..93aed72b3d15 100644 +--- a/llvm/lib/LTO/LTO.cpp ++++ b/llvm/lib/LTO/LTO.cpp +@@ -565,6 +565,22 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, + GlobalRes.IRName = std::string(Sym.getIRName()); + } + ++ // In rare occasion, the symbol used to initialize GlobalRes has a different ++ // IRName from the inspected Symbol. This can happen on macOS + iOS, when a ++ // symbol is referenced through its mangled name, say @"\01_symbol" while ++ // the IRName is @symbol (the prefix underscore comes from MachO mangling). ++ // In that case, we have the same actual Symbol that can get two different ++ // GUID, leading to some invalid internalization. Workaround this by marking ++ // the GlobalRes external. ++ ++ // FIXME: instead of this check, it would be desirable to compute GUIDs ++ // based on mangled name, but this requires an access to the Target Triple ++ // and would be relatively invasive on the codebase. ++ if (GlobalRes.IRName != Sym.getIRName()) { ++ GlobalRes.Partition = GlobalResolution::External; ++ GlobalRes.VisibleOutsideSummary = true; ++ } ++ + // Set the partition to external if we know it is re-defined by the linker + // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a + // regular object, is referenced from llvm.compiler.used/llvm.used, or was +diff --git a/llvm/test/LTO/X86/hidden-escaped-symbols-alt.ll b/llvm/test/LTO/X86/hidden-escaped-symbols-alt.ll +new file mode 100644 +index 000000000000..ca8e7d8eb2b2 +--- /dev/null ++++ b/llvm/test/LTO/X86/hidden-escaped-symbols-alt.ll +@@ -0,0 +1,41 @@ ++; Check interaction between LTO and LLVM mangling escape char, see #57864. ++ ++; RUN: split-file %s %t ++; RUN: opt %t/hide-me.ll -o %t/hide-me.bc ++; RUN: opt %t/ref.ll -o %t/ref.bc ++; RUN: llvm-lto2 run \ ++; RUN: -r %t/hide-me.bc,_hide_me,p \ ++; RUN: -r %t/ref.bc,_main,plx \ ++; RUN: -r %t/ref.bc,_hide_me,l \ ++; RUN: --select-save-temps=precodegen \ ++; RUN: -o %t/out \ ++; RUN: %t/hide-me.bc %t/ref.bc ++; RUN: llvm-dis %t/out.0.5.precodegen.bc -o - | FileCheck %s ++ ++ ++;--- hide-me.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@"\01_hide_me" = hidden local_unnamed_addr global i8 8, align 1 ++ ++;--- ref.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@hide_me = external local_unnamed_addr global i8 ++ ++define i8 @main() { ++ %1 = load i8, ptr @hide_me, align 1 ++ ret i8 %1 ++} ++ ++ ++; CHECK: @"\01_hide_me" = hidden local_unnamed_addr global i8 8, align 1 ++; CHECK: @hide_me = external dso_local local_unnamed_addr global i8 ++ ++; CHECK: define dso_local i8 @main() local_unnamed_addr #0 { ++; CHECK: %1 = load i8, ptr @hide_me, align 1 ++; CHECK: ret i8 %1 ++; CHECK: } ++ +diff --git a/llvm/test/LTO/X86/hidden-escaped-symbols.ll b/llvm/test/LTO/X86/hidden-escaped-symbols.ll +new file mode 100644 +index 000000000000..aec617e25d3d +--- /dev/null ++++ b/llvm/test/LTO/X86/hidden-escaped-symbols.ll +@@ -0,0 +1,41 @@ ++; Check interaction between LTO and LLVM mangling escape char, see #57864. ++ ++; RUN: split-file %s %t ++; RUN: opt %t/hide-me.ll -o %t/hide-me.bc ++; RUN: opt %t/ref.ll -o %t/ref.bc ++; RUN: llvm-lto2 run \ ++; RUN: -r %t/hide-me.bc,_hide_me,p \ ++; RUN: -r %t/ref.bc,_main,plx \ ++; RUN: -r %t/ref.bc,_hide_me,l \ ++; RUN: --select-save-temps=precodegen \ ++; RUN: -o %t/out \ ++; RUN: %t/hide-me.bc %t/ref.bc ++; RUN: llvm-dis %t/out.0.5.precodegen.bc -o - | FileCheck %s ++ ++ ++;--- hide-me.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@hide_me = hidden local_unnamed_addr global i8 8, align 1 ++ ++;--- ref.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@"\01_hide_me" = external local_unnamed_addr global i8 ++ ++define i8 @main() { ++ %1 = load i8, ptr @"\01_hide_me", align 1 ++ ret i8 %1 ++} ++ ++ ++; CHECK: @hide_me = hidden local_unnamed_addr global i8 8, align 1 ++; CHECK: @"\01_hide_me" = external dso_local local_unnamed_addr global i8 ++ ++; CHECK: define dso_local i8 @main() local_unnamed_addr #0 { ++; CHECK: %1 = load i8, ptr @"\01_hide_me", align 1 ++; CHECK: ret i8 %1 ++; CHECK: } ++ +diff --git a/llvm/test/ThinLTO/X86/hidden-escaped-symbols-alt.ll b/llvm/test/ThinLTO/X86/hidden-escaped-symbols-alt.ll +new file mode 100644 +index 000000000000..dadd1d434256 +--- /dev/null ++++ b/llvm/test/ThinLTO/X86/hidden-escaped-symbols-alt.ll +@@ -0,0 +1,42 @@ ++; Check interaction between LTO and LLVM mangling escape char, see #57864. ++ ++; RUN: split-file %s %t ++; RUN: opt -module-summary %t/hide-me.ll -o %t/hide-me.bc ++; RUN: opt -module-summary %t/ref.ll -o %t/ref.bc ++; RUN: llvm-lto2 run \ ++; RUN: -r %t/hide-me.bc,_hide_me,p \ ++; RUN: -r %t/ref.bc,_main,plx \ ++; RUN: -r %t/ref.bc,_hide_me,l \ ++; RUN: --select-save-temps=precodegen \ ++; RUN: -o %t/out \ ++; RUN: %t/hide-me.bc %t/ref.bc ++; RUN: llvm-dis %t/out.1.5.precodegen.bc -o - | FileCheck --check-prefix=CHECK-HIDE %s ++; RUN: llvm-dis %t/out.2.5.precodegen.bc -o - | FileCheck --check-prefix=CHECK-REF %s ++ ++ ++;--- hide-me.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@"\01_hide_me" = hidden local_unnamed_addr global i8 8, align 1 ++ ++;--- ref.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@hide_me = external local_unnamed_addr global i8 ++ ++define i8 @main() { ++ %1 = load i8, ptr @hide_me, align 1 ++ ret i8 %1 ++} ++ ++ ++; CHECK-HIDE: @"\01_hide_me" = hidden local_unnamed_addr global i8 8, align 1 ++ ++; CHECK-REF: @hide_me = external local_unnamed_addr global i8 ++; CHECK-REF: define dso_local i8 @main() local_unnamed_addr #0 { ++; CHECK-REF: %1 = load i8, ptr @hide_me, align 1 ++; CHECK-REF: ret i8 %1 ++; CHECK-REF: } ++ +diff --git a/llvm/test/ThinLTO/X86/hidden-escaped-symbols.ll b/llvm/test/ThinLTO/X86/hidden-escaped-symbols.ll +new file mode 100644 +index 000000000000..8d0e22f0fd22 +--- /dev/null ++++ b/llvm/test/ThinLTO/X86/hidden-escaped-symbols.ll +@@ -0,0 +1,42 @@ ++; Check interaction between LTO and LLVM mangling escape char, see #57864. ++ ++; RUN: split-file %s %t ++; RUN: opt -module-summary %t/hide-me.ll -o %t/hide-me.bc ++; RUN: opt -module-summary %t/ref.ll -o %t/ref.bc ++; RUN: llvm-lto2 run \ ++; RUN: -r %t/hide-me.bc,_hide_me,p \ ++; RUN: -r %t/ref.bc,_main,plx \ ++; RUN: -r %t/ref.bc,_hide_me,l \ ++; RUN: --select-save-temps=precodegen \ ++; RUN: -o %t/out \ ++; RUN: %t/hide-me.bc %t/ref.bc ++; RUN: llvm-dis %t/out.1.5.precodegen.bc -o - | FileCheck --check-prefix=CHECK-HIDE %s ++; RUN: llvm-dis %t/out.2.5.precodegen.bc -o - | FileCheck --check-prefix=CHECK-REF %s ++ ++ ++;--- hide-me.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@hide_me = hidden local_unnamed_addr global i8 8, align 1 ++ ++;--- ref.ll ++target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-apple-macosx10.7.0" ++ ++@"\01_hide_me" = external local_unnamed_addr global i8 ++ ++define i8 @main() { ++ %1 = load i8, ptr @"\01_hide_me", align 1 ++ ret i8 %1 ++} ++ ++ ++; CHECK-HIDE: @hide_me = hidden local_unnamed_addr global i8 8, align 1 ++ ++; CHECK-REF: @"\01_hide_me" = external local_unnamed_addr global i8 ++; CHECK-REF: define dso_local i8 @main() local_unnamed_addr #0 { ++; CHECK-REF: %1 = load i8, ptr @"\01_hide_me", align 1 ++; CHECK-REF: ret i8 %1 ++; CHECK-REF: } ++ +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/macosx64.json b/build/build-clang/macosx64.json new file mode 100644 index 0000000000..a7955768a6 --- /dev/null +++ b/build/build-clang/macosx64.json @@ -0,0 +1,10 @@ +{ + "osx_cross_compile": true, + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "ar": "{MOZ_FETCHES_DIR}/cctools/bin/x86_64-apple-darwin-ar", + "ranlib": "{MOZ_FETCHES_DIR}/cctools/bin/x86_64-apple-darwin-ranlib", + "libtool": "{MOZ_FETCHES_DIR}/cctools/bin/x86_64-apple-darwin-libtool", + "ld": "{MOZ_FETCHES_DIR}/clang/bin/clang" +} diff --git a/build/build-clang/profile-g4a10504e1f70c.patch b/build/build-clang/profile-g4a10504e1f70c.patch new file mode 100644 index 0000000000..7800c89786 --- /dev/null +++ b/build/build-clang/profile-g4a10504e1f70c.patch @@ -0,0 +1,43 @@ +commit 4a10504e1f70c70c049fc439c4fce37a12de5941 +Author: Vedant Kumar <vsk@apple.com> +Date: Mon Sep 18 18:13:47 2017 +0000 + + [cmake] Make it possible to build and test profile without sanitizers + + This should fix an issue which arises when running check-compiler-rt on + the coverage bot: + http://green.lab.llvm.org/green/job/clang-stage2-coverage-R_build/1590/ + + The bot doesn't build the sanitizers, but the check-compiler-rt target + always expects the profile runtime to exist. + + llvm-svn: 313549 + +diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt +index 025320f473f5..a92d0a3f082d 100644 +--- a/compiler-rt/lib/CMakeLists.txt ++++ b/compiler-rt/lib/CMakeLists.txt +@@ -39,7 +39,9 @@ if(COMPILER_RT_BUILD_SANITIZERS) + foreach(sanitizer ${COMPILER_RT_SANITIZERS_TO_BUILD}) + compiler_rt_build_runtime(${sanitizer}) + endforeach() ++endif() + ++if (COMPILER_RT_HAS_PROFILE) + compiler_rt_build_runtime(profile) + endif() + +diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt +index 6bc7cdbecf6a..e691eab7d4a7 100644 +--- a/compiler-rt/test/CMakeLists.txt ++++ b/compiler-rt/test/CMakeLists.txt +@@ -71,7 +71,8 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS) + compiler_rt_test_runtime(${sanitizer}) + endif() + endforeach() +- ++ endif() ++ if (COMPILER_RT_HAS_PROFILE) + compiler_rt_test_runtime(profile) + endif() + if(COMPILER_RT_BUILD_XRAY) diff --git a/build/build-clang/profile.json b/build/build-clang/profile.json new file mode 100644 index 0000000000..9fdc70fbec --- /dev/null +++ b/build/build-clang/profile.json @@ -0,0 +1,4 @@ +{ + "stages": "3", + "pgo": true +} diff --git a/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6.patch b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6.patch new file mode 100644 index 0000000000..9d61910705 --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6.patch @@ -0,0 +1,173 @@ +From c8a5013045b5aff8e45418925688ca670545980f Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 18 Mar 2022 17:58:28 +0900 +Subject: [PATCH] Revert "[lsan] Move out suppression of invalid PCs from + StopTheWorld" + +This reverts commit f86deb18cab6479a0961ade3807e4729f3a27bdf +because of permafail for a sizable amount of ASan test jobs, where the +worker would die without even leaving any logs. + +--- + compiler-rt/lib/lsan/lsan_common.cpp | 108 +++++++++++++++++---------- + 1 file changed, 67 insertions(+), 41 deletions(-) + +diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp +index fd7aa38d99db..658415bce507 100644 +--- a/compiler-rt/lib/lsan/lsan_common.cpp ++++ b/compiler-rt/lib/lsan/lsan_common.cpp +@@ -71,11 +71,9 @@ class LeakSuppressionContext { + SuppressionContext context; + bool suppressed_stacks_sorted = true; + InternalMmapVector<u32> suppressed_stacks; +- const LoadedModule *suppress_module = nullptr; + +- void LazyInit(); + Suppression *GetSuppressionForAddr(uptr addr); +- bool SuppressInvalid(const StackTrace &stack); ++ void LazyInit(); + bool SuppressByRule(const StackTrace &stack, uptr hit_count, uptr total_size); + + public: +@@ -126,8 +124,6 @@ void LeakSuppressionContext::LazyInit() { + if (&__lsan_default_suppressions) + context.Parse(__lsan_default_suppressions()); + context.Parse(kStdSuppressions); +- if (flags()->use_tls && flags()->use_ld_allocations) +- suppress_module = GetLinker(); + } + } + +@@ -152,41 +148,6 @@ Suppression *LeakSuppressionContext::GetSuppressionForAddr(uptr addr) { + return s; + } + +-static uptr GetCallerPC(const StackTrace &stack) { +- // The top frame is our malloc/calloc/etc. The next frame is the caller. +- if (stack.size >= 2) +- return stack.trace[1]; +- return 0; +-} +- +-// On Linux, treats all chunks allocated from ld-linux.so as reachable, which +-// covers dynamically allocated TLS blocks, internal dynamic loader's loaded +-// modules accounting etc. +-// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. +-// They are allocated with a __libc_memalign() call in allocate_and_init() +-// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those +-// blocks, but we can make sure they come from our own allocator by intercepting +-// __libc_memalign(). On top of that, there is no easy way to reach them. Their +-// addresses are stored in a dynamically allocated array (the DTV) which is +-// referenced from the static TLS. Unfortunately, we can't just rely on the DTV +-// being reachable from the static TLS, and the dynamic TLS being reachable from +-// the DTV. This is because the initial DTV is allocated before our interception +-// mechanism kicks in, and thus we don't recognize it as allocated memory. We +-// can't special-case it either, since we don't know its size. +-// Our solution is to include in the root set all allocations made from +-// ld-linux.so (which is where allocate_and_init() is implemented). This is +-// guaranteed to include all dynamic TLS blocks (and possibly other allocations +-// which we don't care about). +-// On all other platforms, this simply checks to ensure that the caller pc is +-// valid before reporting chunks as leaked. +-bool LeakSuppressionContext::SuppressInvalid(const StackTrace &stack) { +- uptr caller_pc = GetCallerPC(stack); +- // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark +- // it as reachable, as we can't properly report its allocation stack anyway. +- return !caller_pc || +- (suppress_module && suppress_module->containsAddress(caller_pc)); +-} +- + bool LeakSuppressionContext::SuppressByRule(const StackTrace &stack, + uptr hit_count, uptr total_size) { + for (uptr i = 0; i < stack.size; i++) { +@@ -205,7 +166,7 @@ bool LeakSuppressionContext::Suppress(u32 stack_trace_id, uptr hit_count, + uptr total_size) { + LazyInit(); + StackTrace stack = StackDepotGet(stack_trace_id); +- if (!SuppressInvalid(stack) && !SuppressByRule(stack, hit_count, total_size)) ++ if (!SuppressByRule(stack, hit_count, total_size)) + return false; + suppressed_stacks_sorted = false; + suppressed_stacks.push_back(stack_trace_id); +@@ -569,6 +530,68 @@ static void CollectIgnoredCb(uptr chunk, void *arg) { + } + } + ++static uptr GetCallerPC(const StackTrace &stack) { ++ // The top frame is our malloc/calloc/etc. The next frame is the caller. ++ if (stack.size >= 2) ++ return stack.trace[1]; ++ return 0; ++} ++ ++struct InvalidPCParam { ++ Frontier *frontier; ++ bool skip_linker_allocations; ++}; ++ ++// ForEachChunk callback. If the caller pc is invalid or is within the linker, ++// mark as reachable. Called by ProcessPlatformSpecificAllocations. ++static void MarkInvalidPCCb(uptr chunk, void *arg) { ++ CHECK(arg); ++ InvalidPCParam *param = reinterpret_cast<InvalidPCParam *>(arg); ++ chunk = GetUserBegin(chunk); ++ LsanMetadata m(chunk); ++ if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { ++ u32 stack_id = m.stack_trace_id(); ++ uptr caller_pc = 0; ++ if (stack_id > 0) ++ caller_pc = GetCallerPC(StackDepotGet(stack_id)); ++ // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark ++ // it as reachable, as we can't properly report its allocation stack anyway. ++ if (caller_pc == 0 || (param->skip_linker_allocations && ++ GetLinker()->containsAddress(caller_pc))) { ++ m.set_tag(kIgnored); ++ param->frontier->push_back(chunk); ++ } ++ } ++} ++ ++// On Linux, treats all chunks allocated from ld-linux.so as reachable, which ++// covers dynamically allocated TLS blocks, internal dynamic loader's loaded ++// modules accounting etc. ++// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. ++// They are allocated with a __libc_memalign() call in allocate_and_init() ++// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those ++// blocks, but we can make sure they come from our own allocator by intercepting ++// __libc_memalign(). On top of that, there is no easy way to reach them. Their ++// addresses are stored in a dynamically allocated array (the DTV) which is ++// referenced from the static TLS. Unfortunately, we can't just rely on the DTV ++// being reachable from the static TLS, and the dynamic TLS being reachable from ++// the DTV. This is because the initial DTV is allocated before our interception ++// mechanism kicks in, and thus we don't recognize it as allocated memory. We ++// can't special-case it either, since we don't know its size. ++// Our solution is to include in the root set all allocations made from ++// ld-linux.so (which is where allocate_and_init() is implemented). This is ++// guaranteed to include all dynamic TLS blocks (and possibly other allocations ++// which we don't care about). ++// On all other platforms, this simply checks to ensure that the caller pc is ++// valid before reporting chunks as leaked. ++static void ProcessPC(Frontier *frontier) { ++ InvalidPCParam arg; ++ arg.frontier = frontier; ++ arg.skip_linker_allocations = ++ flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr; ++ ForEachChunk(MarkInvalidPCCb, &arg); ++} ++ + // Sets the appropriate tag on each chunk. + static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + Frontier *frontier) { +@@ -584,6 +607,9 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + ProcessRootRegions(frontier); + FloodFillTag(frontier, kReachable); + ++ CHECK_EQ(0, frontier->size()); ++ ProcessPC(frontier); ++ + // The check here is relatively expensive, so we do this in a separate flood + // fill. That way we can skip the check for chunks that are reachable + // otherwise. +-- +2.35.0.1.g829a698654 + diff --git a/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch new file mode 100644 index 0000000000..f5493527ab --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch @@ -0,0 +1,180 @@ +From c8a5013045b5aff8e45418925688ca670545980f Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 18 Mar 2022 17:58:28 +0900 +Subject: [PATCH] Revert "[lsan] Move out suppression of invalid PCs from + StopTheWorld" + +This reverts commit f86deb18cab6479a0961ade3807e4729f3a27bdf +because of permafail for a sizable amount of ASan test jobs, where the +worker would die without even leaving any logs. + +--- + compiler-rt/lib/lsan/lsan_common.cpp | 108 +++++++++++++++++---------- + 1 file changed, 67 insertions(+), 41 deletions(-) + +diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp +index 51218770d6dc..0a69b010879b 100644 +--- a/compiler-rt/lib/lsan/lsan_common.cpp ++++ b/compiler-rt/lib/lsan/lsan_common.cpp +@@ -83,11 +83,9 @@ class LeakSuppressionContext { + SuppressionContext context; + bool suppressed_stacks_sorted = true; + InternalMmapVector<u32> suppressed_stacks; +- const LoadedModule *suppress_module = nullptr; + +- void LazyInit(); + Suppression *GetSuppressionForAddr(uptr addr); +- bool SuppressInvalid(const StackTrace &stack); ++ void LazyInit(); + bool SuppressByRule(const StackTrace &stack, uptr hit_count, uptr total_size); + + public: +@@ -138,8 +136,6 @@ void LeakSuppressionContext::LazyInit() { + if (&__lsan_default_suppressions) + context.Parse(__lsan_default_suppressions()); + context.Parse(kStdSuppressions); +- if (flags()->use_tls && flags()->use_ld_allocations) +- suppress_module = GetLinker(); + } + } + +@@ -165,13 +161,6 @@ Suppression *LeakSuppressionContext::GetSuppressionForAddr(uptr addr) { + return s; + } + +-static uptr GetCallerPC(const StackTrace &stack) { +- // The top frame is our malloc/calloc/etc. The next frame is the caller. +- if (stack.size >= 2) +- return stack.trace[1]; +- return 0; +-} +- + # if SANITIZER_APPLE + // Objective-C class data pointers are stored with flags in the low bits, so + // they need to be transformed back into something that looks like a pointer. +@@ -183,34 +172,6 @@ static inline void *MaybeTransformPointer(void *p) { + } + # endif + +-// On Linux, treats all chunks allocated from ld-linux.so as reachable, which +-// covers dynamically allocated TLS blocks, internal dynamic loader's loaded +-// modules accounting etc. +-// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. +-// They are allocated with a __libc_memalign() call in allocate_and_init() +-// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those +-// blocks, but we can make sure they come from our own allocator by intercepting +-// __libc_memalign(). On top of that, there is no easy way to reach them. Their +-// addresses are stored in a dynamically allocated array (the DTV) which is +-// referenced from the static TLS. Unfortunately, we can't just rely on the DTV +-// being reachable from the static TLS, and the dynamic TLS being reachable from +-// the DTV. This is because the initial DTV is allocated before our interception +-// mechanism kicks in, and thus we don't recognize it as allocated memory. We +-// can't special-case it either, since we don't know its size. +-// Our solution is to include in the root set all allocations made from +-// ld-linux.so (which is where allocate_and_init() is implemented). This is +-// guaranteed to include all dynamic TLS blocks (and possibly other allocations +-// which we don't care about). +-// On all other platforms, this simply checks to ensure that the caller pc is +-// valid before reporting chunks as leaked. +-bool LeakSuppressionContext::SuppressInvalid(const StackTrace &stack) { +- uptr caller_pc = GetCallerPC(stack); +- // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark +- // it as reachable, as we can't properly report its allocation stack anyway. +- return !caller_pc || +- (suppress_module && suppress_module->containsAddress(caller_pc)); +-} +- + bool LeakSuppressionContext::SuppressByRule(const StackTrace &stack, + uptr hit_count, uptr total_size) { + for (uptr i = 0; i < stack.size; i++) { +@@ -229,7 +190,7 @@ bool LeakSuppressionContext::Suppress(u32 stack_trace_id, uptr hit_count, + uptr total_size) { + LazyInit(); + StackTrace stack = StackDepotGet(stack_trace_id); +- if (!SuppressInvalid(stack) && !SuppressByRule(stack, hit_count, total_size)) ++ if (!SuppressByRule(stack, hit_count, total_size)) + return false; + suppressed_stacks_sorted = false; + suppressed_stacks.push_back(stack_trace_id); +@@ -600,6 +561,68 @@ static void CollectIgnoredCb(uptr chunk, void *arg) { + } + } + ++static uptr GetCallerPC(const StackTrace &stack) { ++ // The top frame is our malloc/calloc/etc. The next frame is the caller. ++ if (stack.size >= 2) ++ return stack.trace[1]; ++ return 0; ++} ++ ++struct InvalidPCParam { ++ Frontier *frontier; ++ bool skip_linker_allocations; ++}; ++ ++// ForEachChunk callback. If the caller pc is invalid or is within the linker, ++// mark as reachable. Called by ProcessPlatformSpecificAllocations. ++static void MarkInvalidPCCb(uptr chunk, void *arg) { ++ CHECK(arg); ++ InvalidPCParam *param = reinterpret_cast<InvalidPCParam *>(arg); ++ chunk = GetUserBegin(chunk); ++ LsanMetadata m(chunk); ++ if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { ++ u32 stack_id = m.stack_trace_id(); ++ uptr caller_pc = 0; ++ if (stack_id > 0) ++ caller_pc = GetCallerPC(StackDepotGet(stack_id)); ++ // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark ++ // it as reachable, as we can't properly report its allocation stack anyway. ++ if (caller_pc == 0 || (param->skip_linker_allocations && ++ GetLinker()->containsAddress(caller_pc))) { ++ m.set_tag(kIgnored); ++ param->frontier->push_back(chunk); ++ } ++ } ++} ++ ++// On Linux, treats all chunks allocated from ld-linux.so as reachable, which ++// covers dynamically allocated TLS blocks, internal dynamic loader's loaded ++// modules accounting etc. ++// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. ++// They are allocated with a __libc_memalign() call in allocate_and_init() ++// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those ++// blocks, but we can make sure they come from our own allocator by intercepting ++// __libc_memalign(). On top of that, there is no easy way to reach them. Their ++// addresses are stored in a dynamically allocated array (the DTV) which is ++// referenced from the static TLS. Unfortunately, we can't just rely on the DTV ++// being reachable from the static TLS, and the dynamic TLS being reachable from ++// the DTV. This is because the initial DTV is allocated before our interception ++// mechanism kicks in, and thus we don't recognize it as allocated memory. We ++// can't special-case it either, since we don't know its size. ++// Our solution is to include in the root set all allocations made from ++// ld-linux.so (which is where allocate_and_init() is implemented). This is ++// guaranteed to include all dynamic TLS blocks (and possibly other allocations ++// which we don't care about). ++// On all other platforms, this simply checks to ensure that the caller pc is ++// valid before reporting chunks as leaked. ++static void ProcessPC(Frontier *frontier) { ++ InvalidPCParam arg; ++ arg.frontier = frontier; ++ arg.skip_linker_allocations = ++ flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr; ++ ForEachChunk(MarkInvalidPCCb, &arg); ++} ++ + // Sets the appropriate tag on each chunk. + static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + Frontier *frontier, tid_t caller_tid, +@@ -616,6 +639,9 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + ProcessRootRegions(frontier); + FloodFillTag(frontier, kReachable); + ++ CHECK_EQ(0, frontier->size()); ++ ProcessPC(frontier); ++ + // The check here is relatively expensive, so we do this in a separate flood + // fill. That way we can skip the check for chunks that are reachable + // otherwise. +-- +2.35.0.1.g829a698654 + diff --git a/build/build-clang/revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch b/build/build-clang/revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch new file mode 100644 index 0000000000..5a8fb5e701 --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch @@ -0,0 +1,78 @@ +From e602ffe1785cef7f5502223e81345e6b9395fae1 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 11 Mar 2022 10:38:51 +0900 +Subject: [PATCH] Revert "[CMake][WinMsvc] Fix user passed compiler/linker + flags" + +This reverts commit d6d3000a2f6d88ac73e5d4da4005ceadec788a9a +because of bustage building 32-bits compiler-rt for Windows. +See https://reviews.llvm.org/D116709#3374131 +--- + llvm/cmake/platforms/WinMsvc.cmake | 30 ++++++++++++++++++++++-------- + 1 file changed, 22 insertions(+), 8 deletions(-) + +diff --git a/llvm/cmake/platforms/WinMsvc.cmake b/llvm/cmake/platforms/WinMsvc.cmake +index d30701a31858..9a5078894182 100644 +--- a/llvm/cmake/platforms/WinMsvc.cmake ++++ b/llvm/cmake/platforms/WinMsvc.cmake +@@ -84,7 +84,6 @@ + # up a VFS overlay for the SDK headers and case-correcting symlinks for the + # libraries when running on a case-sensitive filesystem. + +-include_guard(GLOBAL) + + # When configuring CMake with a toolchain file against a top-level CMakeLists.txt, + # it will actually run CMake many times, once for each small test program used to +@@ -252,8 +251,6 @@ list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_ASM_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/b + list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_C_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang") + list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_CXX_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang++") + +-# These flags are used during build time. So if CFLAGS/CXXFLAGS/LDFLAGS is set +-# for the target, makes sure these are unset during build time. + set(CROSS_TOOLCHAIN_FLAGS_NATIVE "${_CTF_NATIVE_DEFAULT}" CACHE STRING "") + + set(COMPILE_FLAGS +@@ -280,8 +277,18 @@ if(case_sensitive_filesystem) + endif() + + string(REPLACE ";" " " COMPILE_FLAGS "${COMPILE_FLAGS}") +-string(APPEND CMAKE_C_FLAGS_INIT " ${COMPILE_FLAGS}") +-string(APPEND CMAKE_CXX_FLAGS_INIT " ${COMPILE_FLAGS}") ++ ++# We need to preserve any flags that were passed in by the user. However, we ++# can't append to CMAKE_C_FLAGS and friends directly, because toolchain files ++# will be re-invoked on each reconfigure and therefore need to be idempotent. ++# The assignments to the _INITIAL cache variables don't use FORCE, so they'll ++# only be populated on the initial configure, and their values won't change ++# afterward. ++set(_CMAKE_C_FLAGS_INITIAL "${CMAKE_C_FLAGS}" CACHE STRING "") ++set(CMAKE_C_FLAGS "${_CMAKE_C_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE) ++ ++set(_CMAKE_CXX_FLAGS_INITIAL "${CMAKE_CXX_FLAGS}" CACHE STRING "") ++set(CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE) + + set(LINK_FLAGS + # Prevent CMake from attempting to invoke mt.exe. It only recognizes the slashed form and not the dashed form. +@@ -305,9 +312,16 @@ if(case_sensitive_filesystem) + endif() + + string(REPLACE ";" " " LINK_FLAGS "${LINK_FLAGS}") +-string(APPEND CMAKE_EXE_LINKER_FLAGS_INIT " ${LINK_FLAGS}") +-string(APPEND CMAKE_MODULE_LINKER_FLAGS_INIT " ${LINK_FLAGS}") +-string(APPEND CMAKE_SHARED_LINKER_FLAGS_INIT " ${LINK_FLAGS}") ++ ++# See explanation for compiler flags above for the _INITIAL variables. ++set(_CMAKE_EXE_LINKER_FLAGS_INITIAL "${CMAKE_EXE_LINKER_FLAGS}" CACHE STRING "") ++set(CMAKE_EXE_LINKER_FLAGS "${_CMAKE_EXE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) ++ ++set(_CMAKE_MODULE_LINKER_FLAGS_INITIAL "${CMAKE_MODULE_LINKER_FLAGS}" CACHE STRING "") ++set(CMAKE_MODULE_LINKER_FLAGS "${_CMAKE_MODULE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) ++ ++set(_CMAKE_SHARED_LINKER_FLAGS_INITIAL "${CMAKE_SHARED_LINKER_FLAGS}" CACHE STRING "") ++set(CMAKE_SHARED_LINKER_FLAGS "${_CMAKE_SHARED_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) + + # CMake populates these with a bunch of unnecessary libraries, which requires + # extra case-correcting symlinks and what not. Instead, let projects explicitly +-- +2.35.0.1.g829a698654 + diff --git a/build/build-clang/revert-llvmorg-14-init-3651-g85ba583eba19.patch b/build/build-clang/revert-llvmorg-14-init-3651-g85ba583eba19.patch new file mode 100644 index 0000000000..03b6235eb9 --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-3651-g85ba583eba19.patch @@ -0,0 +1,64 @@ +diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp +index b0b64328b84e..ae0aca09ea4a 100644 +--- a/clang/lib/CodeGen/CGCall.cpp ++++ b/clang/lib/CodeGen/CGCall.cpp +@@ -2087,6 +2087,24 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, + // allows it to work on indirect virtual function calls. + if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::NoMerge); ++ ++ // Add known guaranteed alignment for allocation functions. ++ if (unsigned BuiltinID = Fn->getBuiltinID()) { ++ switch (BuiltinID) { ++ case Builtin::BIaligned_alloc: ++ case Builtin::BIcalloc: ++ case Builtin::BImalloc: ++ case Builtin::BImemalign: ++ case Builtin::BIrealloc: ++ case Builtin::BIstrdup: ++ case Builtin::BIstrndup: ++ RetAttrs.addAlignmentAttr(Context.getTargetInfo().getNewAlign() / ++ Context.getTargetInfo().getCharWidth()); ++ break; ++ default: ++ break; ++ } ++ } + } + + // 'const', 'pure' and 'noalias' attributed functions are also nounwind. +diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp +index 506af5d1458c..5c22df026a7d 100644 +--- a/clang/lib/Sema/SemaDecl.cpp ++++ b/clang/lib/Sema/SemaDecl.cpp +@@ -15215,30 +15215,6 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) { + else + FD->addAttr(CUDAHostAttr::CreateImplicit(Context, FD->getLocation())); + } +- +- // Add known guaranteed alignment for allocation functions. +- switch (BuiltinID) { +- case Builtin::BIaligned_alloc: +- case Builtin::BIcalloc: +- case Builtin::BImalloc: +- case Builtin::BImemalign: +- case Builtin::BIrealloc: +- case Builtin::BIstrdup: +- case Builtin::BIstrndup: { +- if (!FD->hasAttr<AssumeAlignedAttr>()) { +- unsigned NewAlign = Context.getTargetInfo().getNewAlign() / +- Context.getTargetInfo().getCharWidth(); +- IntegerLiteral *Alignment = IntegerLiteral::Create( +- Context, Context.MakeIntValue(NewAlign, Context.UnsignedIntTy), +- Context.UnsignedIntTy, FD->getLocation()); +- FD->addAttr(AssumeAlignedAttr::CreateImplicit( +- Context, Alignment, /*Offset=*/nullptr, FD->getLocation())); +- } +- break; +- } +- default: +- break; +- } + } + + AddKnownFunctionAttributesForReplaceableGlobalAllocationFunction(FD); diff --git a/build/build-clang/revert-llvmorg-14-init-3652-gf3c2094d8c11.patch b/build/build-clang/revert-llvmorg-14-init-3652-gf3c2094d8c11.patch new file mode 100644 index 0000000000..a8c032303a --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-3652-gf3c2094d8c11.patch @@ -0,0 +1,15 @@ +diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp +index 421ca95bb543..506af5d1458c 100644 +--- a/clang/lib/Sema/SemaDecl.cpp ++++ b/clang/lib/Sema/SemaDecl.cpp +@@ -15219,10 +15219,6 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) { + // Add known guaranteed alignment for allocation functions. + switch (BuiltinID) { + case Builtin::BIaligned_alloc: +- if (!FD->hasAttr<AllocAlignAttr>()) +- FD->addAttr(AllocAlignAttr::CreateImplicit(Context, ParamIdx(1, FD), +- FD->getLocation())); +- LLVM_FALLTHROUGH; + case Builtin::BIcalloc: + case Builtin::BImalloc: + case Builtin::BImemalign: diff --git a/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b.patch b/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b.patch new file mode 100644 index 0000000000..5b46585192 --- /dev/null +++ b/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b.patch @@ -0,0 +1,1027 @@ +From cb411520cb7cd5e6e25966911ca55feb5de779e0 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 4 Nov 2022 14:51:38 +0900 +Subject: [PATCH] Revert "[symbolizer] Parse DW_TAG_variable DIs to show line + info for globals" + +This reverts commit cead4eceb01b935fae07bf4a7e91911b344d2fec for causing +yet unidentified problems on some webrtc tests under TSan (bug 1798613). +--- + llvm/include/llvm/DebugInfo/DIContext.h | 4 - + .../llvm/DebugInfo/DWARF/DWARFContext.h | 2 - + llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 7 - + llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 14 - + llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 2 - + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 97 ++-- + llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 60 --- + llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 97 ---- + llvm/lib/DebugInfo/PDB/PDBContext.cpp | 7 - + llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 4 - + .../Symbolize/SymbolizableObjectFile.cpp | 8 - + .../Symbolize/ELF/data-command-symtab.yaml | 3 - + .../tools/llvm-symbolizer/data-location.yaml | 450 ------------------ + llvm/test/tools/llvm-symbolizer/data.s | 3 - + 14 files changed, 61 insertions(+), 697 deletions(-) + delete mode 100644 llvm/test/tools/llvm-symbolizer/data-location.yaml + +diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h +index 9b278b696073..a9f98588cf2d 100644 +--- a/llvm/include/llvm/DebugInfo/DIContext.h ++++ b/llvm/include/llvm/DebugInfo/DIContext.h +@@ -114,8 +114,6 @@ struct DIGlobal { + std::string Name; + uint64_t Start = 0; + uint64_t Size = 0; +- std::string DeclFile; +- uint64_t DeclLine = 0; + + DIGlobal() : Name(DILineInfo::BadString) {} + }; +@@ -241,8 +239,6 @@ public: + virtual DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; +- virtual DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) = 0; + virtual DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +index bf591ed554c6..3365ef8d8ee3 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +@@ -364,8 +364,6 @@ public: + DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +- DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) override; + DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +index 149c5ef4e493..4a4d105a2b23 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +@@ -280,13 +280,6 @@ public: + /// \returns an iterator range for the attributes of the current DIE. + iterator_range<attribute_iterator> attributes() const; + +- /// Gets the type size (in bytes) for this DIE. +- /// +- /// \param PointerSize the pointer size of the containing CU. +- /// \returns if this is a type DIE, or this DIE contains a DW_AT_type, returns +- /// the size of the type. +- Optional<uint64_t> getTypeSize(uint64_t PointerSize); +- + class iterator; + + iterator begin() const; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +index 9188865b4d77..0341344bc7b8 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +@@ -9,7 +9,6 @@ + #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H + #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H + +-#include "llvm/ADT/DenseSet.h" + #include "llvm/ADT/Optional.h" + #include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/SmallVector.h" +@@ -28,7 +27,6 @@ + #include <cstdint> + #include <map> + #include <memory> +-#include <set> + #include <utility> + #include <vector> + +@@ -242,11 +240,6 @@ class DWARFUnit { + /// std::map::upper_bound for address range lookup. + std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; + +- /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable, +- /// to the end address and the corresponding DIE. +- std::map<uint64_t, std::pair<uint64_t, DWARFDie>> VariableDieMap; +- DenseSet<uint64_t> RootsParsedForVariables; +- + using die_iterator_range = + iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; + +@@ -329,9 +322,6 @@ public: + /// Recursively update address to Die map. + void updateAddressDieMap(DWARFDie Die); + +- /// Recursively update address to variable Die map. +- void updateVariableDieMap(DWARFDie Die); +- + void setRangesSection(const DWARFSection *RS, uint64_t Base) { + RangeSection = RS; + RangeSectionBase = Base; +@@ -446,10 +436,6 @@ public: + /// cleared. + DWARFDie getSubroutineForAddress(uint64_t Address); + +- /// Returns variable DIE for the address provided. The pointer is alive as +- /// long as parsed compile unit DIEs are not cleared. +- DWARFDie getVariableForAddress(uint64_t Address); +- + /// getInlinedChainForAddress - fetches inlined chain for a given address. + /// Returns empty chain if there is no subprogram containing address. The + /// chain is valid as long as parsed compile unit DIEs are not cleared. +diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +index 3163c0a1dae0..7b6793f0a639 100644 +--- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h ++++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +@@ -45,8 +45,6 @@ namespace pdb { + DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +- DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) override; + DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +index 19d7d659a86a..1bcfdecfd588 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +@@ -1053,25 +1053,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { + // First, get the offset of the compile unit. + uint64_t CUOffset = getDebugAranges()->findAddress(Address); + // Retrieve the compile unit. +- if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) +- return OffsetCU; +- +- // Global variables are often not found by the above search, for one of two +- // reasons: +- // 1. .debug_aranges may not include global variables. On clang, it seems we +- // put the globals in the aranges, but this isn't true for gcc. +- // 2. Even if the global variable is in a .debug_arange, global variables +- // may not be captured in the [start, end) addresses described by the +- // parent compile unit. +- // +- // So, we walk the CU's and their child DI's manually, looking for the +- // specific global variable. +- for (std::unique_ptr<DWARFUnit> &CU : compile_units()) { +- if (DWARFDie Die = CU->getVariableForAddress(Address)) { +- return static_cast<DWARFCompileUnit *>(CU.get()); +- } +- } +- return nullptr; ++ return getCompileUnitForOffset(CUOffset); + } + + DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { +@@ -1141,6 +1123,64 @@ static bool getFunctionNameAndStartLineForAddress( + return FoundResult; + } + ++static Optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) { ++ if (auto SizeAttr = Type.find(DW_AT_byte_size)) ++ if (Optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant()) ++ return Size; ++ ++ switch (Type.getTag()) { ++ case DW_TAG_pointer_type: ++ case DW_TAG_reference_type: ++ case DW_TAG_rvalue_reference_type: ++ return PointerSize; ++ case DW_TAG_ptr_to_member_type: { ++ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) ++ if (BaseType.getTag() == DW_TAG_subroutine_type) ++ return 2 * PointerSize; ++ return PointerSize; ++ } ++ case DW_TAG_const_type: ++ case DW_TAG_immutable_type: ++ case DW_TAG_volatile_type: ++ case DW_TAG_restrict_type: ++ case DW_TAG_typedef: { ++ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) ++ return getTypeSize(BaseType, PointerSize); ++ break; ++ } ++ case DW_TAG_array_type: { ++ DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type); ++ if (!BaseType) ++ return Optional<uint64_t>(); ++ Optional<uint64_t> BaseSize = getTypeSize(BaseType, PointerSize); ++ if (!BaseSize) ++ return Optional<uint64_t>(); ++ uint64_t Size = *BaseSize; ++ for (DWARFDie Child : Type) { ++ if (Child.getTag() != DW_TAG_subrange_type) ++ continue; ++ ++ if (auto ElemCountAttr = Child.find(DW_AT_count)) ++ if (Optional<uint64_t> ElemCount = ++ ElemCountAttr->getAsUnsignedConstant()) ++ Size *= *ElemCount; ++ if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) ++ if (Optional<int64_t> UpperBound = ++ UpperBoundAttr->getAsSignedConstant()) { ++ int64_t LowerBound = 0; ++ if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) ++ LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); ++ Size *= *UpperBound - LowerBound + 1; ++ } ++ } ++ return Size; ++ } ++ default: ++ break; ++ } ++ return Optional<uint64_t>(); ++} ++ + static Optional<int64_t> + getExpressionFrameOffset(ArrayRef<uint8_t> Expr, + Optional<unsigned> FrameBaseReg) { +@@ -1201,7 +1241,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, + if (Optional<const char *> Name = dwarf::toString(*NameAttr)) + Local.Name = *Name; + if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type)) +- Local.Size = Type.getTypeSize(getCUAddrSize()); ++ Local.Size = getTypeSize(Type, getCUAddrSize()); + if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { + if (const auto *LT = CU->getContext().getLineTableForUnit(CU)) + LT->getFileNameByIndex( +@@ -1242,6 +1282,7 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { + DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + DILineInfoSpecifier Spec) { + DILineInfo Result; ++ + DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; +@@ -1256,22 +1297,6 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + Spec.FLIKind, Result); + } + } +- +- return Result; +-} +- +-DILineInfo +-DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { +- DILineInfo Result; +- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); +- if (!CU) +- return Result; +- +- if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) { +- Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath); +- Result.Line = Die.getDeclLine(); +- } +- + return Result; + } + +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +index 15a2d23c4fd2..9bf15c30f714 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +@@ -492,66 +492,6 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, + CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); + } + +-Optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) { +- if (auto SizeAttr = find(DW_AT_byte_size)) +- if (Optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant()) +- return Size; +- +- switch (getTag()) { +- case DW_TAG_pointer_type: +- case DW_TAG_reference_type: +- case DW_TAG_rvalue_reference_type: +- return PointerSize; +- case DW_TAG_ptr_to_member_type: { +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- if (BaseType.getTag() == DW_TAG_subroutine_type) +- return 2 * PointerSize; +- return PointerSize; +- } +- case DW_TAG_const_type: +- case DW_TAG_immutable_type: +- case DW_TAG_volatile_type: +- case DW_TAG_restrict_type: +- case DW_TAG_typedef: { +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- return BaseType.getTypeSize(PointerSize); +- break; +- } +- case DW_TAG_array_type: { +- DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); +- if (!BaseType) +- return None; +- Optional<uint64_t> BaseSize = BaseType.getTypeSize(PointerSize); +- if (!BaseSize) +- return None; +- uint64_t Size = *BaseSize; +- for (DWARFDie Child : *this) { +- if (Child.getTag() != DW_TAG_subrange_type) +- continue; +- +- if (auto ElemCountAttr = Child.find(DW_AT_count)) +- if (Optional<uint64_t> ElemCount = +- ElemCountAttr->getAsUnsignedConstant()) +- Size *= *ElemCount; +- if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) +- if (Optional<int64_t> UpperBound = +- UpperBoundAttr->getAsSignedConstant()) { +- int64_t LowerBound = 0; +- if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) +- LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); +- Size *= *UpperBound - LowerBound + 1; +- } +- } +- return Size; +- } +- default: +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- return BaseType.getTypeSize(PointerSize); +- break; +- } +- return None; +-} +- + /// Helper to dump a DIE with all of its parents, but no siblings. + static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, + DIDumpOptions DumpOpts, unsigned Depth = 0) { +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +index 74667fcb92bc..148711f0246f 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +@@ -9,7 +9,6 @@ + #include "llvm/DebugInfo/DWARF/DWARFUnit.h" + #include "llvm/ADT/SmallString.h" + #include "llvm/ADT/StringRef.h" +-#include "llvm/BinaryFormat/Dwarf.h" + #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" + #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" + #include "llvm/DebugInfo/DWARF/DWARFContext.h" +@@ -19,13 +18,11 @@ + #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" + #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" + #include "llvm/DebugInfo/DWARF/DWARFDie.h" +-#include "llvm/DebugInfo/DWARF/DWARFExpression.h" + #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" + #include "llvm/DebugInfo/DWARF/DWARFListTable.h" + #include "llvm/DebugInfo/DWARF/DWARFObject.h" + #include "llvm/DebugInfo/DWARF/DWARFSection.h" + #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" +-#include "llvm/Object/ObjectFile.h" + #include "llvm/Support/DataExtractor.h" + #include "llvm/Support/Errc.h" + #include "llvm/Support/Path.h" +@@ -752,100 +749,6 @@ DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { + return R->second.second; + } + +-void DWARFUnit::updateVariableDieMap(DWARFDie Die) { +- for (DWARFDie Child : Die) { +- if (isType(Child.getTag())) +- continue; +- updateVariableDieMap(Child); +- } +- +- if (Die.getTag() != DW_TAG_variable) +- return; +- +- Expected<DWARFLocationExpressionsVector> Locations = +- Die.getLocations(DW_AT_location); +- if (!Locations) { +- // Missing DW_AT_location is fine here. +- consumeError(Locations.takeError()); +- return; +- } +- +- uint64_t Address = UINT64_MAX; +- +- for (const DWARFLocationExpression &Location : *Locations) { +- uint8_t AddressSize = getAddressByteSize(); +- DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize); +- DWARFExpression Expr(Data, AddressSize); +- auto It = Expr.begin(); +- if (It == Expr.end()) +- continue; +- +- // Match exactly the main sequence used to describe global variables: +- // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence +- // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in +- // future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()` is +- // a good starting point) is extended to use further expressions, this code +- // needs to be updated. +- uint64_t LocationAddr; +- if (It->getCode() == dwarf::DW_OP_addr) { +- LocationAddr = It->getRawOperand(0); +- } else if (It->getCode() == dwarf::DW_OP_addrx) { +- uint64_t DebugAddrOffset = It->getRawOperand(0); +- if (auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset)) { +- LocationAddr = Pointer->Address; +- } +- } else { +- continue; +- } +- +- // Read the optional 2nd operand, a DW_OP_plus_uconst. +- if (++It != Expr.end()) { +- if (It->getCode() != dwarf::DW_OP_plus_uconst) +- continue; +- +- LocationAddr += It->getRawOperand(0); +- +- // Probe for a 3rd operand, if it exists, bail. +- if (++It != Expr.end()) +- continue; +- } +- +- Address = LocationAddr; +- break; +- } +- +- // Get the size of the global variable. If all else fails (i.e. the global has +- // no type), then we use a size of one to still allow symbolization of the +- // exact address. +- uint64_t GVSize = 1; +- if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type)) +- if (Optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize())) +- GVSize = *Size; +- +- if (Address != UINT64_MAX) +- VariableDieMap[Address] = {Address + GVSize, Die}; +-} +- +-DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) { +- extractDIEsIfNeeded(false); +- +- auto RootDie = getUnitDIE(); +- +- auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset()); +- if (RootLookup.second) +- updateVariableDieMap(RootDie); +- +- auto R = VariableDieMap.upper_bound(Address); +- if (R == VariableDieMap.begin()) +- return DWARFDie(); +- +- // upper_bound's previous item contains Address. +- --R; +- if (Address >= R->second.first) +- return DWARFDie(); +- return R->second.second; +-} +- + void + DWARFUnit::getInlinedChainForAddress(uint64_t Address, + SmallVectorImpl<DWARFDie> &InlinedChain) { +diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp +index e600fb7385f1..0444093d7622 100644 +--- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp ++++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp +@@ -64,13 +64,6 @@ DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address, + return Result; + } + +-DILineInfo +-PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) { +- // Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global +- // variables) aren't capable of carrying line information. +- return DILineInfo(); +-} +- + DILineInfoTable + PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address, + uint64_t Size, +diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +index 877380213f21..496c8149782e 100644 +--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp ++++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +@@ -206,10 +206,6 @@ void PlainPrinterBase::print(const Request &Request, const DIGlobal &Global) { + Name = DILineInfo::Addr2LineBadString; + OS << Name << "\n"; + OS << Global.Start << " " << Global.Size << "\n"; +- if (Global.DeclFile.empty()) +- OS << "??:?\n"; +- else +- OS << Global.DeclFile << ":" << Global.DeclLine << "\n"; + printFooter(); + } + +diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +index d8ee9264b64f..fcff531895a2 100644 +--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp ++++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +@@ -327,14 +327,6 @@ DIGlobal SymbolizableObjectFile::symbolizeData( + std::string FileName; + getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, + FileName); +- Res.DeclFile = FileName; +- +- // Try and get a better filename:lineno pair from the debuginfo, if present. +- DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset); +- if (DL.Line != 0) { +- Res.DeclFile = DL.FileName; +- Res.DeclLine = DL.Line; +- } + return Res; + } + +diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +index 83af3111c5dd..984e444b2fda 100644 +--- a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml ++++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +@@ -7,15 +7,12 @@ + + # CHECK: func + # CHECK-NEXT: 4096 1 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: data + # CHECK-NEXT: 8192 2 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: notype + # CHECK-NEXT: 8194 3 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + + --- !ELF +diff --git a/llvm/test/tools/llvm-symbolizer/data-location.yaml b/llvm/test/tools/llvm-symbolizer/data-location.yaml +deleted file mode 100644 +index 54f7d9be44a1..000000000000 +--- a/llvm/test/tools/llvm-symbolizer/data-location.yaml ++++ /dev/null +@@ -1,450 +0,0 @@ +-## Show that when "DATA" is used with an address, it forces the found location +-## to be symbolized as data, including the source information. +- +-# RUN: yaml2obj %s -o %t.so +- +-# RUN: llvm-symbolizer 'DATA 0x304d0' 'DATA 0x304d1' 'DATA 0x304d3' \ +-# RUN: 'DATA 0x304c0' 'DATA 0x304c8' 'DATA 0x304d4' 'DATA 0x304dc' \ +-# RUN: 'DATA 0x304d8' --obj=%t.so | FileCheck %s +- +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +- +-## Check that lookups in the middle of the symbol are also resolved correctly. +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +- +-## Now, the remainder of the symbols. +-# CHECK-NEXT: data_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:2 +-# CHECK-EMPTY: +-# CHECK-NEXT: str +-# CHECK-NEXT: {{[0-9]+}} 8 +-# CHECK-NEXT: /tmp/file.cpp:4 +-# CHECK-EMPTY: +-# CHECK-NEXT: f()::function_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:8 +-# CHECK-EMPTY: +- +-## Including the one that includes an addend. +-# CHECK-NEXT: alpha +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:12 +-# CHECK-EMPTY: +-# CHECK-NEXT: beta +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:13 +-# CHECK-EMPTY: +- +-## Ensure there's still a global that's offset-based. +-# RUN: llvm-dwarfdump --debug-info %t.so | FileCheck %s --check-prefix=OFFSET +- +-# OFFSET: DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4) +- +-################################################################################ +-## File below was generated using: +-## +-## $ clang++ -g -O3 /tmp/file.cpp -shared -fuse-ld=lld -nostdlib \ +-## -target aarch64-linux-gnuabi -mllvm -global-merge-ignore-single-use \ +-## -o /tmp/file.so +-## +-## With /tmp/file.cpp as: +-## 1: int bss_global; +-## 2: int data_global = 2; +-## 3: +-## 4: const char* str = +-## 5: "12345678"; +-## 6: +-## 7: int* f() { +-## 8: static int function_global; +-## 9: return &function_global; +-## 10: } +-## 11: +-## 12: static int alpha; +-## 13: static int beta; +-## 14: int *f(bool b) { return beta ? &alpha : β } +-## 15: +-## +-## ... then, one can get the offsets using `nm`, like: +-## $ nm out.so | grep bss_global +-## 00000000000038fc B bss_global +-## +-## Note the use of the aarch64 target (with -nostdlib in order to allow linkage +-## without libraries for cross-compilation) as well as -O3 and +-## -global-merge-ignore-single-use. This is a specific combination that makes +-## the compiler emit the `alpha` global variable with a more complex +-## DW_AT_location than just a DW_OP_addr/DW_OP_addrx. In this instance, it +-## outputs a `DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4)`. +-## +-## Ideally, this would be tested by invoking clang directly on a C source file, +-## but unfortunately there's no way to do that for LLVM tests. The other option +-## is to compile IR to an objfile, but llvm-symbolizer doesn't understand that +-## two symbols can have the same address in different sections. In the code +-## above, for example, we'd have bss_global at .bss+0x0, and data_global at +-## .data+0x0, and so the symbolizer would only print one of them. Hence, we have +-## the ugly dso-to-yaml blob below. +-## +-## For now, constant strings don't have a debuginfo entry, and so can't be +-## symbolized correctly. In future (if D123534 gets merged), this can be updated +-## to include a check that llvm-symbolizer can also symbolize constant strings, +-## like `str` above (basically that &"12345678" should be symbolizable) +-## to the specific line. Then, you can find the address of the constant string +-## from the relocation: +-## +-## $ nm out.so | grep str +-## 00000000000038c0 D str +-## $ llvm-objdump -R out.so | grep 38c0 +-## 00000000000038c0 R_X86_64_RELATIVE *ABS*+0x4f8 # <-- 0x4f8 +-################################################################################ +- +---- !ELF +-FileHeader: +- Class: ELFCLASS64 +- Data: ELFDATA2LSB +- Type: ET_DYN +- Machine: EM_AARCH64 +-ProgramHeaders: +- - Type: PT_PHDR +- Flags: [ PF_R ] +- VAddr: 0x40 +- Align: 0x8 +- - Type: PT_LOAD +- Flags: [ PF_R ] +- FirstSec: .dynsym +- LastSec: .eh_frame +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_X, PF_R ] +- FirstSec: .text +- LastSec: .text +- VAddr: 0x103E4 +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_W, PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_W, PF_R ] +- FirstSec: .data +- LastSec: .bss +- VAddr: 0x304C0 +- Align: 0x10000 +- - Type: PT_DYNAMIC +- Flags: [ PF_W, PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- Align: 0x8 +- - Type: PT_GNU_RELRO +- Flags: [ PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- - Type: PT_GNU_EH_FRAME +- Flags: [ PF_R ] +- FirstSec: .eh_frame_hdr +- LastSec: .eh_frame_hdr +- VAddr: 0x37C +- Align: 0x4 +- - Type: PT_GNU_STACK +- Flags: [ PF_W, PF_R ] +- Align: 0x0 +-Sections: +- - Name: .dynsym +- Type: SHT_DYNSYM +- Flags: [ SHF_ALLOC ] +- Address: 0x238 +- Link: .dynstr +- AddressAlign: 0x8 +- - Name: .gnu.hash +- Type: SHT_GNU_HASH +- Flags: [ SHF_ALLOC ] +- Address: 0x2C8 +- Link: .dynsym +- AddressAlign: 0x8 +- Header: +- SymNdx: 0x1 +- Shift2: 0x1A +- BloomFilter: [ 0x400188002180000C ] +- HashBuckets: [ 0x1 ] +- HashValues: [ 0xEE8502A, 0xEE85016, 0xC033991C, 0x61F7372E, 0xB88AB7F ] +- - Name: .hash +- Type: SHT_HASH +- Flags: [ SHF_ALLOC ] +- Address: 0x2F8 +- Link: .dynsym +- AddressAlign: 0x4 +- Bucket: [ 5, 0, 4, 0, 3, 0 ] +- Chain: [ 0, 0, 0, 1, 2, 0 ] +- - Name: .dynstr +- Type: SHT_STRTAB +- Flags: [ SHF_ALLOC ] +- Address: 0x330 +- AddressAlign: 0x1 +- - Name: .rela.dyn +- Type: SHT_RELA +- Flags: [ SHF_ALLOC ] +- Address: 0x358 +- Link: .dynsym +- AddressAlign: 0x8 +- Relocations: +- - Offset: 0x304C8 +- Type: R_AARCH64_RELATIVE +- Addend: 880 +- - Name: .rodata +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ] +- Address: 0x370 +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: '313233343536373800' +- - Name: .eh_frame_hdr +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC ] +- Address: 0x37C +- AddressAlign: 0x4 +- Content: 011B033B18000000020000006800010034000000740001004C000000 +- - Name: .eh_frame +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC ] +- Address: 0x398 +- AddressAlign: 0x8 +- Content: 1400000000000000017A5200017C1E011B0C1F0000000000140000001C0000002C0001000C00000000000000000000001400000034000000200001001C000000000000000000000000000000 +- - Name: .text +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x103E4 +- AddressAlign: 0x4 +- Content: 0001009000501391C0035FD60801009008611391E90308AA2A4540B85F0100710001899AC0035FD6 +- - Name: .dynamic +- Type: SHT_DYNAMIC +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x20410 +- Link: .dynstr +- AddressAlign: 0x8 +- Entries: +- - Tag: DT_RELA +- Value: 0x358 +- - Tag: DT_RELASZ +- Value: 0x18 +- - Tag: DT_RELAENT +- Value: 0x18 +- - Tag: DT_RELACOUNT +- Value: 0x1 +- - Tag: DT_SYMTAB +- Value: 0x238 +- - Tag: DT_SYMENT +- Value: 0x18 +- - Tag: DT_STRTAB +- Value: 0x330 +- - Tag: DT_STRSZ +- Value: 0x28 +- - Tag: DT_GNU_HASH +- Value: 0x2C8 +- - Tag: DT_HASH +- Value: 0x2F8 +- - Tag: DT_NULL +- Value: 0x0 +- - Name: .data +- Type: SHT_PROGBITS +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x304C0 +- AddressAlign: 0x8 +- Content: '02000000000000000000000000000000' +- - Name: .bss +- Type: SHT_NOBITS +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x304D0 +- AddressAlign: 0x4 +- Size: 0x10 +- - Name: .debug_abbrev +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 011101252513050325721710171B25111B120673170000023400032549133F193A0B3B0B0218000003240003253E0B0B0B0000040F004913000005260049130000062E01111B120640187A196E2503253A0B3B0B49133F190000073400032549133A0B3B0B02180000083400032549133A0B3B0B02186E25000009050003253A0B3B0B4913000000 +- - Name: .debug_info +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: AB0000000500010800000000010021000108000000000000000205280000000800000002032E000000000102A1000304050402052E000000000202A101020648000000000402A102044D00000005520000000307080106050C000000016F0D0E0007A500000007082E000000000802A1030008092E000000000D02A1040A080B2E000000000C04A10423040C06061C000000016F0F0E000EA50000000910000EAA00000000042E0000000311020100 +- - Name: .debug_str_offsets +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 4C00000005000000A2000000000000002C00000059000000280000001C00000072000000640000008C0000008700000069000000140000007B0000009C0000001A0000000E0000008500000076000000 +- - Name: .comment +- Type: SHT_PROGBITS +- Flags: [ SHF_MERGE, SHF_STRINGS ] +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: 4C696E6B65723A204C4C442031352E302E300000636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420306462616566363162353666306566306162306366333865613932666663316633356265653366662900 +- - Name: .debug_line +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 620000000500080037000000010101FB0E0D00010101010000000100000101011F010E00000003011F020F051E0100000000006C97BBE59F7DC6A9EA956633431DA63E0400000902E4030100000000001805030A140500BF05190A0105120608740204000101 +- - Name: .debug_line_str +- Type: SHT_PROGBITS +- Flags: [ SHF_MERGE, SHF_STRINGS ] +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: 2F746D702F66696C652E637070002F7573722F6C6F63616C2F676F6F676C652F686F6D652F6D69746368702F6C6C766D2D6275696C642F6F707400 +-Symbols: +- - Name: file.cpp +- Type: STT_FILE +- Index: SHN_ABS +- - Name: '$x.0' +- Section: .text +- Value: 0x103E4 +- - Name: _ZZ1fvE15function_global +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304D4 +- Size: 0x4 +- - Name: '$d.1' +- Section: .bss +- Value: 0x304D0 +- - Name: '$d.2' +- Section: .data +- Value: 0x304C0 +- - Name: '$d.3' +- Section: .rodata +- Value: 0x370 +- - Name: '$d.4' +- Section: .debug_abbrev +- - Name: '$d.5' +- Section: .debug_info +- - Name: '$d.6' +- Section: .debug_str_offsets +- - Name: '$d.7' +- Section: .debug_str +- Value: 0xA2 +- - Name: '$d.8' +- Section: .debug_addr +- - Name: _ZL4beta +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304D8 +- Size: 0x4 +- - Name: _ZL5alpha +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304DC +- Size: 0x4 +- - Name: '$d.9' +- Section: .comment +- Value: 0x13 +- - Name: '$d.10' +- Section: .eh_frame +- Value: 0x398 +- - Name: '$d.11' +- Section: .debug_line +- - Name: '$d.12' +- Section: .debug_line_str +- Value: 0xE +- - Name: _DYNAMIC +- Section: .dynamic +- Value: 0x20410 +- Other: [ STV_HIDDEN ] +- - Name: _Z1fv +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103E4 +- Size: 0xC +- - Name: _Z1fb +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103F0 +- Size: 0x1C +- - Name: bss_global +- Type: STT_OBJECT +- Section: .bss +- Binding: STB_GLOBAL +- Value: 0x304D0 +- Size: 0x4 +- - Name: data_global +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C0 +- Size: 0x4 +- - Name: str +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C8 +- Size: 0x8 +-DynamicSymbols: +- - Name: _Z1fv +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103E4 +- Size: 0xC +- - Name: _Z1fb +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103F0 +- Size: 0x1C +- - Name: bss_global +- Type: STT_OBJECT +- Section: .bss +- Binding: STB_GLOBAL +- Value: 0x304D0 +- Size: 0x4 +- - Name: data_global +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C0 +- Size: 0x4 +- - Name: str +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C8 +- Size: 0x8 +-DWARF: +- debug_str: +- - '/tmp/file.cpp' +- - _Z1fb +- - alpha +- - f +- - data_global +- - int +- - '/usr/local/google/home/mitchp/llvm-build/opt' +- - bss_global +- - char +- - _ZL4beta +- - str +- - bool +- - _ZL5alpha +- - b +- - beta +- - function_global +- - _Z1fv +- - 'clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0dbaef61b56f0ef0ab0cf38ea92ffc1f35bee3ff)' +- debug_addr: +- - Length: 0x3C +- Version: 0x5 +- AddressSize: 0x8 +- Entries: +- - Address: 0x304D0 +- - Address: 0x304C0 +- - Address: 0x304C8 +- - Address: 0x304D4 +- - Address: 0x304D8 +- - Address: 0x103E4 +- - Address: 0x103F0 +-... +diff --git a/llvm/test/tools/llvm-symbolizer/data.s b/llvm/test/tools/llvm-symbolizer/data.s +index cc9503c59141..e8039f146dbd 100644 +--- a/llvm/test/tools/llvm-symbolizer/data.s ++++ b/llvm/test/tools/llvm-symbolizer/data.s +@@ -7,12 +7,9 @@ + + # CHECK: d1 + # CHECK-NEXT: 0 8 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: d2 + # CHECK-NEXT: 8 4 +-# CHECK-NEXT: ??:? +-# CHECK-EMPTY: + + d1: + .quad 0x1122334455667788 +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch b/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch new file mode 100644 index 0000000000..93c7e7d767 --- /dev/null +++ b/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch @@ -0,0 +1,1027 @@ +From cb411520cb7cd5e6e25966911ca55feb5de779e0 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 4 Nov 2022 14:51:38 +0900 +Subject: [PATCH] Revert "[symbolizer] Parse DW_TAG_variable DIs to show line + info for globals" + +This reverts commit cead4eceb01b935fae07bf4a7e91911b344d2fec for causing +yet unidentified problems on some webrtc tests under TSan (bug 1798613). +--- + llvm/include/llvm/DebugInfo/DIContext.h | 4 - + .../llvm/DebugInfo/DWARF/DWARFContext.h | 2 - + llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 7 - + llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 14 - + llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 2 - + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 97 ++-- + llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 60 --- + llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 97 ---- + llvm/lib/DebugInfo/PDB/PDBContext.cpp | 7 - + llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 4 - + .../Symbolize/SymbolizableObjectFile.cpp | 8 - + .../Symbolize/ELF/data-command-symtab.yaml | 3 - + .../tools/llvm-symbolizer/data-location.yaml | 450 ------------------ + llvm/test/tools/llvm-symbolizer/data.s | 3 - + 14 files changed, 61 insertions(+), 697 deletions(-) + delete mode 100644 llvm/test/tools/llvm-symbolizer/data-location.yaml + +diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h +index 9b278b696073..a9f98588cf2d 100644 +--- a/llvm/include/llvm/DebugInfo/DIContext.h ++++ b/llvm/include/llvm/DebugInfo/DIContext.h +@@ -114,8 +114,6 @@ struct DIGlobal { + std::string Name; + uint64_t Start = 0; + uint64_t Size = 0; +- std::string DeclFile; +- uint64_t DeclLine = 0; + + DIGlobal() : Name(DILineInfo::BadString) {} + }; +@@ -241,8 +239,6 @@ public: + virtual DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; +- virtual DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) = 0; + virtual DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +index bf591ed554c6..3365ef8d8ee3 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +@@ -364,8 +364,6 @@ public: + DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +- DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) override; + DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +index 149c5ef4e493..4a4d105a2b23 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +@@ -280,13 +280,6 @@ public: + /// \returns an iterator range for the attributes of the current DIE. + iterator_range<attribute_iterator> attributes() const; + +- /// Gets the type size (in bytes) for this DIE. +- /// +- /// \param PointerSize the pointer size of the containing CU. +- /// \returns if this is a type DIE, or this DIE contains a DW_AT_type, returns +- /// the size of the type. +- std::optional<uint64_t> getTypeSize(uint64_t PointerSize); +- + class iterator; + + iterator begin() const; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +index 9188865b4d77..0341344bc7b8 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +@@ -9,7 +9,6 @@ + #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H + #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H + +-#include "llvm/ADT/DenseSet.h" + #include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/StringRef.h" +@@ -28,7 +27,6 @@ + #include <cstdint> + #include <map> + #include <memory> +-#include <set> + #include <utility> + #include <vector> + +@@ -242,11 +240,6 @@ class DWARFUnit { + /// std::map::upper_bound for address range lookup. + std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; + +- /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable, +- /// to the end address and the corresponding DIE. +- std::map<uint64_t, std::pair<uint64_t, DWARFDie>> VariableDieMap; +- DenseSet<uint64_t> RootsParsedForVariables; +- + using die_iterator_range = + iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; + +@@ -329,9 +322,6 @@ public: + /// Recursively update address to Die map. + void updateAddressDieMap(DWARFDie Die); + +- /// Recursively update address to variable Die map. +- void updateVariableDieMap(DWARFDie Die); +- + void setRangesSection(const DWARFSection *RS, uint64_t Base) { + RangeSection = RS; + RangeSectionBase = Base; +@@ -446,10 +436,6 @@ public: + /// cleared. + DWARFDie getSubroutineForAddress(uint64_t Address); + +- /// Returns variable DIE for the address provided. The pointer is alive as +- /// long as parsed compile unit DIEs are not cleared. +- DWARFDie getVariableForAddress(uint64_t Address); +- + /// getInlinedChainForAddress - fetches inlined chain for a given address. + /// Returns empty chain if there is no subprogram containing address. The + /// chain is valid as long as parsed compile unit DIEs are not cleared. +diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +index 3163c0a1dae0..7b6793f0a639 100644 +--- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h ++++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +@@ -45,8 +45,6 @@ namespace pdb { + DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +- DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) override; + DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +index 19d7d659a86a..1bcfdecfd588 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +@@ -1053,25 +1053,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { + // First, get the offset of the compile unit. + uint64_t CUOffset = getDebugAranges()->findAddress(Address); + // Retrieve the compile unit. +- if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) +- return OffsetCU; +- +- // Global variables are often not found by the above search, for one of two +- // reasons: +- // 1. .debug_aranges may not include global variables. On clang, it seems we +- // put the globals in the aranges, but this isn't true for gcc. +- // 2. Even if the global variable is in a .debug_arange, global variables +- // may not be captured in the [start, end) addresses described by the +- // parent compile unit. +- // +- // So, we walk the CU's and their child DI's manually, looking for the +- // specific global variable. +- for (std::unique_ptr<DWARFUnit> &CU : compile_units()) { +- if (DWARFDie Die = CU->getVariableForAddress(Address)) { +- return static_cast<DWARFCompileUnit *>(CU.get()); +- } +- } +- return nullptr; ++ return getCompileUnitForOffset(CUOffset); + } + + DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { +@@ -1141,6 +1123,64 @@ static bool getFunctionNameAndStartLineForAddress( + return FoundResult; + } + ++static std::optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) { ++ if (auto SizeAttr = Type.find(DW_AT_byte_size)) ++ if (std::optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant()) ++ return Size; ++ ++ switch (Type.getTag()) { ++ case DW_TAG_pointer_type: ++ case DW_TAG_reference_type: ++ case DW_TAG_rvalue_reference_type: ++ return PointerSize; ++ case DW_TAG_ptr_to_member_type: { ++ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) ++ if (BaseType.getTag() == DW_TAG_subroutine_type) ++ return 2 * PointerSize; ++ return PointerSize; ++ } ++ case DW_TAG_const_type: ++ case DW_TAG_immutable_type: ++ case DW_TAG_volatile_type: ++ case DW_TAG_restrict_type: ++ case DW_TAG_typedef: { ++ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) ++ return getTypeSize(BaseType, PointerSize); ++ break; ++ } ++ case DW_TAG_array_type: { ++ DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type); ++ if (!BaseType) ++ return std::optional<uint64_t>(); ++ std::optional<uint64_t> BaseSize = getTypeSize(BaseType, PointerSize); ++ if (!BaseSize) ++ return std::optional<uint64_t>(); ++ uint64_t Size = *BaseSize; ++ for (DWARFDie Child : Type) { ++ if (Child.getTag() != DW_TAG_subrange_type) ++ continue; ++ ++ if (auto ElemCountAttr = Child.find(DW_AT_count)) ++ if (std::optional<uint64_t> ElemCount = ++ ElemCountAttr->getAsUnsignedConstant()) ++ Size *= *ElemCount; ++ if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) ++ if (std::optional<int64_t> UpperBound = ++ UpperBoundAttr->getAsSignedConstant()) { ++ int64_t LowerBound = 0; ++ if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) ++ LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); ++ Size *= *UpperBound - LowerBound + 1; ++ } ++ } ++ return Size; ++ } ++ default: ++ break; ++ } ++ return std::optional<uint64_t>(); ++} ++ + static std::optional<int64_t> + getExpressionFrameOffset(ArrayRef<uint8_t> Expr, + std::optional<unsigned> FrameBaseReg) { +@@ -1201,7 +1241,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, + if (std::optional<const char *> Name = dwarf::toString(*NameAttr)) + Local.Name = *Name; + if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type)) +- Local.Size = Type.getTypeSize(getCUAddrSize()); ++ Local.Size = getTypeSize(Type, getCUAddrSize()); + if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { + if (const auto *LT = CU->getContext().getLineTableForUnit(CU)) + LT->getFileNameByIndex( +@@ -1242,6 +1282,7 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { + DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + DILineInfoSpecifier Spec) { + DILineInfo Result; ++ + DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; +@@ -1256,22 +1297,6 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + Spec.FLIKind, Result); + } + } +- +- return Result; +-} +- +-DILineInfo +-DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { +- DILineInfo Result; +- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); +- if (!CU) +- return Result; +- +- if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) { +- Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath); +- Result.Line = Die.getDeclLine(); +- } +- + return Result; + } + +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +index 15a2d23c4fd2..9bf15c30f714 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +@@ -492,66 +492,6 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, + CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); + } + +-std::optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) { +- if (auto SizeAttr = find(DW_AT_byte_size)) +- if (std::optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant()) +- return Size; +- +- switch (getTag()) { +- case DW_TAG_pointer_type: +- case DW_TAG_reference_type: +- case DW_TAG_rvalue_reference_type: +- return PointerSize; +- case DW_TAG_ptr_to_member_type: { +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- if (BaseType.getTag() == DW_TAG_subroutine_type) +- return 2 * PointerSize; +- return PointerSize; +- } +- case DW_TAG_const_type: +- case DW_TAG_immutable_type: +- case DW_TAG_volatile_type: +- case DW_TAG_restrict_type: +- case DW_TAG_typedef: { +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- return BaseType.getTypeSize(PointerSize); +- break; +- } +- case DW_TAG_array_type: { +- DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); +- if (!BaseType) +- return std::nullopt; +- std::optional<uint64_t> BaseSize = BaseType.getTypeSize(PointerSize); +- if (!BaseSize) +- return std::nullopt; +- uint64_t Size = *BaseSize; +- for (DWARFDie Child : *this) { +- if (Child.getTag() != DW_TAG_subrange_type) +- continue; +- +- if (auto ElemCountAttr = Child.find(DW_AT_count)) +- if (std::optional<uint64_t> ElemCount = +- ElemCountAttr->getAsUnsignedConstant()) +- Size *= *ElemCount; +- if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) +- if (std::optional<int64_t> UpperBound = +- UpperBoundAttr->getAsSignedConstant()) { +- int64_t LowerBound = 0; +- if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) +- LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); +- Size *= *UpperBound - LowerBound + 1; +- } +- } +- return Size; +- } +- default: +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- return BaseType.getTypeSize(PointerSize); +- break; +- } +- return std::nullopt; +-} +- + /// Helper to dump a DIE with all of its parents, but no siblings. + static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, + DIDumpOptions DumpOpts, unsigned Depth = 0) { +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +index 74667fcb92bc..148711f0246f 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +@@ -9,7 +9,6 @@ + #include "llvm/DebugInfo/DWARF/DWARFUnit.h" + #include "llvm/ADT/SmallString.h" + #include "llvm/ADT/StringRef.h" +-#include "llvm/BinaryFormat/Dwarf.h" + #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" + #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" + #include "llvm/DebugInfo/DWARF/DWARFContext.h" +@@ -19,13 +18,11 @@ + #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" + #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" + #include "llvm/DebugInfo/DWARF/DWARFDie.h" +-#include "llvm/DebugInfo/DWARF/DWARFExpression.h" + #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" + #include "llvm/DebugInfo/DWARF/DWARFListTable.h" + #include "llvm/DebugInfo/DWARF/DWARFObject.h" + #include "llvm/DebugInfo/DWARF/DWARFSection.h" + #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" +-#include "llvm/Object/ObjectFile.h" + #include "llvm/Support/DataExtractor.h" + #include "llvm/Support/Errc.h" + #include "llvm/Support/Path.h" +@@ -752,100 +749,6 @@ DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { + return R->second.second; + } + +-void DWARFUnit::updateVariableDieMap(DWARFDie Die) { +- for (DWARFDie Child : Die) { +- if (isType(Child.getTag())) +- continue; +- updateVariableDieMap(Child); +- } +- +- if (Die.getTag() != DW_TAG_variable) +- return; +- +- Expected<DWARFLocationExpressionsVector> Locations = +- Die.getLocations(DW_AT_location); +- if (!Locations) { +- // Missing DW_AT_location is fine here. +- consumeError(Locations.takeError()); +- return; +- } +- +- uint64_t Address = UINT64_MAX; +- +- for (const DWARFLocationExpression &Location : *Locations) { +- uint8_t AddressSize = getAddressByteSize(); +- DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize); +- DWARFExpression Expr(Data, AddressSize); +- auto It = Expr.begin(); +- if (It == Expr.end()) +- continue; +- +- // Match exactly the main sequence used to describe global variables: +- // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence +- // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in +- // future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()` is +- // a good starting point) is extended to use further expressions, this code +- // needs to be updated. +- uint64_t LocationAddr; +- if (It->getCode() == dwarf::DW_OP_addr) { +- LocationAddr = It->getRawOperand(0); +- } else if (It->getCode() == dwarf::DW_OP_addrx) { +- uint64_t DebugAddrOffset = It->getRawOperand(0); +- if (auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset)) { +- LocationAddr = Pointer->Address; +- } +- } else { +- continue; +- } +- +- // Read the optional 2nd operand, a DW_OP_plus_uconst. +- if (++It != Expr.end()) { +- if (It->getCode() != dwarf::DW_OP_plus_uconst) +- continue; +- +- LocationAddr += It->getRawOperand(0); +- +- // Probe for a 3rd operand, if it exists, bail. +- if (++It != Expr.end()) +- continue; +- } +- +- Address = LocationAddr; +- break; +- } +- +- // Get the size of the global variable. If all else fails (i.e. the global has +- // no type), then we use a size of one to still allow symbolization of the +- // exact address. +- uint64_t GVSize = 1; +- if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type)) +- if (std::optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize())) +- GVSize = *Size; +- +- if (Address != UINT64_MAX) +- VariableDieMap[Address] = {Address + GVSize, Die}; +-} +- +-DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) { +- extractDIEsIfNeeded(false); +- +- auto RootDie = getUnitDIE(); +- +- auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset()); +- if (RootLookup.second) +- updateVariableDieMap(RootDie); +- +- auto R = VariableDieMap.upper_bound(Address); +- if (R == VariableDieMap.begin()) +- return DWARFDie(); +- +- // upper_bound's previous item contains Address. +- --R; +- if (Address >= R->second.first) +- return DWARFDie(); +- return R->second.second; +-} +- + void + DWARFUnit::getInlinedChainForAddress(uint64_t Address, + SmallVectorImpl<DWARFDie> &InlinedChain) { +diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp +index e600fb7385f1..0444093d7622 100644 +--- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp ++++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp +@@ -64,13 +64,6 @@ DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address, + return Result; + } + +-DILineInfo +-PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) { +- // Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global +- // variables) aren't capable of carrying line information. +- return DILineInfo(); +-} +- + DILineInfoTable + PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address, + uint64_t Size, +diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +index 877380213f21..496c8149782e 100644 +--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp ++++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +@@ -206,10 +206,6 @@ void PlainPrinterBase::print(const Request &Request, const DIGlobal &Global) { + Name = DILineInfo::Addr2LineBadString; + OS << Name << "\n"; + OS << Global.Start << " " << Global.Size << "\n"; +- if (Global.DeclFile.empty()) +- OS << "??:?\n"; +- else +- OS << Global.DeclFile << ":" << Global.DeclLine << "\n"; + printFooter(); + } + +diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +index d8ee9264b64f..fcff531895a2 100644 +--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp ++++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +@@ -327,14 +327,6 @@ DIGlobal SymbolizableObjectFile::symbolizeData( + std::string FileName; + getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, + FileName); +- Res.DeclFile = FileName; +- +- // Try and get a better filename:lineno pair from the debuginfo, if present. +- DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset); +- if (DL.Line != 0) { +- Res.DeclFile = DL.FileName; +- Res.DeclLine = DL.Line; +- } + return Res; + } + +diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +index 83af3111c5dd..984e444b2fda 100644 +--- a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml ++++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +@@ -7,15 +7,12 @@ + + # CHECK: func + # CHECK-NEXT: 4096 1 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: data + # CHECK-NEXT: 8192 2 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: notype + # CHECK-NEXT: 8194 3 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + + --- !ELF +diff --git a/llvm/test/tools/llvm-symbolizer/data-location.yaml b/llvm/test/tools/llvm-symbolizer/data-location.yaml +deleted file mode 100644 +index 54f7d9be44a1..000000000000 +--- a/llvm/test/tools/llvm-symbolizer/data-location.yaml ++++ /dev/null +@@ -1,450 +0,0 @@ +-## Show that when "DATA" is used with an address, it forces the found location +-## to be symbolized as data, including the source information. +- +-# RUN: yaml2obj %s -o %t.so +- +-# RUN: llvm-symbolizer 'DATA 0x304d0' 'DATA 0x304d1' 'DATA 0x304d3' \ +-# RUN: 'DATA 0x304c0' 'DATA 0x304c8' 'DATA 0x304d4' 'DATA 0x304dc' \ +-# RUN: 'DATA 0x304d8' --obj=%t.so | FileCheck %s +- +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +- +-## Check that lookups in the middle of the symbol are also resolved correctly. +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +- +-## Now, the remainder of the symbols. +-# CHECK-NEXT: data_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:2 +-# CHECK-EMPTY: +-# CHECK-NEXT: str +-# CHECK-NEXT: {{[0-9]+}} 8 +-# CHECK-NEXT: /tmp/file.cpp:4 +-# CHECK-EMPTY: +-# CHECK-NEXT: f()::function_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:8 +-# CHECK-EMPTY: +- +-## Including the one that includes an addend. +-# CHECK-NEXT: alpha +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:12 +-# CHECK-EMPTY: +-# CHECK-NEXT: beta +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:13 +-# CHECK-EMPTY: +- +-## Ensure there's still a global that's offset-based. +-# RUN: llvm-dwarfdump --debug-info %t.so | FileCheck %s --check-prefix=OFFSET +- +-# OFFSET: DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4) +- +-################################################################################ +-## File below was generated using: +-## +-## $ clang++ -g -O3 /tmp/file.cpp -shared -fuse-ld=lld -nostdlib \ +-## -target aarch64-linux-gnuabi -mllvm -global-merge-ignore-single-use \ +-## -o /tmp/file.so +-## +-## With /tmp/file.cpp as: +-## 1: int bss_global; +-## 2: int data_global = 2; +-## 3: +-## 4: const char* str = +-## 5: "12345678"; +-## 6: +-## 7: int* f() { +-## 8: static int function_global; +-## 9: return &function_global; +-## 10: } +-## 11: +-## 12: static int alpha; +-## 13: static int beta; +-## 14: int *f(bool b) { return beta ? &alpha : β } +-## 15: +-## +-## ... then, one can get the offsets using `nm`, like: +-## $ nm out.so | grep bss_global +-## 00000000000038fc B bss_global +-## +-## Note the use of the aarch64 target (with -nostdlib in order to allow linkage +-## without libraries for cross-compilation) as well as -O3 and +-## -global-merge-ignore-single-use. This is a specific combination that makes +-## the compiler emit the `alpha` global variable with a more complex +-## DW_AT_location than just a DW_OP_addr/DW_OP_addrx. In this instance, it +-## outputs a `DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4)`. +-## +-## Ideally, this would be tested by invoking clang directly on a C source file, +-## but unfortunately there's no way to do that for LLVM tests. The other option +-## is to compile IR to an objfile, but llvm-symbolizer doesn't understand that +-## two symbols can have the same address in different sections. In the code +-## above, for example, we'd have bss_global at .bss+0x0, and data_global at +-## .data+0x0, and so the symbolizer would only print one of them. Hence, we have +-## the ugly dso-to-yaml blob below. +-## +-## For now, constant strings don't have a debuginfo entry, and so can't be +-## symbolized correctly. In future (if D123534 gets merged), this can be updated +-## to include a check that llvm-symbolizer can also symbolize constant strings, +-## like `str` above (basically that &"12345678" should be symbolizable) +-## to the specific line. Then, you can find the address of the constant string +-## from the relocation: +-## +-## $ nm out.so | grep str +-## 00000000000038c0 D str +-## $ llvm-objdump -R out.so | grep 38c0 +-## 00000000000038c0 R_X86_64_RELATIVE *ABS*+0x4f8 # <-- 0x4f8 +-################################################################################ +- +---- !ELF +-FileHeader: +- Class: ELFCLASS64 +- Data: ELFDATA2LSB +- Type: ET_DYN +- Machine: EM_AARCH64 +-ProgramHeaders: +- - Type: PT_PHDR +- Flags: [ PF_R ] +- VAddr: 0x40 +- Align: 0x8 +- - Type: PT_LOAD +- Flags: [ PF_R ] +- FirstSec: .dynsym +- LastSec: .eh_frame +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_X, PF_R ] +- FirstSec: .text +- LastSec: .text +- VAddr: 0x103E4 +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_W, PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_W, PF_R ] +- FirstSec: .data +- LastSec: .bss +- VAddr: 0x304C0 +- Align: 0x10000 +- - Type: PT_DYNAMIC +- Flags: [ PF_W, PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- Align: 0x8 +- - Type: PT_GNU_RELRO +- Flags: [ PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- - Type: PT_GNU_EH_FRAME +- Flags: [ PF_R ] +- FirstSec: .eh_frame_hdr +- LastSec: .eh_frame_hdr +- VAddr: 0x37C +- Align: 0x4 +- - Type: PT_GNU_STACK +- Flags: [ PF_W, PF_R ] +- Align: 0x0 +-Sections: +- - Name: .dynsym +- Type: SHT_DYNSYM +- Flags: [ SHF_ALLOC ] +- Address: 0x238 +- Link: .dynstr +- AddressAlign: 0x8 +- - Name: .gnu.hash +- Type: SHT_GNU_HASH +- Flags: [ SHF_ALLOC ] +- Address: 0x2C8 +- Link: .dynsym +- AddressAlign: 0x8 +- Header: +- SymNdx: 0x1 +- Shift2: 0x1A +- BloomFilter: [ 0x400188002180000C ] +- HashBuckets: [ 0x1 ] +- HashValues: [ 0xEE8502A, 0xEE85016, 0xC033991C, 0x61F7372E, 0xB88AB7F ] +- - Name: .hash +- Type: SHT_HASH +- Flags: [ SHF_ALLOC ] +- Address: 0x2F8 +- Link: .dynsym +- AddressAlign: 0x4 +- Bucket: [ 5, 0, 4, 0, 3, 0 ] +- Chain: [ 0, 0, 0, 1, 2, 0 ] +- - Name: .dynstr +- Type: SHT_STRTAB +- Flags: [ SHF_ALLOC ] +- Address: 0x330 +- AddressAlign: 0x1 +- - Name: .rela.dyn +- Type: SHT_RELA +- Flags: [ SHF_ALLOC ] +- Address: 0x358 +- Link: .dynsym +- AddressAlign: 0x8 +- Relocations: +- - Offset: 0x304C8 +- Type: R_AARCH64_RELATIVE +- Addend: 880 +- - Name: .rodata +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ] +- Address: 0x370 +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: '313233343536373800' +- - Name: .eh_frame_hdr +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC ] +- Address: 0x37C +- AddressAlign: 0x4 +- Content: 011B033B18000000020000006800010034000000740001004C000000 +- - Name: .eh_frame +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC ] +- Address: 0x398 +- AddressAlign: 0x8 +- Content: 1400000000000000017A5200017C1E011B0C1F0000000000140000001C0000002C0001000C00000000000000000000001400000034000000200001001C000000000000000000000000000000 +- - Name: .text +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x103E4 +- AddressAlign: 0x4 +- Content: 0001009000501391C0035FD60801009008611391E90308AA2A4540B85F0100710001899AC0035FD6 +- - Name: .dynamic +- Type: SHT_DYNAMIC +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x20410 +- Link: .dynstr +- AddressAlign: 0x8 +- Entries: +- - Tag: DT_RELA +- Value: 0x358 +- - Tag: DT_RELASZ +- Value: 0x18 +- - Tag: DT_RELAENT +- Value: 0x18 +- - Tag: DT_RELACOUNT +- Value: 0x1 +- - Tag: DT_SYMTAB +- Value: 0x238 +- - Tag: DT_SYMENT +- Value: 0x18 +- - Tag: DT_STRTAB +- Value: 0x330 +- - Tag: DT_STRSZ +- Value: 0x28 +- - Tag: DT_GNU_HASH +- Value: 0x2C8 +- - Tag: DT_HASH +- Value: 0x2F8 +- - Tag: DT_NULL +- Value: 0x0 +- - Name: .data +- Type: SHT_PROGBITS +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x304C0 +- AddressAlign: 0x8 +- Content: '02000000000000000000000000000000' +- - Name: .bss +- Type: SHT_NOBITS +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x304D0 +- AddressAlign: 0x4 +- Size: 0x10 +- - Name: .debug_abbrev +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 011101252513050325721710171B25111B120673170000023400032549133F193A0B3B0B0218000003240003253E0B0B0B0000040F004913000005260049130000062E01111B120640187A196E2503253A0B3B0B49133F190000073400032549133A0B3B0B02180000083400032549133A0B3B0B02186E25000009050003253A0B3B0B4913000000 +- - Name: .debug_info +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: AB0000000500010800000000010021000108000000000000000205280000000800000002032E000000000102A1000304050402052E000000000202A101020648000000000402A102044D00000005520000000307080106050C000000016F0D0E0007A500000007082E000000000802A1030008092E000000000D02A1040A080B2E000000000C04A10423040C06061C000000016F0F0E000EA50000000910000EAA00000000042E0000000311020100 +- - Name: .debug_str_offsets +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 4C00000005000000A2000000000000002C00000059000000280000001C00000072000000640000008C0000008700000069000000140000007B0000009C0000001A0000000E0000008500000076000000 +- - Name: .comment +- Type: SHT_PROGBITS +- Flags: [ SHF_MERGE, SHF_STRINGS ] +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: 4C696E6B65723A204C4C442031352E302E300000636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420306462616566363162353666306566306162306366333865613932666663316633356265653366662900 +- - Name: .debug_line +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 620000000500080037000000010101FB0E0D00010101010000000100000101011F010E00000003011F020F051E0100000000006C97BBE59F7DC6A9EA956633431DA63E0400000902E4030100000000001805030A140500BF05190A0105120608740204000101 +- - Name: .debug_line_str +- Type: SHT_PROGBITS +- Flags: [ SHF_MERGE, SHF_STRINGS ] +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: 2F746D702F66696C652E637070002F7573722F6C6F63616C2F676F6F676C652F686F6D652F6D69746368702F6C6C766D2D6275696C642F6F707400 +-Symbols: +- - Name: file.cpp +- Type: STT_FILE +- Index: SHN_ABS +- - Name: '$x.0' +- Section: .text +- Value: 0x103E4 +- - Name: _ZZ1fvE15function_global +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304D4 +- Size: 0x4 +- - Name: '$d.1' +- Section: .bss +- Value: 0x304D0 +- - Name: '$d.2' +- Section: .data +- Value: 0x304C0 +- - Name: '$d.3' +- Section: .rodata +- Value: 0x370 +- - Name: '$d.4' +- Section: .debug_abbrev +- - Name: '$d.5' +- Section: .debug_info +- - Name: '$d.6' +- Section: .debug_str_offsets +- - Name: '$d.7' +- Section: .debug_str +- Value: 0xA2 +- - Name: '$d.8' +- Section: .debug_addr +- - Name: _ZL4beta +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304D8 +- Size: 0x4 +- - Name: _ZL5alpha +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304DC +- Size: 0x4 +- - Name: '$d.9' +- Section: .comment +- Value: 0x13 +- - Name: '$d.10' +- Section: .eh_frame +- Value: 0x398 +- - Name: '$d.11' +- Section: .debug_line +- - Name: '$d.12' +- Section: .debug_line_str +- Value: 0xE +- - Name: _DYNAMIC +- Section: .dynamic +- Value: 0x20410 +- Other: [ STV_HIDDEN ] +- - Name: _Z1fv +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103E4 +- Size: 0xC +- - Name: _Z1fb +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103F0 +- Size: 0x1C +- - Name: bss_global +- Type: STT_OBJECT +- Section: .bss +- Binding: STB_GLOBAL +- Value: 0x304D0 +- Size: 0x4 +- - Name: data_global +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C0 +- Size: 0x4 +- - Name: str +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C8 +- Size: 0x8 +-DynamicSymbols: +- - Name: _Z1fv +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103E4 +- Size: 0xC +- - Name: _Z1fb +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103F0 +- Size: 0x1C +- - Name: bss_global +- Type: STT_OBJECT +- Section: .bss +- Binding: STB_GLOBAL +- Value: 0x304D0 +- Size: 0x4 +- - Name: data_global +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C0 +- Size: 0x4 +- - Name: str +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C8 +- Size: 0x8 +-DWARF: +- debug_str: +- - '/tmp/file.cpp' +- - _Z1fb +- - alpha +- - f +- - data_global +- - int +- - '/usr/local/google/home/mitchp/llvm-build/opt' +- - bss_global +- - char +- - _ZL4beta +- - str +- - bool +- - _ZL5alpha +- - b +- - beta +- - function_global +- - _Z1fv +- - 'clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0dbaef61b56f0ef0ab0cf38ea92ffc1f35bee3ff)' +- debug_addr: +- - Length: 0x3C +- Version: 0x5 +- AddressSize: 0x8 +- Entries: +- - Address: 0x304D0 +- - Address: 0x304C0 +- - Address: 0x304C8 +- - Address: 0x304D4 +- - Address: 0x304D8 +- - Address: 0x103E4 +- - Address: 0x103F0 +-... +diff --git a/llvm/test/tools/llvm-symbolizer/data.s b/llvm/test/tools/llvm-symbolizer/data.s +index cc9503c59141..e8039f146dbd 100644 +--- a/llvm/test/tools/llvm-symbolizer/data.s ++++ b/llvm/test/tools/llvm-symbolizer/data.s +@@ -7,12 +7,9 @@ + + # CHECK: d1 + # CHECK-NEXT: 0 8 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: d2 + # CHECK-NEXT: 8 4 +-# CHECK-NEXT: ??:? +-# CHECK-EMPTY: + + d1: + .quad 0x1122334455667788 +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/revert-llvmorg-15-init-13446-g7524fe962e47.patch b/build/build-clang/revert-llvmorg-15-init-13446-g7524fe962e47.patch new file mode 100644 index 0000000000..5bd4601827 --- /dev/null +++ b/build/build-clang/revert-llvmorg-15-init-13446-g7524fe962e47.patch @@ -0,0 +1,39 @@ +From 12f64ca10837bd68ec30804ebfa21653925ad5cf Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 16 Jun 2022 12:51:29 +0900 +Subject: [PATCH] Revert "[libFuzzer] Use the compiler to link the relocatable + object" + +This reverts commit 7524fe962e479416fd6318407eff4eed5b96a40b. +--- + compiler-rt/lib/fuzzer/CMakeLists.txt | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt +index 856cd732d517..d51de53f5acc 100644 +--- a/compiler-rt/lib/fuzzer/CMakeLists.txt ++++ b/compiler-rt/lib/fuzzer/CMakeLists.txt +@@ -138,15 +138,15 @@ if(OS_NAME MATCHES "Linux|Fuchsia" AND + COMPILER_RT_LIBCXX_PATH AND + COMPILER_RT_LIBCXXABI_PATH) + macro(partially_link_libcxx name dir arch) +- get_target_flags_for_arch(${arch} target_cflags) +- if(CMAKE_CXX_COMPILER_ID MATCHES Clang) +- get_compiler_rt_target(${arch} target) +- set(target_cflags --target=${target} ${target_cflags}) ++ if(${arch} MATCHES "i386") ++ set(EMULATION_ARGUMENT "-m" "elf_i386") ++ else() ++ set(EMULATION_ARGUMENT "") + endif() + set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir") + file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir}) + add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD +- COMMAND ${CMAKE_CXX_COMPILER} ${target_cflags} -Wl,--whole-archive "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" -Wl,--no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o ++ COMMAND ${CMAKE_LINKER} ${EMULATION_ARGUMENT} --whole-archive "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" --no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o + COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o + COMMAND ${CMAKE_COMMAND} -E remove "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" + COMMAND ${CMAKE_AR} qcs "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" ${name}.o +-- +2.36.0.1.g2bbe56bd8d + diff --git a/build/build-clang/revert-llvmorg-15-init-17171-g8bb4451a651a.patch b/build/build-clang/revert-llvmorg-15-init-17171-g8bb4451a651a.patch new file mode 100644 index 0000000000..2f2011859f --- /dev/null +++ b/build/build-clang/revert-llvmorg-15-init-17171-g8bb4451a651a.patch @@ -0,0 +1,1323 @@ +From cb2407418ae7fbd9874a07ba7943acff5257c996 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 26 Jan 2023 13:35:14 +0900 +Subject: Revert "[Reland][DebugInfo][llvm-dwarfutil] Combine + overlapped address ranges." + +This reverts commit 8bb4451a651ac00432d04e020d83f43c445aaebb for +breaking debug line info on mac (bug 1810249). +--- + llvm/include/llvm/ADT/AddressRanges.h | 82 +----- + llvm/include/llvm/DWARFLinker/DWARFLinker.h | 22 +- + .../llvm/DWARFLinker/DWARFLinkerCompileUnit.h | 30 ++- + llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 2 +- + llvm/lib/DWARFLinker/DWARFLinker.cpp | 73 +++-- + .../DWARFLinker/DWARFLinkerCompileUnit.cpp | 10 +- + llvm/lib/DWARFLinker/DWARFStreamer.cpp | 20 +- + llvm/lib/Support/AddressRanges.cpp | 57 ++-- + .../gc-func-overlapping-address-ranges.test | 254 ------------------ + .../gc-unit-overlapping-address-ranges.test | 247 ----------------- + llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 1 + + llvm/tools/dsymutil/DwarfLinkerForBinary.h | 4 +- + llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp | 22 +- + llvm/unittests/Support/AddressRangeTest.cpp | 102 +------ + 14 files changed, 158 insertions(+), 768 deletions(-) + delete mode 100644 llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-func-overlapping-address-ranges.test + delete mode 100644 llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-unit-overlapping-address-ranges.test + +diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h +index c02844a095d1..1953680d5222 100644 +--- a/llvm/include/llvm/ADT/AddressRanges.h ++++ b/llvm/include/llvm/ADT/AddressRanges.h +@@ -10,10 +10,9 @@ + #define LLVM_ADT_ADDRESSRANGES_H + + #include "llvm/ADT/Optional.h" +-#include "llvm/ADT/STLExtras.h" +-#include "llvm/ADT/SmallVector.h" + #include <cassert> + #include <stdint.h> ++#include <vector> + + namespace llvm { + +@@ -48,29 +47,20 @@ private: + /// The AddressRanges class helps normalize address range collections. + /// This class keeps a sorted vector of AddressRange objects and can perform + /// insertions and searches efficiently. The address ranges are always sorted +-/// and never contain any invalid or empty address ranges. +-/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300)) ++/// and never contain any invalid or empty address ranges. Intersecting + /// address ranges are combined during insertion. + class AddressRanges { + protected: +- using Collection = SmallVector<AddressRange>; ++ using Collection = std::vector<AddressRange>; + Collection Ranges; + + public: + void clear() { Ranges.clear(); } + bool empty() const { return Ranges.empty(); } +- bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); } +- bool contains(AddressRange Range) const { +- return find(Range) != Ranges.end(); +- } +- Optional<AddressRange> getRangeThatContains(uint64_t Addr) const { +- Collection::const_iterator It = find(Addr); +- if (It == Ranges.end()) +- return None; +- +- return *It; +- } +- Collection::const_iterator insert(AddressRange Range); ++ bool contains(uint64_t Addr) const; ++ bool contains(AddressRange Range) const; ++ Optional<AddressRange> getRangeThatContains(uint64_t Addr) const; ++ void insert(AddressRange Range); + void reserve(size_t Capacity) { Ranges.reserve(Capacity); } + size_t size() const { return Ranges.size(); } + bool operator==(const AddressRanges &RHS) const { +@@ -82,64 +72,6 @@ public: + } + Collection::const_iterator begin() const { return Ranges.begin(); } + Collection::const_iterator end() const { return Ranges.end(); } +- +-protected: +- Collection::const_iterator find(uint64_t Addr) const; +- Collection::const_iterator find(AddressRange Range) const; +-}; +- +-/// AddressRangesMap class maps values to the address ranges. +-/// It keeps address ranges and corresponding values. If ranges +-/// are combined during insertion, then combined range keeps +-/// newly inserted value. +-template <typename T> class AddressRangesMap : protected AddressRanges { +-public: +- void clear() { +- Ranges.clear(); +- Values.clear(); +- } +- bool empty() const { return AddressRanges::empty(); } +- bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); } +- bool contains(AddressRange Range) const { +- return AddressRanges::contains(Range); +- } +- void insert(AddressRange Range, T Value) { +- size_t InputSize = Ranges.size(); +- Collection::const_iterator RangesIt = AddressRanges::insert(Range); +- if (RangesIt == Ranges.end()) +- return; +- +- // make Values match to Ranges. +- size_t Idx = RangesIt - Ranges.begin(); +- typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx; +- if (InputSize < Ranges.size()) +- Values.insert(ValuesIt, T()); +- else if (InputSize > Ranges.size()) +- Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size()); +- assert(Ranges.size() == Values.size()); +- +- // set value to the inserted or combined range. +- Values[Idx] = Value; +- } +- size_t size() const { +- assert(Ranges.size() == Values.size()); +- return AddressRanges::size(); +- } +- Optional<std::pair<AddressRange, T>> +- getRangeValueThatContains(uint64_t Addr) const { +- Collection::const_iterator It = find(Addr); +- if (It == Ranges.end()) +- return None; +- +- return std::make_pair(*It, Values[It - Ranges.begin()]); +- } +- std::pair<AddressRange, T> operator[](size_t Idx) const { +- return std::make_pair(Ranges[Idx], Values[Idx]); +- } +- +-protected: +- using ValuesCollection = SmallVector<T>; +- ValuesCollection Values; + }; + + } // namespace llvm +diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h +index 4729e5f806d8..32636aa9b359 100644 +--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h ++++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h +@@ -9,7 +9,6 @@ + #ifndef LLVM_DWARFLINKER_DWARFLINKER_H + #define LLVM_DWARFLINKER_DWARFLINKER_H + +-#include "llvm/ADT/AddressRanges.h" + #include "llvm/CodeGen/AccelTable.h" + #include "llvm/CodeGen/NonRelocatableStringpool.h" + #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +@@ -38,6 +37,25 @@ enum class DwarfLinkerAccelTableKind : uint8_t { + Pub, ///< .debug_pubnames, .debug_pubtypes + }; + ++/// Partial address range. Besides an offset, only the ++/// HighPC is stored. The structure is stored in a map where the LowPC is the ++/// key. ++struct ObjFileAddressRange { ++ /// Function HighPC. ++ uint64_t HighPC; ++ /// Offset to apply to the linked address. ++ /// should be 0 for not-linked object file. ++ int64_t Offset; ++ ++ ObjFileAddressRange(uint64_t EndPC, int64_t Offset) ++ : HighPC(EndPC), Offset(Offset) {} ++ ++ ObjFileAddressRange() : HighPC(0), Offset(0) {} ++}; ++ ++/// Map LowPC to ObjFileAddressRange. ++using RangesTy = std::map<uint64_t, ObjFileAddressRange>; ++ + /// AddressesMap represents information about valid addresses used + /// by debug information. Valid addresses are those which points to + /// live code sections. i.e. relocations for these addresses point +@@ -124,7 +142,7 @@ public: + /// original \p Entries. + virtual void emitRangesEntries( + int64_t UnitPcOffset, uint64_t OrigLowPc, +- Optional<std::pair<AddressRange, int64_t>> FuncRange, ++ const FunctionIntervals::const_iterator &FuncRange, + const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, + unsigned AddressSize) = 0; + +diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +index 05e291c05132..930db0913226 100644 +--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h ++++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +@@ -9,8 +9,8 @@ + #ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H + #define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H + +-#include "llvm/ADT/AddressRanges.h" + #include "llvm/ADT/DenseMap.h" ++#include "llvm/ADT/IntervalMap.h" + #include "llvm/CodeGen/DIE.h" + #include "llvm/DebugInfo/DWARF/DWARFUnit.h" + +@@ -18,9 +18,12 @@ namespace llvm { + + class DeclContext; + +-/// Mapped value in the address map is the offset to apply to the +-/// linked address. +-using RangesTy = AddressRangesMap<int64_t>; ++template <typename KeyT, typename ValT> ++using HalfOpenIntervalMap = ++ IntervalMap<KeyT, ValT, IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize, ++ IntervalMapHalfOpenInfo<KeyT>>; ++ ++using FunctionIntervals = HalfOpenIntervalMap<uint64_t, int64_t>; + + // FIXME: Delete this structure. + struct PatchLocation { +@@ -81,7 +84,8 @@ public: + + CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR, + StringRef ClangModuleName) +- : OrigUnit(OrigUnit), ID(ID), ClangModuleName(ClangModuleName) { ++ : OrigUnit(OrigUnit), ID(ID), Ranges(RangeAlloc), ++ ClangModuleName(ClangModuleName) { + Info.resize(OrigUnit.getNumDIEs()); + + auto CUDie = OrigUnit.getUnitDIE(false); +@@ -139,7 +143,7 @@ public: + return UnitRangeAttribute; + } + +- const RangesTy &getFunctionRanges() const { return Ranges; } ++ const FunctionIntervals &getFunctionRanges() const { return Ranges; } + + const std::vector<PatchLocation> &getRangesAttributes() const { + return RangeAttributes; +@@ -178,6 +182,10 @@ public: + /// offset \p PCOffset. + void addFunctionRange(uint64_t LowPC, uint64_t HighPC, int64_t PCOffset); + ++ /// Check whether specified address range \p LowPC \p HighPC ++ /// overlaps with existing function ranges. ++ bool overlapsWithFunctionRanges(uint64_t LowPC, uint64_t HighPC); ++ + /// Keep track of a DW_AT_range attribute that we will need to patch up later. + void noteRangeAttribute(const DIE &Die, PatchLocation Attr); + +@@ -262,10 +270,12 @@ private: + std::tuple<DIE *, const CompileUnit *, DeclContext *, PatchLocation>> + ForwardDIEReferences; + +- /// The ranges in that map are the PC ranges for functions in this unit, +- /// associated with the PC offset to apply to the addresses to get +- /// the linked address. +- RangesTy Ranges; ++ FunctionIntervals::Allocator RangeAlloc; ++ ++ /// The ranges in that interval map are the PC ranges for ++ /// functions in this unit, associated with the PC offset to apply ++ /// to the addresses to get the linked address. ++ FunctionIntervals Ranges; + + /// The DW_AT_low_pc of each DW_TAG_label. + SmallDenseMap<uint64_t, uint64_t, 1> Labels; +diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +index 0ccab0efa8f4..003fe548252a 100644 +--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h ++++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +@@ -96,7 +96,7 @@ public: + /// original \p Entries. + void emitRangesEntries( + int64_t UnitPcOffset, uint64_t OrigLowPc, +- Optional<std::pair<AddressRange, int64_t>> FuncRange, ++ const FunctionIntervals::const_iterator &FuncRange, + const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, + unsigned AddressSize) override; + +diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp +index 3e14edb5f730..6e0fe801a630 100644 +--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp ++++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp +@@ -504,14 +504,22 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE( + &DIE); + return Flags; + } +- if (*LowPc > *HighPc) { +- reportWarning("low_pc greater than high_pc. Range will be discarded.\n", +- File, &DIE); ++ ++ // TODO: Following check is a workaround for overlapping address ranges. ++ // ELF binaries built with LTO might contain overlapping address ++ // ranges. The better fix would be to combine such ranges. Following ++ // is a workaround that should be removed when a good fix is done. ++ if (Unit.overlapsWithFunctionRanges(*LowPc, *HighPc)) { ++ reportWarning( ++ formatv("Overlapping address range [{0:X}, {1:X}]. Range will " ++ "be discarded.\n", ++ *LowPc, *HighPc), ++ File, &DIE); + return Flags; + } + + // Replace the debug map range with a more accurate one. +- Ranges.insert({*LowPc, *HighPc}, MyInfo.AddrAdjust); ++ Ranges[*LowPc] = ObjFileAddressRange(*HighPc, MyInfo.AddrAdjust); + Unit.addFunctionRange(*LowPc, *HighPc, MyInfo.AddrAdjust); + return Flags; + } +@@ -1580,7 +1588,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, + DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(), + OrigDwarf.getDWARFObj().getRangesSection(), + OrigDwarf.isLittleEndian(), AddressSize); +- Optional<std::pair<AddressRange, int64_t>> CurrRange; ++ auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange; + DWARFUnit &OrigUnit = Unit.getOrigUnit(); + auto OrigUnitDie = OrigUnit.getUnitDIE(false); + uint64_t OrigLowPc = +@@ -1603,11 +1611,12 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, + if (!Entries.empty()) { + const DWARFDebugRangeList::RangeListEntry &First = Entries.front(); + +- if (!CurrRange || +- !CurrRange->first.contains(First.StartAddress + OrigLowPc)) { +- CurrRange = FunctionRanges.getRangeValueThatContains( +- First.StartAddress + OrigLowPc); +- if (!CurrRange) { ++ if (CurrRange == InvalidRange || ++ First.StartAddress + OrigLowPc < CurrRange.start() || ++ First.StartAddress + OrigLowPc >= CurrRange.stop()) { ++ CurrRange = FunctionRanges.find(First.StartAddress + OrigLowPc); ++ if (CurrRange == InvalidRange || ++ CurrRange.start() > First.StartAddress + OrigLowPc) { + reportWarning("no mapping for range.", File); + continue; + } +@@ -1714,7 +1723,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + // in NewRows. + std::vector<DWARFDebugLine::Row> Seq; + const auto &FunctionRanges = Unit.getFunctionRanges(); +- Optional<std::pair<AddressRange, int64_t>> CurrRange; ++ auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange; + + // FIXME: This logic is meant to generate exactly the same output as + // Darwin's classic dsymutil. There is a nicer way to implement this +@@ -1733,14 +1742,19 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + // it is marked as end_sequence in the input (because in that + // case, the relocation offset is accurate and that entry won't + // serve as the start of another function). +- if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) || +- (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) { ++ if (CurrRange == InvalidRange || Row.Address.Address < CurrRange.start() || ++ Row.Address.Address > CurrRange.stop() || ++ (Row.Address.Address == CurrRange.stop() && !Row.EndSequence)) { + // We just stepped out of a known range. Insert a end_sequence + // corresponding to the end of the range. +- uint64_t StopAddress = +- CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL; +- CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address); +- if (!CurrRange) { ++ uint64_t StopAddress = CurrRange != InvalidRange ++ ? CurrRange.stop() + CurrRange.value() ++ : -1ULL; ++ CurrRange = FunctionRanges.find(Row.Address.Address); ++ bool CurrRangeValid = ++ CurrRange != InvalidRange && CurrRange.start() <= Row.Address.Address; ++ if (!CurrRangeValid) { ++ CurrRange = InvalidRange; + if (StopAddress != -1ULL) { + // Try harder by looking in the Address ranges map. + // There are corner cases where this finds a +@@ -1748,9 +1762,14 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + // for now do as dsymutil. + // FIXME: Understand exactly what cases this addresses and + // potentially remove it along with the Ranges map. +- if (Optional<std::pair<AddressRange, int64_t>> Range = +- Ranges.getRangeValueThatContains(Row.Address.Address)) +- StopAddress = Row.Address.Address + (*Range).second; ++ auto Range = Ranges.lower_bound(Row.Address.Address); ++ if (Range != Ranges.begin() && Range != Ranges.end()) ++ --Range; ++ ++ if (Range != Ranges.end() && Range->first <= Row.Address.Address && ++ Range->second.HighPC >= Row.Address.Address) { ++ StopAddress = Row.Address.Address + Range->second.Offset; ++ } + } + } + if (StopAddress != -1ULL && !Seq.empty()) { +@@ -1766,7 +1785,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + insertLineSequence(Seq, NewRows); + } + +- if (!CurrRange) ++ if (!CurrRangeValid) + continue; + } + +@@ -1775,7 +1794,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + continue; + + // Relocate row address and add it to the current sequence. +- Row.Address.Address += CurrRange->second; ++ Row.Address.Address += CurrRange.value(); + Seq.emplace_back(Row); + + if (Row.EndSequence) +@@ -1915,9 +1934,11 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, + // the function entry point, thus we can't just lookup the address + // in the debug map. Use the AddressInfo's range map to see if the FDE + // describes something that we can relocate. +- Optional<std::pair<AddressRange, int64_t>> Range = +- Ranges.getRangeValueThatContains(Loc); +- if (!Range) { ++ auto Range = Ranges.upper_bound(Loc); ++ if (Range != Ranges.begin()) ++ --Range; ++ if (Range == Ranges.end() || Range->first > Loc || ++ Range->second.HighPC <= Loc) { + // The +4 is to account for the size of the InitialLength field itself. + InputOffset = EntryOffset + InitialLength + 4; + continue; +@@ -1945,7 +1966,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, + // fields that will get reconstructed by emitFDE(). + unsigned FDERemainingBytes = InitialLength - (4 + AddrSize); + TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize, +- Loc + Range->second, ++ Loc + Range->second.Offset, + FrameData.substr(InputOffset, FDERemainingBytes)); + InputOffset += FDERemainingBytes; + } +diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +index 1cb20c0bb948..ebb1106521cc 100644 +--- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp ++++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +@@ -105,11 +105,19 @@ void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) { + + void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc, + int64_t PcOffset) { +- Ranges.insert({FuncLowPc, FuncHighPc}, PcOffset); ++ // Don't add empty ranges to the interval map. They are a problem because ++ // the interval map expects half open intervals. This is safe because they ++ // are empty anyway. ++ if (FuncHighPc != FuncLowPc) ++ Ranges.insert(FuncLowPc, FuncHighPc, PcOffset); + this->LowPc = std::min(LowPc, FuncLowPc + PcOffset); + this->HighPc = std::max(HighPc, FuncHighPc + PcOffset); + } + ++bool CompileUnit::overlapsWithFunctionRanges(uint64_t LowPC, uint64_t HighPC) { ++ return Ranges.overlaps(LowPC, HighPC); ++} ++ + void CompileUnit::noteRangeAttribute(const DIE &Die, PatchLocation Attr) { + if (Die.getTag() != dwarf::DW_TAG_compile_unit) + RangeAttributes.push_back(Attr); +diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp +index a00e51fcf135..55ff6b14f945 100644 +--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp ++++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp +@@ -321,14 +321,13 @@ void DwarfStreamer::emitSwiftReflectionSection( + /// sized addresses describing the ranges. + void DwarfStreamer::emitRangesEntries( + int64_t UnitPcOffset, uint64_t OrigLowPc, +- Optional<std::pair<AddressRange, int64_t>> FuncRange, ++ const FunctionIntervals::const_iterator &FuncRange, + const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, + unsigned AddressSize) { + MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); + + // Offset each range by the right amount. +- int64_t PcOffset = +- (Entries.empty() || !FuncRange) ? 0 : FuncRange->second + UnitPcOffset; ++ int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset; + for (const auto &Range : Entries) { + if (Range.isBaseAddressSelectionEntry(AddressSize)) { + warn("unsupported base address selection operation", +@@ -340,7 +339,8 @@ void DwarfStreamer::emitRangesEntries( + continue; + + // All range entries should lie in the function range. +- if (!FuncRange->first.contains(Range.StartAddress + OrigLowPc)) ++ if (!(Range.StartAddress + OrigLowPc >= FuncRange.start() && ++ Range.EndAddress + OrigLowPc <= FuncRange.stop())) + warn("inconsistent range data.", "emitting debug_ranges"); + MS->emitIntValue(Range.StartAddress + PcOffset, AddressSize); + MS->emitIntValue(Range.EndAddress + PcOffset, AddressSize); +@@ -365,13 +365,11 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit, + // IntervalMap will have coalesced the non-linked ranges, but here + // we want to coalesce the linked addresses. + std::vector<std::pair<uint64_t, uint64_t>> Ranges; +- const RangesTy &FunctionRanges = Unit.getFunctionRanges(); +- for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++) { +- std::pair<AddressRange, int64_t> CurRange = FunctionRanges[Idx]; +- +- Ranges.push_back(std::make_pair(CurRange.first.start() + CurRange.second, +- CurRange.first.end() + CurRange.second)); +- } ++ const auto &FunctionRanges = Unit.getFunctionRanges(); ++ for (auto Range = FunctionRanges.begin(), End = FunctionRanges.end(); ++ Range != End; ++Range) ++ Ranges.push_back(std::make_pair(Range.start() + Range.value(), ++ Range.stop() + Range.value())); + + // The object addresses where sorted, but again, the linked + // addresses might end up in a different order. +diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp +index 187d5be00dae..5ba011bac4e9 100644 +--- a/llvm/lib/Support/AddressRanges.cpp ++++ b/llvm/lib/Support/AddressRanges.cpp +@@ -12,59 +12,48 @@ + + using namespace llvm; + +-AddressRanges::Collection::const_iterator +-AddressRanges::insert(AddressRange Range) { ++void AddressRanges::insert(AddressRange Range) { + if (Range.size() == 0) +- return Ranges.end(); ++ return; + + auto It = llvm::upper_bound(Ranges, Range); + auto It2 = It; +- while (It2 != Ranges.end() && It2->start() <= Range.end()) ++ while (It2 != Ranges.end() && It2->start() < Range.end()) + ++It2; + if (It != It2) { +- Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; ++ Range = {Range.start(), std::max(Range.end(), It2[-1].end())}; + It = Ranges.erase(It, It2); + } +- if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { +- --It; +- *It = {It->start(), std::max(It->end(), Range.end())}; +- return It; +- } +- +- return Ranges.insert(It, Range); ++ if (It != Ranges.begin() && Range.start() < It[-1].end()) ++ It[-1] = {It[-1].start(), std::max(It[-1].end(), Range.end())}; ++ else ++ Ranges.insert(It, Range); + } + +-AddressRanges::Collection::const_iterator +-AddressRanges::find(uint64_t Addr) const { ++bool AddressRanges::contains(uint64_t Addr) const { + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.start() <= Addr; }); +- +- if (It == Ranges.begin()) +- return Ranges.end(); +- +- --It; +- if (Addr >= It->end()) +- return Ranges.end(); +- +- return It; ++ return It != Ranges.begin() && Addr < It[-1].end(); + } + +-AddressRanges::Collection::const_iterator +-AddressRanges::find(AddressRange Range) const { ++bool AddressRanges::contains(AddressRange Range) const { + if (Range.size() == 0) +- return Ranges.end(); +- ++ return false; + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.start() <= Range.start(); }); +- + if (It == Ranges.begin()) +- return Ranges.end(); +- +- --It; +- if (Range.end() > It->end()) +- return Ranges.end(); ++ return false; ++ return Range.end() <= It[-1].end(); ++} + +- return It; ++Optional<AddressRange> ++AddressRanges::getRangeThatContains(uint64_t Addr) const { ++ auto It = std::partition_point( ++ Ranges.begin(), Ranges.end(), ++ [=](const AddressRange &R) { return R.start() <= Addr; }); ++ if (It != Ranges.begin() && Addr < It[-1].end()) ++ return It[-1]; ++ return llvm::None; + } +diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-func-overlapping-address-ranges.test b/llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-func-overlapping-address-ranges.test +deleted file mode 100644 +index cfbb7cb10558..000000000000 +--- a/llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-func-overlapping-address-ranges.test ++++ /dev/null +@@ -1,254 +0,0 @@ +-## This test checks that overlapping function address ranges +-## are combined during --garbage-collection optimisation. +- +-# RUN: yaml2obj %s -o %t.o +-# RUN: llvm-dwarfutil --garbage-collection %t.o %t1 +-# RUN: llvm-dwarfdump -a %t1 | FileCheck %s +- +-# CHECK: DW_TAG_compile_unit +-# CHECK: DW_AT_name{{.*}}"CU1" +-# CHECK: DW_AT_low_pc{{.*}}0000000000001000 +-# CHECK: DW_AT_ranges +-# CHECK: [0x0000000000001000, 0x000000000000102d) +-# CHECK: [0x0000000000002002, 0x000000000000200d) +-# CHECK: [0x000000000000201b, 0x000000000000202a) +-# CHECK: [0x0000000000003002, 0x0000000000003007) +-# CHECK: [0x0000000000003012, 0x0000000000003017) +-# CHECK: [0x0000000000003018, 0x000000000000301a) +-# CHECK: [0x0000000000003022, 0x0000000000003027 +-# CHECK: DW_TAG_class_type +-# CHECK: DW_AT_name{{.*}}"class1" +-# CHECK: DW_TAG_class_type +-# CHECK: "class2" +-# CHECK: DW_TAG_subprogram +-# CHECK: DW_AT_name{{.*}}"foo1" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000001000 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000001010 +-# CHECK: DW_AT_type{{.*}}"class1" +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo2" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000001004 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000001007 +-# CHECK: DW_AT_type{{.*}}"class2" +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo3" +-# CHECK: DW_AT_low_pc{{.*}}0x000000000000100d +-# CHECK: DW_AT_high_pc{{.*}}0x000000000000102d +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo4" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000002002 +-# CHECK: DW_AT_high_pc{{.*}}0x000000000000200d +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo5" +-# CHECK: DW_AT_low_pc{{.*}}0x000000000000201b +-# CHECK: DW_AT_high_pc{{.*}}0x000000000000202a +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo6" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000003002 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000003007 +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo7" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000003012 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000003017 +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo8" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000003022 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000003027 +-# CHECK: DW_TAG_subprogram +-# CHECK: "foo9" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000003012 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000003017 +-# CHECK: "foo10" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000003018 +-# CHECK: DW_AT_high_pc{{.*}}0x000000000000301a +- +---- !ELF +-FileHeader: +- Class: ELFCLASS64 +- Data: ELFDATA2LSB +- Type: ET_REL +- Machine: EM_X86_64 +-Sections: +- - Name: .text +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x1000 +- AddressAlign: 0x0000000000000010 +- Content: "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" +- - Name: .text2 +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x2000 +- AddressAlign: 0x0000000000000010 +- Content: "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" +- - Name: .text3 +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x3000 +- AddressAlign: 0x0000000000000010 +- Content: "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" +-DWARF: +- debug_abbrev: +- - Table: +- - Tag: DW_TAG_compile_unit +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_producer +- Form: DW_FORM_string +- - Attribute: DW_AT_language +- Form: DW_FORM_data2 +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_low_pc +- Form: DW_FORM_addr +- - Attribute: DW_AT_ranges +- Form: DW_FORM_sec_offset +- - Tag: DW_TAG_subprogram +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_low_pc +- Form: DW_FORM_addr +- - Attribute: DW_AT_high_pc +- Form: DW_FORM_data8 +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Tag: DW_TAG_member +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_declaration +- Form: DW_FORM_flag_present +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_declaration +- Form: DW_FORM_flag_present +- - Tag: DW_TAG_template_type_parameter +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Tag: DW_TAG_base_type +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- debug_info: +- - Version: 4 +- Entries: +- - AbbrCode: 1 +- Values: +- - CStr: by_hand +- - Value: 0x04 +- - CStr: CU1 +- - Value: 0x00 +- - Value: 0x00 +- - AbbrCode: 3 +- Values: +- - CStr: class1 +- - AbbrCode: 4 +- Values: +- - Value: 0x00000052 +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 3 +- Values: +- - CStr: class2 +- - AbbrCode: 4 +- Values: +- - Value: 0x00000052 +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 8 +- Values: +- - CStr: int +- - AbbrCode: 2 +- Values: +- - CStr: foo1 +- - Value: 0x1000 +- - Value: 0x10 +- - Value: 0x00000026 +- - AbbrCode: 2 +- Values: +- - CStr: foo2 +- - Value: 0x1004 +- - Value: 0x3 +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo3 +- - Value: 0x100d +- - Value: 0x20 +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo4 +- - Value: 0x2002 +- - Value: 0xb +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo5 +- - Value: 0x201b +- - Value: 0xf +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo6 +- - Value: 0x3002 +- - Value: 0x5 +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo7 +- - Value: 0x3012 +- - Value: 0x5 +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo8 +- - Value: 0x3022 +- - Value: 0x5 +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo9 +- - Value: 0x3012 +- - Value: 0x5 +- - Value: 0x0000003c +- - AbbrCode: 2 +- Values: +- - CStr: foo10 +- - Value: 0x3018 +- - Value: 0x2 +- - Value: 0x0000003c +- - AbbrCode: 0 +- +- debug_ranges: +- - Offset: 0x00000000 +- AddrSize: 0x08 +- Entries: +- - LowOffset: 0x0000000000001000 +- HighOffset: 0x000000000000102d +- - LowOffset: 0x0000000000002000 +- HighOffset: 0x000000000000202d +- - LowOffset: 0x0000000000000000 +- HighOffset: 0x0000000000000000 +-... +diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-unit-overlapping-address-ranges.test b/llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-unit-overlapping-address-ranges.test +deleted file mode 100644 +index f9d42c28d420..000000000000 +--- a/llvm/test/tools/llvm-dwarfutil/ELF/X86/gc-unit-overlapping-address-ranges.test ++++ /dev/null +@@ -1,247 +0,0 @@ +-## This test checks that overlapping compile unit address ranges +-## are ignored (i.e. left unchanged) by --garbage-collection +-## optimisation. +- +-# RUN: yaml2obj %s -o %t.o +-# RUN: llvm-dwarfutil --garbage-collection %t.o %t1 +-# RUN: llvm-dwarfdump -a %t1 | FileCheck %s +- +-# CHECK: DW_TAG_compile_unit +-# CHECK: DW_AT_name{{.*}}"CU1" +-# CHECK: DW_TAG_class_type +-# CHECK: DW_AT_name{{.*}}"class1" +-# CHECK: DW_TAG_subprogram +-# CHECK: DW_AT_name{{.*}}"foo1" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000001000 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000001010 +-# CHECK: DW_TAG_subprogram +-# CHECK: DW_AT_name{{.*}}"foo2" +-# CHECK: DW_AT_low_pc{{.*}}0x0000000000001000 +-# CHECK: DW_AT_high_pc{{.*}}0x0000000000001010 +-# CHECK: DW_AT_type{{.*}}"class2" +- +---- !ELF +-FileHeader: +- Class: ELFCLASS64 +- Data: ELFDATA2LSB +- Type: ET_REL +- Machine: EM_X86_64 +-Sections: +- - Name: .text +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x1000 +- AddressAlign: 0x0000000000000010 +- Content: "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" +-DWARF: +- debug_abbrev: +- - Table: +- - Tag: DW_TAG_compile_unit +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_producer +- Form: DW_FORM_string +- - Attribute: DW_AT_language +- Form: DW_FORM_data2 +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_low_pc +- Form: DW_FORM_addr +- - Attribute: DW_AT_high_pc +- Form: DW_FORM_data8 +- - Tag: DW_TAG_subprogram +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_low_pc +- Form: DW_FORM_addr +- - Attribute: DW_AT_high_pc +- Form: DW_FORM_data8 +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Tag: DW_TAG_member +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_declaration +- Form: DW_FORM_flag_present +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_declaration +- Form: DW_FORM_flag_present +- - Tag: DW_TAG_template_type_parameter +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Tag: DW_TAG_base_type +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Table: +- - Tag: DW_TAG_compile_unit +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_producer +- Form: DW_FORM_string +- - Attribute: DW_AT_language +- Form: DW_FORM_data2 +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_low_pc +- Form: DW_FORM_addr +- - Attribute: DW_AT_high_pc +- Form: DW_FORM_data8 +- - Tag: DW_TAG_subprogram +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_low_pc +- Form: DW_FORM_addr +- - Attribute: DW_AT_high_pc +- Form: DW_FORM_data8 +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Tag: DW_TAG_member +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_declaration +- Form: DW_FORM_flag_present +- - Tag: DW_TAG_class_type +- Children: DW_CHILDREN_yes +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- - Attribute: DW_AT_declaration +- Form: DW_FORM_flag_present +- - Tag: DW_TAG_template_type_parameter +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_type +- Form: DW_FORM_ref4 +- - Tag: DW_TAG_base_type +- Children: DW_CHILDREN_no +- Attributes: +- - Attribute: DW_AT_name +- Form: DW_FORM_string +- debug_info: +- - Version: 4 +- Entries: +- - AbbrCode: 1 +- Values: +- - CStr: by_hand +- - Value: 0x04 +- - CStr: CU1 +- - Value: 0x1000 +- - Value: 0x1b +- - AbbrCode: 3 +- Values: +- - CStr: class1 +- - AbbrCode: 4 +- Values: +- - Value: 0x0000006c +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 3 +- Values: +- - CStr: class2 +- - AbbrCode: 4 +- Values: +- - Value: 0x0000006c +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 3 +- Values: +- - CStr: class3 +- - AbbrCode: 4 +- Values: +- - Value: 0x0000006c +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 8 +- Values: +- - CStr: int +- - AbbrCode: 2 +- Values: +- - CStr: foo1 +- - Value: 0x1000 +- - Value: 0x10 +- - Value: 0x0000002a +- - AbbrCode: 0 +- - Version: 4 +- Entries: +- - AbbrCode: 1 +- Values: +- - CStr: by_hand +- - Value: 0x04 +- - CStr: CU1 +- - Value: 0x1000 +- - Value: 0x1b +- - AbbrCode: 3 +- Values: +- - CStr: class1 +- - AbbrCode: 4 +- Values: +- - Value: 0x0000006c +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 3 +- Values: +- - CStr: class2 +- - AbbrCode: 4 +- Values: +- - Value: 0x0000006c +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 3 +- Values: +- - CStr: class3 +- - AbbrCode: 4 +- Values: +- - Value: 0x0000006c +- - CStr: member1 +- - AbbrCode: 0 +- - AbbrCode: 8 +- Values: +- - CStr: int +- - AbbrCode: 2 +- Values: +- - CStr: foo2 +- - Value: 0x1000 +- - Value: 0x10 +- - Value: 0x00000040 +- - AbbrCode: 0 +-... +diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +index 5f43680ba498..293fd6b17fe1 100644 +--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp ++++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +@@ -18,6 +18,7 @@ + #include "llvm/ADT/DenseSet.h" + #include "llvm/ADT/FoldingSet.h" + #include "llvm/ADT/Hashing.h" ++#include "llvm/ADT/IntervalMap.h" + #include "llvm/ADT/None.h" + #include "llvm/ADT/Optional.h" + #include "llvm/ADT/PointerIntPair.h" +diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.h b/llvm/tools/dsymutil/DwarfLinkerForBinary.h +index 623441c749a5..be9bedc6627d 100644 +--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.h ++++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.h +@@ -132,8 +132,8 @@ private: + for (const auto &Entry : DMO.symbols()) { + const auto &Mapping = Entry.getValue(); + if (Mapping.Size && Mapping.ObjectAddress) +- AddressRanges.insert( +- {*Mapping.ObjectAddress, *Mapping.ObjectAddress + Mapping.Size}, ++ AddressRanges[*Mapping.ObjectAddress] = ObjFileAddressRange( ++ *Mapping.ObjectAddress + Mapping.Size, + int64_t(Mapping.BinaryAddress) - *Mapping.ObjectAddress); + } + } +diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp +index 3e70f460bc58..fb8cca0ac55c 100644 +--- a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp ++++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp +@@ -49,7 +49,7 @@ public: + if (Size == 0) + continue; + const uint64_t StartAddr = Sect.getAddress(); +- TextAddressRanges.insert({StartAddr, StartAddr + Size}); ++ TextAddressRanges[{StartAddr}] = {StartAddr + Size, 0}; + } + + // Check CU address ranges for tombstone value. +@@ -60,7 +60,7 @@ public: + for (auto &Range : *ARanges) { + if (!isDeadAddressRange(Range.LowPC, Range.HighPC, CU->getVersion(), + Options.Tombstone, CU->getAddressByteSize())) +- DWARFAddressRanges.insert({Range.LowPC, Range.HighPC}, 0); ++ DWARFAddressRanges[{Range.LowPC}] = {Range.HighPC, 0}; + } + } + } +@@ -147,13 +147,17 @@ protected: + // of executable sections. + bool isInsideExecutableSectionsAddressRange(uint64_t LowPC, + Optional<uint64_t> HighPC) { +- Optional<AddressRange> Range = +- TextAddressRanges.getRangeThatContains(LowPC); +- +- if (HighPC) +- return Range.has_value() && Range->end() >= *HighPC; ++ auto Range = TextAddressRanges.lower_bound(LowPC); ++ if ((Range == TextAddressRanges.end() || Range->first != LowPC) && ++ Range != TextAddressRanges.begin()) ++ --Range; ++ ++ if (Range != TextAddressRanges.end() && Range->first <= LowPC && ++ (HighPC ? Range->second.HighPC >= HighPC ++ : Range->second.HighPC >= LowPC)) ++ return true; + +- return Range.has_value(); ++ return false; + } + + uint64_t isBFDDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, +@@ -207,7 +211,7 @@ protected: + + private: + RangesTy DWARFAddressRanges; +- AddressRanges TextAddressRanges; ++ RangesTy TextAddressRanges; + const Options &Opts; + }; + +diff --git a/llvm/unittests/Support/AddressRangeTest.cpp b/llvm/unittests/Support/AddressRangeTest.cpp +index 468f1e22ffa8..95943ad0365a 100644 +--- a/llvm/unittests/Support/AddressRangeTest.cpp ++++ b/llvm/unittests/Support/AddressRangeTest.cpp +@@ -100,7 +100,7 @@ TEST(AddressRangeTest, TestRanges) { + EXPECT_FALSE(Ranges.contains(AddressRange(0x1000, 0x1000))); + EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x1000 + 1))); + EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x2000))); +- EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x2001))); ++ EXPECT_FALSE(Ranges.contains(AddressRange(0x1000, 0x2001))); + EXPECT_TRUE(Ranges.contains(AddressRange(0x2000, 0x3000))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x2000, 0x3001))); + EXPECT_FALSE(Ranges.contains(AddressRange(0x3000, 0x3001))); +@@ -125,22 +125,16 @@ TEST(AddressRangeTest, TestRanges) { + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x2000)); + +- // Verify that adjacent ranges get combined +- Ranges.insert(AddressRange(0x2000, 0x2fff)); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x2fff)); +- +- // Verify that ranges having 1 byte gap do not get combined +- Ranges.insert(AddressRange(0x3000, 0x4000)); ++ // Verify that adjacent ranges don't get combined ++ Ranges.insert(AddressRange(0x2000, 0x3000)); + EXPECT_EQ(Ranges.size(), 2u); +- EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x2fff)); +- EXPECT_EQ(Ranges[1], AddressRange(0x3000, 0x4000)); +- ++ EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x2000)); ++ EXPECT_EQ(Ranges[1], AddressRange(0x2000, 0x3000)); + // Verify if we add an address range that intersects two ranges + // that they get combined + Ranges.insert(AddressRange(Ranges[0].end() - 1, Ranges[1].start() + 1)); + EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x4000)); ++ EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x3000)); + + Ranges.insert(AddressRange(0x3000, 0x4000)); + Ranges.insert(AddressRange(0x4000, 0x5000)); +@@ -148,87 +142,3 @@ TEST(AddressRangeTest, TestRanges) { + EXPECT_EQ(Ranges.size(), 1u); + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x5000)); + } +- +-TEST(AddressRangeTest, TestRangesMap) { +- AddressRangesMap<int> Ranges; +- +- EXPECT_EQ(Ranges.size(), 0u); +- EXPECT_TRUE(Ranges.empty()); +- +- // Add single range. +- Ranges.insert(AddressRange(0x1000, 0x2000), 0xfe); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_FALSE(Ranges.empty()); +- EXPECT_TRUE(Ranges.contains(0x1500)); +- EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x2000))); +- +- // Clear ranges. +- Ranges.clear(); +- EXPECT_EQ(Ranges.size(), 0u); +- EXPECT_TRUE(Ranges.empty()); +- +- // Add range and check value. +- Ranges.insert(AddressRange(0x1000, 0x2000), 0xfe); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xfe); +- +- // Add adjacent range and check value. +- Ranges.insert(AddressRange(0x2000, 0x3000), 0xfc); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xfc); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x2000)->second, 0xfc); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x2900)->second, 0xfc); +- EXPECT_FALSE(Ranges.getRangeValueThatContains(0x3000)); +- +- // Add intersecting range and check value. +- Ranges.insert(AddressRange(0x2000, 0x3000), 0xff); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xff); +- +- // Add second range and check values. +- Ranges.insert(AddressRange(0x4000, 0x5000), 0x0); +- EXPECT_EQ(Ranges.size(), 2u); +- EXPECT_EQ(Ranges[0].second, 0xff); +- EXPECT_EQ(Ranges[1].second, 0x0); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xff); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x4000)->second, 0x0); +- +- // Add intersecting range and check value. +- Ranges.insert(AddressRange(0x0, 0x6000), 0x1); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0x1); +- +- // Check that values are correctly preserved for combined ranges. +- Ranges.clear(); +- Ranges.insert(AddressRange(0x0, 0xff), 0x1); +- Ranges.insert(AddressRange(0x100, 0x1ff), 0x2); +- Ranges.insert(AddressRange(0x200, 0x2ff), 0x3); +- Ranges.insert(AddressRange(0x300, 0x3ff), 0x4); +- Ranges.insert(AddressRange(0x400, 0x4ff), 0x5); +- Ranges.insert(AddressRange(0x500, 0x5ff), 0x6); +- Ranges.insert(AddressRange(0x600, 0x6ff), 0x7); +- +- Ranges.insert(AddressRange(0x150, 0x350), 0xff); +- EXPECT_EQ(Ranges.size(), 5u); +- EXPECT_EQ(Ranges[0].first, AddressRange(0x0, 0xff)); +- EXPECT_EQ(Ranges[0].second, 0x1); +- EXPECT_EQ(Ranges[1].first, AddressRange(0x100, 0x3ff)); +- EXPECT_EQ(Ranges[1].second, 0xff); +- EXPECT_EQ(Ranges[2].first, AddressRange(0x400, 0x4ff)); +- EXPECT_EQ(Ranges[2].second, 0x5); +- EXPECT_EQ(Ranges[3].first, AddressRange(0x500, 0x5ff)); +- EXPECT_EQ(Ranges[3].second, 0x6); +- EXPECT_EQ(Ranges[4].first, AddressRange(0x600, 0x6ff)); +- EXPECT_EQ(Ranges[4].second, 0x7); +- +- Ranges.insert(AddressRange(0x3ff, 0x400), 0x5); +- EXPECT_EQ(Ranges.size(), 4u); +- EXPECT_EQ(Ranges[0].first, AddressRange(0x0, 0xff)); +- EXPECT_EQ(Ranges[0].second, 0x1); +- EXPECT_EQ(Ranges[1].first, AddressRange(0x100, 0x4ff)); +- EXPECT_EQ(Ranges[1].second, 0x5); +- EXPECT_EQ(Ranges[2].first, AddressRange(0x500, 0x5ff)); +- EXPECT_EQ(Ranges[2].second, 0x6); +- EXPECT_EQ(Ranges[3].first, AddressRange(0x600, 0x6ff)); +- EXPECT_EQ(Ranges[3].second, 0x7); +-} +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/revert-llvmorg-16-init-7598-g54bfd0484615.patch b/build/build-clang/revert-llvmorg-16-init-7598-g54bfd0484615.patch new file mode 100644 index 0000000000..683d87b7a2 --- /dev/null +++ b/build/build-clang/revert-llvmorg-16-init-7598-g54bfd0484615.patch @@ -0,0 +1,866 @@ +The patch changed the way the no_thread_safety_analysis works in a way +that I think actually makes sense, but we have exceptions in the code +that aren't enough to accomodate that change. + +Until our code is adjusted, revert this change. + +--- + clang/docs/ThreadSafetyAnalysis.rst | 10 +- + .../Analysis/Analyses/ThreadSafetyCommon.h | 13 +- + .../clang/Analysis/Analyses/ThreadSafetyTIL.h | 7 +- + .../Analysis/Analyses/ThreadSafetyTraverse.h | 5 +- + clang/lib/Analysis/ThreadSafety.cpp | 202 +++++++++--------- + clang/lib/Analysis/ThreadSafetyCommon.cpp | 46 ++-- + .../SemaCXX/warn-thread-safety-analysis.cpp | 155 +++----------- + 8 files changed, 155 insertions(+), 290 deletions(-) + +diff --git a/clang/docs/ThreadSafetyAnalysis.rst b/clang/docs/ThreadSafetyAnalysis.rst +index dcde0c706c70..23f460b248e1 100644 +--- a/clang/docs/ThreadSafetyAnalysis.rst ++++ b/clang/docs/ThreadSafetyAnalysis.rst +@@ -408,8 +408,7 @@ and destructor refer to the capability via different names; see the + Scoped capabilities are treated as capabilities that are implicitly acquired + on construction and released on destruction. They are associated with + the set of (regular) capabilities named in thread safety attributes on the +-constructor or function returning them by value (using C++17 guaranteed copy +-elision). Acquire-type attributes on other member functions are treated as ++constructor. Acquire-type attributes on other member functions are treated as + applying to that set of associated capabilities, while ``RELEASE`` implies that + a function releases all associated capabilities in whatever mode they're held. + +@@ -931,13 +930,6 @@ implementation. + // Assume mu is not held, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, defer_lock_t) EXCLUDES(mu) : mut(mu), locked(false) {} + +- // Same as constructors, but without tag types. (Requires C++17 copy elision.) +- static MutexLocker Lock(Mutex *mu) ACQUIRE(mu); +- static MutexLocker Adopt(Mutex *mu) REQUIRES(mu); +- static MutexLocker ReaderLock(Mutex *mu) ACQUIRE_SHARED(mu); +- static MutexLocker AdoptReaderLock(Mutex *mu) REQUIRES_SHARED(mu); +- static MutexLocker DeferLock(Mutex *mu) EXCLUDES(mu); +- + // Release *this and all associated mutexes, if they are still held. + // There is no warning if the scope was already unlocked before. + ~MutexLocker() RELEASE() { +diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +index 9c73d65db266..da69348ea938 100644 +--- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h ++++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +@@ -31,7 +31,6 @@ + #include "clang/Basic/LLVM.h" + #include "llvm/ADT/DenseMap.h" + #include "llvm/ADT/PointerIntPair.h" +-#include "llvm/ADT/PointerUnion.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/Support/Casting.h" + #include <sstream> +@@ -355,7 +354,7 @@ public: + const NamedDecl *AttrDecl; + + // Implicit object argument -- e.g. 'this' +- llvm::PointerUnion<const Expr *, til::SExpr *> SelfArg = nullptr; ++ const Expr *SelfArg = nullptr; + + // Number of funArgs + unsigned NumArgs = 0; +@@ -379,18 +378,10 @@ public: + // Translate a clang expression in an attribute to a til::SExpr. + // Constructs the context from D, DeclExp, and SelfDecl. + CapabilityExpr translateAttrExpr(const Expr *AttrExp, const NamedDecl *D, +- const Expr *DeclExp, +- til::SExpr *Self = nullptr); ++ const Expr *DeclExp, VarDecl *SelfD=nullptr); + + CapabilityExpr translateAttrExpr(const Expr *AttrExp, CallingContext *Ctx); + +- // Translate a variable reference. +- til::LiteralPtr *createVariable(const VarDecl *VD); +- +- // Create placeholder for this: we don't know the VarDecl on construction yet. +- std::pair<til::LiteralPtr *, StringRef> +- createThisPlaceholder(const Expr *Exp); +- + // Translate a clang statement or expression to a TIL expression. + // Also performs substitution of variables; Ctx provides the context. + // Dispatches on the type of S. +diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +index 48593516d853..65556c8d584c 100644 +--- a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h ++++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +@@ -634,14 +634,15 @@ typename V::R_SExpr Literal::traverse(V &Vs, typename V::R_Ctx Ctx) { + /// At compile time, pointer literals are represented by symbolic names. + class LiteralPtr : public SExpr { + public: +- LiteralPtr(const ValueDecl *D) : SExpr(COP_LiteralPtr), Cvdecl(D) {} ++ LiteralPtr(const ValueDecl *D) : SExpr(COP_LiteralPtr), Cvdecl(D) { ++ assert(D && "ValueDecl must not be null"); ++ } + LiteralPtr(const LiteralPtr &) = default; + + static bool classof(const SExpr *E) { return E->opcode() == COP_LiteralPtr; } + + // The clang declaration for the value that this pointer points to. + const ValueDecl *clangDecl() const { return Cvdecl; } +- void setClangDecl(const ValueDecl *VD) { Cvdecl = VD; } + + template <class V> + typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { +@@ -650,8 +651,6 @@ public: + + template <class C> + typename C::CType compare(const LiteralPtr* E, C& Cmp) const { +- if (!Cvdecl || !E->Cvdecl) +- return Cmp.comparePointers(this, E); + return Cmp.comparePointers(Cvdecl, E->Cvdecl); + } + +diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h +index 6fc55130655a..e81c00d3dddb 100644 +--- a/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h ++++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h +@@ -623,10 +623,7 @@ protected: + } + + void printLiteralPtr(const LiteralPtr *E, StreamType &SS) { +- if (const NamedDecl *D = E->clangDecl()) +- SS << D->getNameAsString(); +- else +- SS << "<temporary>"; ++ SS << E->clangDecl()->getNameAsString(); + } + + void printVariable(const Variable *V, StreamType &SS, bool IsVarDecl=false) { +diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp +index 76747561a9a3..a29134c6a5e7 100644 +--- a/clang/lib/Analysis/ThreadSafety.cpp ++++ b/clang/lib/Analysis/ThreadSafety.cpp +@@ -1029,7 +1029,7 @@ public: + + template <typename AttrType> + void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, +- const NamedDecl *D, til::SExpr *Self = nullptr); ++ const NamedDecl *D, VarDecl *SelfDecl = nullptr); + + template <class AttrType> + void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, +@@ -1220,7 +1220,7 @@ bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { + if (const auto *LP = dyn_cast<til::LiteralPtr>(SExp)) { + const ValueDecl *VD = LP->clangDecl(); + // Variables defined in a function are always inaccessible. +- if (!VD || !VD->isDefinedOutsideFunctionOrMethod()) ++ if (!VD->isDefinedOutsideFunctionOrMethod()) + return false; + // For now we consider static class members to be inaccessible. + if (isa<CXXRecordDecl>(VD->getDeclContext())) +@@ -1311,10 +1311,10 @@ void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, + template <typename AttrType> + void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, + const Expr *Exp, const NamedDecl *D, +- til::SExpr *Self) { ++ VarDecl *SelfDecl) { + if (Attr->args_size() == 0) { + // The mutex held is the "this" object. +- CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, Self); ++ CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, SelfDecl); + if (Cp.isInvalid()) { + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + return; +@@ -1326,7 +1326,7 @@ void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, + } + + for (const auto *Arg : Attr->args()) { +- CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, Self); ++ CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, SelfDecl); + if (Cp.isInvalid()) { + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + continue; +@@ -1529,26 +1529,21 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { + + ThreadSafetyAnalyzer *Analyzer; + FactSet FSet; +- /// Maps constructed objects to `this` placeholder prior to initialization. +- llvm::SmallDenseMap<const Expr *, til::LiteralPtr *> ConstructedObjects; + LocalVariableMap::Context LVarCtx; + unsigned CtxIndex; + + // helper functions + void warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, AccessKind AK, + Expr *MutexExp, ProtectedOperationKind POK, +- til::LiteralPtr *Self, SourceLocation Loc); +- void warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, Expr *MutexExp, +- til::LiteralPtr *Self, SourceLocation Loc); ++ SourceLocation Loc); ++ void warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, Expr *MutexExp); + + void checkAccess(const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK = POK_VarAccess); + void checkPtAccess(const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK = POK_VarAccess); + +- void handleCall(const Expr *Exp, const NamedDecl *D, +- til::LiteralPtr *Self = nullptr, +- SourceLocation Loc = SourceLocation()); ++ void handleCall(const Expr *Exp, const NamedDecl *D, VarDecl *VD = nullptr); + void examineArguments(const FunctionDecl *FD, + CallExpr::const_arg_iterator ArgBegin, + CallExpr::const_arg_iterator ArgEnd, +@@ -1565,7 +1560,6 @@ public: + void VisitCallExpr(const CallExpr *Exp); + void VisitCXXConstructExpr(const CXXConstructExpr *Exp); + void VisitDeclStmt(const DeclStmt *S); +- void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Exp); + }; + + } // namespace +@@ -1575,12 +1569,10 @@ public: + void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, + AccessKind AK, Expr *MutexExp, + ProtectedOperationKind POK, +- til::LiteralPtr *Self, + SourceLocation Loc) { + LockKind LK = getLockKindFromAccessKind(AK); + +- CapabilityExpr Cp = +- Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); ++ CapabilityExpr Cp = Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp); + if (Cp.isInvalid()) { + warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, Cp.getKind()); + return; +@@ -1637,10 +1629,8 @@ void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, + + /// Warn if the LSet contains the given lock. + void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, +- Expr *MutexExp, til::LiteralPtr *Self, +- SourceLocation Loc) { +- CapabilityExpr Cp = +- Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); ++ Expr *MutexExp) { ++ CapabilityExpr Cp = Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp); + if (Cp.isInvalid()) { + warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, Cp.getKind()); + return; +@@ -1651,7 +1641,7 @@ void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, + const FactEntry *LDat = FSet.findLock(Analyzer->FactMan, Cp); + if (LDat) { + Analyzer->Handler.handleFunExcludesLock(Cp.getKind(), D->getNameAsString(), +- Cp.toString(), Loc); ++ Cp.toString(), Exp->getExprLoc()); + } + } + +@@ -1721,7 +1711,7 @@ void BuildLockset::checkAccess(const Expr *Exp, AccessKind AK, + } + + for (const auto *I : D->specific_attrs<GuardedByAttr>()) +- warnIfMutexNotHeld(D, Exp, AK, I->getArg(), POK, nullptr, Loc); ++ warnIfMutexNotHeld(D, Exp, AK, I->getArg(), POK, Loc); + } + + /// Checks pt_guarded_by and pt_guarded_var attributes. +@@ -1758,8 +1748,7 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, + Analyzer->Handler.handleNoMutexHeld(D, PtPOK, AK, Exp->getExprLoc()); + + for (auto const *I : D->specific_attrs<PtGuardedByAttr>()) +- warnIfMutexNotHeld(D, Exp, AK, I->getArg(), PtPOK, nullptr, +- Exp->getExprLoc()); ++ warnIfMutexNotHeld(D, Exp, AK, I->getArg(), PtPOK, Exp->getExprLoc()); + } + + /// Process a function call, method call, constructor call, +@@ -1772,35 +1761,21 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, + /// and check that the appropriate locks are held. Non-const method calls with + /// the same signature as const method calls can be also treated as reads. + /// +-/// \param Exp The call expression. +-/// \param D The callee declaration. +-/// \param Self If \p Exp = nullptr, the implicit this argument. +-/// \param Loc If \p Exp = nullptr, the location. + void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, +- til::LiteralPtr *Self, SourceLocation Loc) { ++ VarDecl *VD) { ++ SourceLocation Loc = Exp->getExprLoc(); + CapExprSet ExclusiveLocksToAdd, SharedLocksToAdd; + CapExprSet ExclusiveLocksToRemove, SharedLocksToRemove, GenericLocksToRemove; + CapExprSet ScopedReqsAndExcludes; + + // Figure out if we're constructing an object of scoped lockable class +- CapabilityExpr Scp; +- if (Exp) { +- assert(!Self); +- const auto *TagT = Exp->getType()->getAs<TagType>(); +- if (TagT && Exp->isPRValue()) { +- std::pair<til::LiteralPtr *, StringRef> Placeholder = +- Analyzer->SxBuilder.createThisPlaceholder(Exp); +- [[maybe_unused]] auto inserted = +- ConstructedObjects.insert({Exp, Placeholder.first}); +- assert(inserted.second && "Are we visiting the same expression again?"); +- if (isa<CXXConstructExpr>(Exp)) +- Self = Placeholder.first; +- if (TagT->getDecl()->hasAttr<ScopedLockableAttr>()) +- Scp = CapabilityExpr(Placeholder.first, Placeholder.second, false); ++ bool isScopedVar = false; ++ if (VD) { ++ if (const auto *CD = dyn_cast<const CXXConstructorDecl>(D)) { ++ const CXXRecordDecl* PD = CD->getParent(); ++ if (PD && PD->hasAttr<ScopedLockableAttr>()) ++ isScopedVar = true; + } +- +- assert(Loc.isInvalid()); +- Loc = Exp->getExprLoc(); + } + + for(const Attr *At : D->attrs()) { +@@ -1811,7 +1786,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<AcquireCapabilityAttr>(At); + Analyzer->getMutexIDs(A->isShared() ? SharedLocksToAdd + : ExclusiveLocksToAdd, +- A, Exp, D, Self); ++ A, Exp, D, VD); + break; + } + +@@ -1822,7 +1797,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<AssertExclusiveLockAttr>(At); + + CapExprSet AssertLocks; +- Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); ++ Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( +@@ -1833,7 +1808,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<AssertSharedLockAttr>(At); + + CapExprSet AssertLocks; +- Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); ++ Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( +@@ -1844,7 +1819,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + case attr::AssertCapability: { + const auto *A = cast<AssertCapabilityAttr>(At); + CapExprSet AssertLocks; +- Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); ++ Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>( + AssertLock, +@@ -1858,11 +1833,11 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + case attr::ReleaseCapability: { + const auto *A = cast<ReleaseCapabilityAttr>(At); + if (A->isGeneric()) +- Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, Self); ++ Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, VD); + else if (A->isShared()) +- Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, Self); ++ Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, VD); + else +- Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, Self); ++ Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, VD); + break; + } + +@@ -1870,10 +1845,10 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<RequiresCapabilityAttr>(At); + for (auto *Arg : A->args()) { + warnIfMutexNotHeld(D, Exp, A->isShared() ? AK_Read : AK_Written, Arg, +- POK_FunctionCall, Self, Loc); ++ POK_FunctionCall, Exp->getExprLoc()); + // use for adopting a lock +- if (!Scp.shouldIgnore()) +- Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); ++ if (isScopedVar) ++ Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, VD); + } + break; + } +@@ -1881,10 +1856,10 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + case attr::LocksExcluded: { + const auto *A = cast<LocksExcludedAttr>(At); + for (auto *Arg : A->args()) { +- warnIfMutexHeld(D, Exp, Arg, Self, Loc); ++ warnIfMutexHeld(D, Exp, Arg); + // use for deferring a lock +- if (!Scp.shouldIgnore()) +- Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); ++ if (isScopedVar) ++ Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, VD); + } + break; + } +@@ -1907,7 +1882,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + + // Add locks. + FactEntry::SourceKind Source = +- !Scp.shouldIgnore() ? FactEntry::Managed : FactEntry::Acquired; ++ isScopedVar ? FactEntry::Managed : FactEntry::Acquired; + for (const auto &M : ExclusiveLocksToAdd) + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>(M, LK_Exclusive, + Loc, Source)); +@@ -1915,9 +1890,15 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>(M, LK_Shared, Loc, Source)); + +- if (!Scp.shouldIgnore()) { ++ if (isScopedVar) { + // Add the managing object as a dummy mutex, mapped to the underlying mutex. +- auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, Loc); ++ SourceLocation MLoc = VD->getLocation(); ++ DeclRefExpr DRE(VD->getASTContext(), VD, false, VD->getType(), VK_LValue, ++ VD->getLocation()); ++ // FIXME: does this store a pointer to DRE? ++ CapabilityExpr Scp = Analyzer->SxBuilder.translateAttrExpr(&DRE, nullptr); ++ ++ auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, MLoc); + for (const auto &M : ExclusiveLocksToAdd) + ScopedEntry->addLock(M); + for (const auto &M : SharedLocksToAdd) +@@ -2077,11 +2058,36 @@ void BuildLockset::VisitCXXConstructExpr(const CXXConstructExpr *Exp) { + } else { + examineArguments(D, Exp->arg_begin(), Exp->arg_end()); + } +- if (D && D->hasAttrs()) +- handleCall(Exp, D); + } + +-static const Expr *UnpackConstruction(const Expr *E) { ++static CXXConstructorDecl * ++findConstructorForByValueReturn(const CXXRecordDecl *RD) { ++ // Prefer a move constructor over a copy constructor. If there's more than ++ // one copy constructor or more than one move constructor, we arbitrarily ++ // pick the first declared such constructor rather than trying to guess which ++ // one is more appropriate. ++ CXXConstructorDecl *CopyCtor = nullptr; ++ for (auto *Ctor : RD->ctors()) { ++ if (Ctor->isDeleted()) ++ continue; ++ if (Ctor->isMoveConstructor()) ++ return Ctor; ++ if (!CopyCtor && Ctor->isCopyConstructor()) ++ CopyCtor = Ctor; ++ } ++ return CopyCtor; ++} ++ ++static Expr *buildFakeCtorCall(CXXConstructorDecl *CD, ArrayRef<Expr *> Args, ++ SourceLocation Loc) { ++ ASTContext &Ctx = CD->getASTContext(); ++ return CXXConstructExpr::Create(Ctx, Ctx.getRecordType(CD->getParent()), Loc, ++ CD, true, Args, false, false, false, false, ++ CXXConstructExpr::CK_Complete, ++ SourceRange(Loc, Loc)); ++} ++ ++static Expr *UnpackConstruction(Expr *E) { + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_NoOp) + E = CE->getSubExpr()->IgnoreParens(); +@@ -2100,7 +2106,7 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { + + for (auto *D : S->getDeclGroup()) { + if (auto *VD = dyn_cast_or_null<VarDecl>(D)) { +- const Expr *E = VD->getInit(); ++ Expr *E = VD->getInit(); + if (!E) + continue; + E = E->IgnoreParens(); +@@ -2110,27 +2116,29 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { + E = EWC->getSubExpr()->IgnoreParens(); + E = UnpackConstruction(E); + +- if (auto Object = ConstructedObjects.find(E); +- Object != ConstructedObjects.end()) { +- Object->second->setClangDecl(VD); +- ConstructedObjects.erase(Object); ++ if (const auto *CE = dyn_cast<CXXConstructExpr>(E)) { ++ const auto *CtorD = dyn_cast_or_null<NamedDecl>(CE->getConstructor()); ++ if (!CtorD || !CtorD->hasAttrs()) ++ continue; ++ handleCall(E, CtorD, VD); ++ } else if (isa<CallExpr>(E) && E->isPRValue()) { ++ // If the object is initialized by a function call that returns a ++ // scoped lockable by value, use the attributes on the copy or move ++ // constructor to figure out what effect that should have on the ++ // lockset. ++ // FIXME: Is this really the best way to handle this situation? ++ auto *RD = E->getType()->getAsCXXRecordDecl(); ++ if (!RD || !RD->hasAttr<ScopedLockableAttr>()) ++ continue; ++ CXXConstructorDecl *CtorD = findConstructorForByValueReturn(RD); ++ if (!CtorD || !CtorD->hasAttrs()) ++ continue; ++ handleCall(buildFakeCtorCall(CtorD, {E}, E->getBeginLoc()), CtorD, VD); + } + } + } + } + +-void BuildLockset::VisitMaterializeTemporaryExpr( +- const MaterializeTemporaryExpr *Exp) { +- if (const ValueDecl *ExtD = Exp->getExtendingDecl()) { +- if (auto Object = +- ConstructedObjects.find(UnpackConstruction(Exp->getSubExpr())); +- Object != ConstructedObjects.end()) { +- Object->second->setClangDecl(ExtD); +- ConstructedObjects.erase(Object); +- } +- } +-} +- + /// Given two facts merging on a join point, possibly warn and decide whether to + /// keep or replace. + /// +@@ -2403,33 +2411,19 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { + LocksetBuilder.Visit(CS.getStmt()); + break; + } +- // Ignore BaseDtor and MemberDtor for now. ++ // Ignore BaseDtor, MemberDtor, and TemporaryDtor for now. + case CFGElement::AutomaticObjectDtor: { + CFGAutomaticObjDtor AD = BI.castAs<CFGAutomaticObjDtor>(); + const auto *DD = AD.getDestructorDecl(AC.getASTContext()); + if (!DD->hasAttrs()) + break; + +- LocksetBuilder.handleCall(nullptr, DD, +- SxBuilder.createVariable(AD.getVarDecl()), +- AD.getTriggerStmt()->getEndLoc()); +- break; +- } +- case CFGElement::TemporaryDtor: { +- auto TD = BI.castAs<CFGTemporaryDtor>(); +- +- // Clean up constructed object even if there are no attributes to +- // keep the number of objects in limbo as small as possible. +- if (auto Object = LocksetBuilder.ConstructedObjects.find( +- TD.getBindTemporaryExpr()->getSubExpr()); +- Object != LocksetBuilder.ConstructedObjects.end()) { +- const auto *DD = TD.getDestructorDecl(AC.getASTContext()); +- if (DD->hasAttrs()) +- // TODO: the location here isn't quite correct. +- LocksetBuilder.handleCall(nullptr, DD, Object->second, +- TD.getBindTemporaryExpr()->getEndLoc()); +- LocksetBuilder.ConstructedObjects.erase(Object); +- } ++ // Create a dummy expression, ++ auto *VD = const_cast<VarDecl *>(AD.getVarDecl()); ++ DeclRefExpr DRE(VD->getASTContext(), VD, false, ++ VD->getType().getNonReferenceType(), VK_LValue, ++ AD.getTriggerStmt()->getEndLoc()); ++ LocksetBuilder.handleCall(&DRE, DD); + break; + } + default: +diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp +index a771149f1591..06b61b4de92f 100644 +--- a/clang/lib/Analysis/ThreadSafetyCommon.cpp ++++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp +@@ -115,22 +115,19 @@ static StringRef ClassifyDiagnostic(QualType VDT) { + /// \param D The declaration to which the attribute is attached. + /// \param DeclExp An expression involving the Decl to which the attribute + /// is attached. E.g. the call to a function. +-/// \param Self S-expression to substitute for a \ref CXXThisExpr. + CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + const NamedDecl *D, + const Expr *DeclExp, +- til::SExpr *Self) { ++ VarDecl *SelfDecl) { + // If we are processing a raw attribute expression, with no substitutions. +- if (!DeclExp && !Self) ++ if (!DeclExp) + return translateAttrExpr(AttrExp, nullptr); + + CallingContext Ctx(nullptr, D); + + // Examine DeclExp to find SelfArg and FunArgs, which are used to substitute + // for formal parameters when we call buildMutexID later. +- if (!DeclExp) +- /* We'll use Self. */; +- else if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { ++ if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { + Ctx.SelfArg = ME->getBase(); + Ctx.SelfArrow = ME->isArrow(); + } else if (const auto *CE = dyn_cast<CXXMemberCallExpr>(DeclExp)) { +@@ -145,24 +142,29 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + Ctx.SelfArg = nullptr; // Will be set below + Ctx.NumArgs = CE->getNumArgs(); + Ctx.FunArgs = CE->getArgs(); ++ } else if (D && isa<CXXDestructorDecl>(D)) { ++ // There's no such thing as a "destructor call" in the AST. ++ Ctx.SelfArg = DeclExp; + } + +- if (Self) { +- assert(!Ctx.SelfArg && "Ambiguous self argument"); +- Ctx.SelfArg = Self; ++ // Hack to handle constructors, where self cannot be recovered from ++ // the expression. ++ if (SelfDecl && !Ctx.SelfArg) { ++ DeclRefExpr SelfDRE(SelfDecl->getASTContext(), SelfDecl, false, ++ SelfDecl->getType(), VK_LValue, ++ SelfDecl->getLocation()); ++ Ctx.SelfArg = &SelfDRE; + + // If the attribute has no arguments, then assume the argument is "this". + if (!AttrExp) +- return CapabilityExpr( +- Self, ClassifyDiagnostic(cast<CXXMethodDecl>(D)->getThisObjectType()), +- false); ++ return translateAttrExpr(Ctx.SelfArg, nullptr); + else // For most attributes. + return translateAttrExpr(AttrExp, &Ctx); + } + + // If the attribute has no arguments, then assume the argument is "this". + if (!AttrExp) +- return translateAttrExpr(cast<const Expr *>(Ctx.SelfArg), nullptr); ++ return translateAttrExpr(Ctx.SelfArg, nullptr); + else // For most attributes. + return translateAttrExpr(AttrExp, &Ctx); + } +@@ -216,16 +218,6 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + return CapabilityExpr(E, Kind, Neg); + } + +-til::LiteralPtr *SExprBuilder::createVariable(const VarDecl *VD) { +- return new (Arena) til::LiteralPtr(VD); +-} +- +-std::pair<til::LiteralPtr *, StringRef> +-SExprBuilder::createThisPlaceholder(const Expr *Exp) { +- return {new (Arena) til::LiteralPtr(nullptr), +- ClassifyDiagnostic(Exp->getType())}; +-} +- + // Translate a clang statement or expression to a TIL expression. + // Also performs substitution of variables; Ctx provides the context. + // Dispatches on the type of S. +@@ -335,12 +327,8 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, + til::SExpr *SExprBuilder::translateCXXThisExpr(const CXXThisExpr *TE, + CallingContext *Ctx) { + // Substitute for 'this' +- if (Ctx && Ctx->SelfArg) { +- if (const auto *SelfArg = dyn_cast<const Expr *>(Ctx->SelfArg)) +- return translate(SelfArg, Ctx->Prev); +- else +- return cast<til::SExpr *>(Ctx->SelfArg); +- } ++ if (Ctx && Ctx->SelfArg) ++ return translate(Ctx->SelfArg, Ctx->Prev); + assert(SelfVar && "We have no variable for 'this'!"); + return SelfVar; + } +diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +index 8e312e589d81..e1cfa1f3fd17 100644 +--- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp ++++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +@@ -1606,30 +1606,6 @@ namespace substitution_test { + dlr.unlockData(d1); + } + }; +- +- // Automatic object destructor calls don't appear as expressions in the CFG, +- // so we have to handle them separately whenever substitutions are required. +- struct DestructorRequires { +- Mutex mu; +- ~DestructorRequires() EXCLUSIVE_LOCKS_REQUIRED(mu); +- }; +- +- void destructorRequires() { +- DestructorRequires rd; +- rd.mu.AssertHeld(); +- } +- +- struct DestructorExcludes { +- Mutex mu; +- ~DestructorExcludes() LOCKS_EXCLUDED(mu); +- }; +- +- void destructorExcludes() { +- DestructorExcludes ed; +- ed.mu.Lock(); // expected-note {{mutex acquired here}} +- } // expected-warning {{cannot call function '~DestructorExcludes' while mutex 'ed.mu' is held}} +- // expected-warning@-1 {{mutex 'ed.mu' is still held at the end of function}} +- + } // end namespace substituation_test + + +@@ -1714,15 +1690,6 @@ struct TestScopedLockable { + } + #endif + +- void temporary() { +- MutexLock{&mu1}, a = 5; +- } +- +- void lifetime_extension() { +- const MutexLock &mulock = MutexLock(&mu1); +- a = 5; +- } +- + void foo2() { + ReaderMutexLock mulock1(&mu1); + if (getBool()) { +@@ -1741,12 +1708,6 @@ struct TestScopedLockable { + // expected-warning {{acquiring mutex 'mu1' that is already held}} + } + +- void temporary_double_lock() { +- MutexLock mulock_a(&mu1); // expected-note{{mutex acquired here}} +- MutexLock{&mu1}; // \ +- // expected-warning {{acquiring mutex 'mu1' that is already held}} +- } +- + void foo4() { + MutexLock mulock1(&mu1), mulock2(&mu2); + a = b+1; +@@ -4226,20 +4187,6 @@ public: + void foo() EXCLUSIVE_LOCKS_REQUIRED(this); + }; + +-class SelfLockDeferred { +-public: +- SelfLockDeferred() LOCKS_EXCLUDED(mu_); +- ~SelfLockDeferred() UNLOCK_FUNCTION(mu_); +- +- Mutex mu_; +-}; +- +-class LOCKABLE SelfLockDeferred2 { +-public: +- SelfLockDeferred2() LOCKS_EXCLUDED(this); +- ~SelfLockDeferred2() UNLOCK_FUNCTION(); +-}; +- + + void test() { + SelfLock s; +@@ -4251,14 +4198,6 @@ void test2() { + s2.foo(); + } + +-void testDeferredTemporary() { +- SelfLockDeferred(); // expected-warning {{releasing mutex '<temporary>.mu_' that was not held}} +-} +- +-void testDeferredTemporary2() { +- SelfLockDeferred2(); // expected-warning {{releasing mutex '<temporary>' that was not held}} +-} +- + } // end namespace SelfConstructorTest + + +@@ -5953,75 +5892,47 @@ C c; + void f() { c[A()]->g(); } + } // namespace PR34800 + +-#ifdef __cpp_guaranteed_copy_elision +- + namespace ReturnScopedLockable { ++ template<typename Object> class SCOPED_LOCKABLE ReadLockedPtr { ++ public: ++ ReadLockedPtr(Object *ptr) SHARED_LOCK_FUNCTION((*this)->mutex); ++ ReadLockedPtr(ReadLockedPtr &&) SHARED_LOCK_FUNCTION((*this)->mutex); ++ ~ReadLockedPtr() UNLOCK_FUNCTION(); + +-class Object { +-public: +- MutexLock lock() EXCLUSIVE_LOCK_FUNCTION(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return MutexLock(&mutex); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} +- +- ReaderMutexLock lockShared() SHARED_LOCK_FUNCTION(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return ReaderMutexLock(&mutex); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} +- +- MutexLock adopt() EXCLUSIVE_LOCKS_REQUIRED(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return MutexLock(&mutex, true); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} ++ Object *operator->() const { return object; } + +- ReaderMutexLock adoptShared() SHARED_LOCKS_REQUIRED(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return ReaderMutexLock(&mutex, true); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} ++ private: ++ Object *object; ++ }; + +- int x GUARDED_BY(mutex); +- void needsLock() EXCLUSIVE_LOCKS_REQUIRED(mutex); ++ struct Object { ++ int f() SHARED_LOCKS_REQUIRED(mutex); ++ Mutex mutex; ++ }; + +- void testInside() { +- MutexLock scope = lock(); +- x = 1; +- needsLock(); ++ ReadLockedPtr<Object> get(); ++ int use() { ++ auto ptr = get(); ++ return ptr->f(); ++ } ++ void use_constructor() { ++ auto ptr = ReadLockedPtr<Object>(nullptr); ++ ptr->f(); ++ auto ptr2 = ReadLockedPtr<Object>{nullptr}; ++ ptr2->f(); ++ auto ptr3 = (ReadLockedPtr<Object>{nullptr}); ++ ptr3->f(); ++ } ++ struct Convertible { ++ Convertible(); ++ operator ReadLockedPtr<Object>(); ++ }; ++ void use_conversion() { ++ ReadLockedPtr<Object> ptr = Convertible(); ++ ptr->f(); + } +- +- Mutex mutex; +-}; +- +-Object obj; +- +-void testLock() { +- MutexLock scope = obj.lock(); +- obj.x = 1; +- obj.needsLock(); + } + +-int testSharedLock() { +- ReaderMutexLock scope = obj.lockShared(); +- obj.x = 1; // expected-warning {{writing variable 'x' requires holding mutex 'obj.mutex' exclusively}} +- return obj.x; +-} +- +-void testAdopt() { +- obj.mutex.Lock(); +- MutexLock scope = obj.adopt(); +- obj.x = 1; +-} +- +-int testAdoptShared() { +- obj.mutex.Lock(); +- ReaderMutexLock scope = obj.adoptShared(); +- obj.x = 1; +- return obj.x; +-} +- +-} // namespace ReturnScopedLockable +- +-#endif +- + namespace PR38640 { + void f() { + // Self-referencing assignment previously caused an infinite loop when thread +-- +2.37.1.1.g659da70093 + diff --git a/build/build-clang/revert-llvmorg-16-init-9324-g01859da84bad.patch b/build/build-clang/revert-llvmorg-16-init-9324-g01859da84bad.patch new file mode 100644 index 0000000000..c5166b636e --- /dev/null +++ b/build/build-clang/revert-llvmorg-16-init-9324-g01859da84bad.patch @@ -0,0 +1,869 @@ +From 4ebea86ddf20d3f9be3cb363eed3f313591bee66 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 3 Nov 2022 13:54:08 +0900 +Subject: [PATCH] Revert "[AliasAnalysis] Introduce getModRefInfoMask() as a + generalization of pointsToConstantMemory()." + +This reverts commit 01859da84bad95fd51d6a03b08b60c660e642a4f +for causing https://github.com/llvm/llvm-project/issues/58776 +--- + llvm/docs/AliasAnalysis.rst | 26 +++----- + llvm/include/llvm/Analysis/AliasAnalysis.h | 51 ++++----------- + .../llvm/Analysis/BasicAliasAnalysis.h | 12 +--- + .../llvm/Analysis/ObjCARCAliasAnalysis.h | 4 +- + .../llvm/Analysis/TypeBasedAliasAnalysis.h | 4 +- + llvm/lib/Analysis/AliasAnalysis.cpp | 63 +++++++++---------- + llvm/lib/Analysis/BasicAliasAnalysis.cpp | 43 ++++--------- + .../lib/Analysis/MemoryDependenceAnalysis.cpp | 2 +- + llvm/lib/Analysis/MemorySSA.cpp | 5 +- + llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp | 19 +++--- + llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 14 ++--- + .../lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 47 ++++++++++++-- + llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h | 4 +- + llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 7 +-- + .../InstCombine/InstCombineCalls.cpp | 4 +- + .../InstCombineLoadStoreAlloca.cpp | 33 +++++----- + llvm/lib/Transforms/Scalar/LICM.cpp | 2 +- + .../lib/Transforms/Scalar/LoopPredication.cpp | 2 +- + llvm/test/Analysis/BasicAA/constant-memory.ll | 14 +++-- + .../InstCombine/memcpy-from-global.ll | 6 +- + llvm/test/Transforms/InstCombine/store.ll | 4 +- + 21 files changed, 168 insertions(+), 198 deletions(-) + +diff --git a/llvm/docs/AliasAnalysis.rst b/llvm/docs/AliasAnalysis.rst +index 046dd24d7332..b9a8a3a4eb52 100644 +--- a/llvm/docs/AliasAnalysis.rst ++++ b/llvm/docs/AliasAnalysis.rst +@@ -161,24 +161,14 @@ Other useful ``AliasAnalysis`` methods + Several other tidbits of information are often collected by various alias + analysis implementations and can be put to good use by various clients. + +-The ``getModRefInfoMask`` method +-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- +-The ``getModRefInfoMask`` method returns a bound on Mod/Ref information for +-the supplied pointer, based on knowledge about whether the pointer points to +-globally-constant memory (for which it returns ``NoModRef``) or +-locally-invariant memory (for which it returns ``Ref``). Globally-constant +-memory includes functions, constant global variables, and the null pointer. +-Locally-invariant memory is memory that we know is invariant for the lifetime +-of its SSA value, but not necessarily for the life of the program: for example, +-the memory pointed to by ``readonly`` ``noalias`` parameters is known-invariant +-for the duration of the corresponding function call. Given Mod/Ref information +-``MRI`` for a memory location ``Loc``, ``MRI`` can be refined with a statement +-like ``MRI &= AA.getModRefInfoMask(Loc);``. Another useful idiom is +-``isModSet(AA.getModRefInfoMask(Loc))``; this checks to see if the given +-location can be modified at all. For convenience, there is also a method +-``pointsToConstantMemory(Loc)``; this is synonymous with +-``isNoModRef(AA.getModRefInfoMask(Loc))``. ++The ``pointsToConstantMemory`` method ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++The ``pointsToConstantMemory`` method returns true if and only if the analysis ++can prove that the pointer only points to unchanging memory locations ++(functions, constant global variables, and the null pointer). This information ++can be used to refine mod/ref information: it is impossible for an unchanging ++memory location to be modified. + + .. _never access memory or only read memory: + +diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h +index 953e15e358f1..1b15d130967a 100644 +--- a/llvm/include/llvm/Analysis/AliasAnalysis.h ++++ b/llvm/include/llvm/Analysis/AliasAnalysis.h +@@ -375,9 +375,7 @@ public: + + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. +- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) { +- return isNoModRef(getModRefInfoMask(Loc, OrLocal)); +- } ++ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false); + + /// A convenience wrapper around the primary \c pointsToConstantMemory + /// interface. +@@ -390,22 +388,6 @@ public: + /// \name Simple mod/ref information + /// @{ + +- /// Returns a bitmask that should be unconditionally applied to the ModRef +- /// info of a memory location. This allows us to eliminate Mod and/or Ref +- /// from the ModRef info based on the knowledge that the memory location +- /// points to constant and/or locally-invariant memory. +- /// +- /// If IgnoreLocals is true, then this method returns NoModRef for memory +- /// that points to a local alloca. +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, +- bool IgnoreLocals = false); +- +- /// A convenience wrapper around the primary \c getModRefInfoMask +- /// interface. +- ModRefInfo getModRefInfoMask(const Value *P, bool IgnoreLocals = false) { +- return getModRefInfoMask(MemoryLocation::getBeforeOrAfter(P), IgnoreLocals); +- } +- + /// Get the ModRef info associated with a pointer argument of a call. The + /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note + /// that these bits do not necessarily account for the overall behavior of +@@ -555,8 +537,6 @@ public: + + bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, + bool OrLocal = false); +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals = false); + ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call2, + AAQueryInfo &AAQIP); + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, +@@ -624,10 +604,6 @@ public: + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) { + return AA.pointsToConstantMemory(Loc, AAQI, OrLocal); + } +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, +- bool IgnoreLocals = false) { +- return AA.getModRefInfoMask(Loc, AAQI, IgnoreLocals); +- } + ModRefInfo getModRefInfo(const Instruction *I, + const std::optional<MemoryLocation> &OptLoc) { + return AA.getModRefInfo(I, OptLoc, AAQI); +@@ -690,19 +666,16 @@ public: + const MemoryLocation &LocB, AAQueryInfo &AAQI, + const Instruction *CtxI) = 0; + ++ /// Checks whether the given location points to constant memory, or if ++ /// \p OrLocal is true whether it points to a local alloca. ++ virtual bool pointsToConstantMemory(const MemoryLocation &Loc, ++ AAQueryInfo &AAQI, bool OrLocal) = 0; ++ + /// @} + //===--------------------------------------------------------------------===// + /// \name Simple mod/ref information + /// @{ + +- /// Returns a bitmask that should be unconditionally applied to the ModRef +- /// info of a memory location. This allows us to eliminate Mod and/or Ref from +- /// the ModRef info based on the knowledge that the memory location points to +- /// constant and/or locally-invariant memory. +- virtual ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, +- AAQueryInfo &AAQI, +- bool IgnoreLocals) = 0; +- + /// Get the ModRef info associated with a pointer argument of a callsite. The + /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note + /// that these bits do not necessarily account for the overall behavior of +@@ -751,9 +724,9 @@ public: + return Result.alias(LocA, LocB, AAQI, CtxI); + } + +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals) override { +- return Result.getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, ++ bool OrLocal) override { ++ return Result.pointsToConstantMemory(Loc, AAQI, OrLocal); + } + + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override { +@@ -806,9 +779,9 @@ public: + return AliasResult::MayAlias; + } + +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals) { +- return ModRefInfo::ModRef; ++ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, ++ bool OrLocal) { ++ return false; + } + + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { +diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +index a2735f039a01..397692694322 100644 +--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h ++++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +@@ -73,15 +73,9 @@ public: + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, + AAQueryInfo &AAQI); + +- /// Returns a bitmask that should be unconditionally applied to the ModRef +- /// info of a memory location. This allows us to eliminate Mod and/or Ref +- /// from the ModRef info based on the knowledge that the memory location +- /// points to constant and/or locally-invariant memory. +- /// +- /// If IgnoreLocals is true, then this method returns NoModRef for memory +- /// that points to a local alloca. +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals = false); ++ /// Chases pointers until we find a (constant global) or not. ++ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, ++ bool OrLocal); + + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx); +diff --git a/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h +index 1a154c648fe6..ef162ece8a32 100644 +--- a/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h ++++ b/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h +@@ -52,8 +52,8 @@ public: + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, + AAQueryInfo &AAQI, const Instruction *CtxI); +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals); ++ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, ++ bool OrLocal); + + using AAResultBase::getMemoryEffects; + MemoryEffects getMemoryEffects(const Function *F); +diff --git a/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h +index 36dd39c033aa..84a2a6a2fdac 100644 +--- a/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h ++++ b/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h +@@ -40,8 +40,8 @@ public: + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, + AAQueryInfo &AAQI, const Instruction *CtxI); +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals); ++ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, ++ bool OrLocal); + + MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI); + MemoryEffects getMemoryEffects(const Function *F); +diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp +index 9e24f6b87bdb..5e27808cfdb3 100644 +--- a/llvm/lib/Analysis/AliasAnalysis.cpp ++++ b/llvm/lib/Analysis/AliasAnalysis.cpp +@@ -146,25 +146,19 @@ AliasResult AAResults::alias(const MemoryLocation &LocA, + return Result; + } + +-ModRefInfo AAResults::getModRefInfoMask(const MemoryLocation &Loc, +- bool IgnoreLocals) { ++bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, ++ bool OrLocal) { + SimpleAAQueryInfo AAQIP(*this); +- return getModRefInfoMask(Loc, AAQIP, IgnoreLocals); ++ return pointsToConstantMemory(Loc, AAQIP, OrLocal); + } + +-ModRefInfo AAResults::getModRefInfoMask(const MemoryLocation &Loc, +- AAQueryInfo &AAQI, bool IgnoreLocals) { +- ModRefInfo Result = ModRefInfo::ModRef; +- +- for (const auto &AA : AAs) { +- Result &= AA->getModRefInfoMask(Loc, AAQI, IgnoreLocals); +- +- // Early-exit the moment we reach the bottom of the lattice. +- if (isNoModRef(Result)) +- return ModRefInfo::NoModRef; +- } ++bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, ++ AAQueryInfo &AAQI, bool OrLocal) { ++ for (const auto &AA : AAs) ++ if (AA->pointsToConstantMemory(Loc, AAQI, OrLocal)) ++ return true; + +- return Result; ++ return false; + } + + ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { +@@ -253,11 +247,10 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call, + + Result &= ArgMR | OtherMR; + +- // Apply the ModRef mask. This ensures that if Loc is a constant memory +- // location, we take into account the fact that the call definitely could not ++ // If Loc is a constant memory location, the call definitely could not + // modify the memory location. +- if (!isNoModRef(Result)) +- Result &= getModRefInfoMask(Loc); ++ if (isModSet(Result) && pointsToConstantMemory(Loc, AAQI, /*OrLocal*/ false)) ++ Result &= ModRefInfo::Ref; + + return Result; + } +@@ -495,11 +488,9 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S, + if (AR == AliasResult::NoAlias) + return ModRefInfo::NoModRef; + +- // Examine the ModRef mask. If Mod isn't present, then return NoModRef. +- // This ensures that if Loc is a constant memory location, we take into +- // account the fact that the store definitely could not modify the memory +- // location. +- if (!isModSet(getModRefInfoMask(Loc))) ++ // If the pointer is a pointer to constant memory, then it could not have ++ // been modified by this store. ++ if (pointsToConstantMemory(Loc, AAQI)) + return ModRefInfo::NoModRef; + } + +@@ -510,11 +501,10 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S, + ModRefInfo AAResults::getModRefInfo(const FenceInst *S, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { +- // All we know about a fence instruction is what we get from the ModRef +- // mask: if Loc is a constant memory location, the fence definitely could +- // not modify it. +- if (Loc.Ptr) +- return getModRefInfoMask(Loc); ++ // If we know that the location is a constant memory location, the fence ++ // cannot modify this location. ++ if (Loc.Ptr && pointsToConstantMemory(Loc, AAQI)) ++ return ModRefInfo::Ref; + return ModRefInfo::ModRef; + } + +@@ -528,9 +518,10 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, + if (AR == AliasResult::NoAlias) + return ModRefInfo::NoModRef; + +- // If the pointer is a pointer to invariant memory, then it could not have ++ // If the pointer is a pointer to constant memory, then it could not have + // been modified by this va_arg. +- return getModRefInfoMask(Loc, AAQI); ++ if (pointsToConstantMemory(Loc, AAQI)) ++ return ModRefInfo::NoModRef; + } + + // Otherwise, a va_arg reads and writes. +@@ -541,9 +532,10 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { + if (Loc.Ptr) { +- // If the pointer is a pointer to invariant memory, ++ // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. +- return getModRefInfoMask(Loc, AAQI); ++ if (pointsToConstantMemory(Loc, AAQI)) ++ return ModRefInfo::NoModRef; + } + + // Otherwise, a catchpad reads and writes. +@@ -554,9 +546,10 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { + if (Loc.Ptr) { +- // If the pointer is a pointer to invariant memory, ++ // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. +- return getModRefInfoMask(Loc, AAQI); ++ if (pointsToConstantMemory(Loc, AAQI)) ++ return ModRefInfo::NoModRef; + } + + // Otherwise, a catchret reads and writes. +diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp +index 1ea1d4196f80..edf46664139e 100644 +--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp ++++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp +@@ -678,46 +678,33 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, + return Decomposed; + } + +-ModRefInfo BasicAAResult::getModRefInfoMask(const MemoryLocation &Loc, +- AAQueryInfo &AAQI, +- bool IgnoreLocals) { ++/// Returns whether the given pointer value points to memory that is local to ++/// the function, with global constants being considered local to all ++/// functions. ++bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, ++ AAQueryInfo &AAQI, bool OrLocal) { + assert(Visited.empty() && "Visited must be cleared after use!"); +- auto _ = make_scope_exit([&] { Visited.clear(); }); ++ auto _ = make_scope_exit([&]{ Visited.clear(); }); + + unsigned MaxLookup = 8; + SmallVector<const Value *, 16> Worklist; + Worklist.push_back(Loc.Ptr); +- ModRefInfo Result = ModRefInfo::NoModRef; +- + do { + const Value *V = getUnderlyingObject(Worklist.pop_back_val()); + if (!Visited.insert(V).second) + continue; + +- // Ignore allocas if we were instructed to do so. +- if (IgnoreLocals && isa<AllocaInst>(V)) ++ // An alloca instruction defines local memory. ++ if (OrLocal && isa<AllocaInst>(V)) + continue; + +- // If the location points to memory that is known to be invariant for +- // the life of the underlying SSA value, then we can exclude Mod from +- // the set of valid memory effects. +- // +- // An argument that is marked readonly and noalias is known to be +- // invariant while that function is executing. +- if (const Argument *Arg = dyn_cast<Argument>(V)) { +- if (Arg->hasNoAliasAttr() && Arg->onlyReadsMemory()) { +- Result |= ModRefInfo::Ref; +- continue; +- } +- } +- +- // A global constant can't be mutated. ++ // A global constant counts as local memory for our purposes. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + // Note: this doesn't require GV to be "ODR" because it isn't legal for a + // global to be marked constant in some modules and non-constant in + // others. GV may even be a declaration, not a definition. + if (!GV->isConstant()) +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + continue; + } + +@@ -733,20 +720,16 @@ ModRefInfo BasicAAResult::getModRefInfoMask(const MemoryLocation &Loc, + if (const PHINode *PN = dyn_cast<PHINode>(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + append_range(Worklist, PN->incoming_values()); + continue; + } + + // Otherwise be conservative. +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + } while (!Worklist.empty() && --MaxLookup); + +- // If we hit the maximum number of instructions to examine, be conservative. +- if (!Worklist.empty()) +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); +- +- return Result; ++ return Worklist.empty(); + } + + static bool isIntrinsicCall(const CallBase *Call, Intrinsic::ID IID) { +diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +index 2340015d6517..6a0b123f0b8f 100644 +--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp ++++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +@@ -524,7 +524,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( + } + + // Stores don't alias loads from read-only memory. +- if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc))) ++ if (BatchAA.pointsToConstantMemory(LoadLoc)) + continue; + + // Stores depend on may/must aliased loads. +diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp +index 0e6a12261e23..c2e2c8d9ec0f 100644 +--- a/llvm/lib/Analysis/MemorySSA.cpp ++++ b/llvm/lib/Analysis/MemorySSA.cpp +@@ -375,10 +375,9 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(BatchAAResults &AA, + const Instruction *I) { + // If the memory can't be changed, then loads of the memory can't be + // clobbered. +- if (auto *LI = dyn_cast<LoadInst>(I)) { ++ if (auto *LI = dyn_cast<LoadInst>(I)) + return I->hasMetadata(LLVMContext::MD_invariant_load) || +- !isModSet(AA.getModRefInfoMask(MemoryLocation::get(LI))); +- } ++ AA.pointsToConstantMemory(MemoryLocation::get(LI)); + return false; + } + +diff --git a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +index 1ccf792d2f8c..dd9eae428b0a 100644 +--- a/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp ++++ b/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp +@@ -69,29 +69,28 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, + return AliasResult::MayAlias; + } + +-ModRefInfo ObjCARCAAResult::getModRefInfoMask(const MemoryLocation &Loc, +- AAQueryInfo &AAQI, +- bool IgnoreLocals) { ++bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, ++ AAQueryInfo &AAQI, bool OrLocal) { + if (!EnableARCOpts) +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = GetRCIdentityRoot(Loc.Ptr); +- if (isNoModRef(AAResultBase::getModRefInfoMask( +- MemoryLocation(S, Loc.Size, Loc.AATags), AAQI, IgnoreLocals))) +- return ModRefInfo::NoModRef; ++ if (AAResultBase::pointsToConstantMemory( ++ MemoryLocation(S, Loc.Size, Loc.AATags), AAQI, OrLocal)) ++ return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) +- return AAResultBase::getModRefInfoMask(MemoryLocation::getBeforeOrAfter(U), +- AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory( ++ MemoryLocation::getBeforeOrAfter(U), AAQI, OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. +- return ModRefInfo::ModRef; ++ return false; + } + + MemoryEffects ObjCARCAAResult::getMemoryEffects(const Function *F) { +diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +index 529f3a76d23e..077fbdbe821c 100644 +--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp ++++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +@@ -385,23 +385,23 @@ AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, + return AliasResult::NoAlias; + } + +-ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc, +- AAQueryInfo &AAQI, +- bool IgnoreLocals) { ++bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, ++ AAQueryInfo &AAQI, ++ bool OrLocal) { + if (!EnableTBAA) +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + + const MDNode *M = Loc.AATags.TBAA; + if (!M) +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + + // If this is an "immutable" type, we can assume the pointer is pointing + // to constant memory. + if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable())) +- return ModRefInfo::NoModRef; ++ return true; + +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + } + + MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call, +diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +index 8155c895e366..1dd91f2b0be8 100644 +--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp ++++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +@@ -124,19 +124,54 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA, + return AAResultBase::alias(LocA, LocB, AAQI, nullptr); + } + +-ModRefInfo AMDGPUAAResult::getModRefInfoMask(const MemoryLocation &Loc, +- AAQueryInfo &AAQI, +- bool IgnoreLocals) { ++bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc, ++ AAQueryInfo &AAQI, bool OrLocal) { + unsigned AS = Loc.Ptr->getType()->getPointerAddressSpace(); + if (AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) +- return ModRefInfo::NoModRef; ++ return true; + + const Value *Base = getUnderlyingObject(Loc.Ptr); + AS = Base->getType()->getPointerAddressSpace(); + if (AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) +- return ModRefInfo::NoModRef; ++ return true; ++ ++ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { ++ if (GV->isConstant()) ++ return true; ++ } else if (const Argument *Arg = dyn_cast<Argument>(Base)) { ++ const Function *F = Arg->getParent(); ++ ++ // Only assume constant memory for arguments on kernels. ++ switch (F->getCallingConv()) { ++ default: ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); ++ case CallingConv::AMDGPU_LS: ++ case CallingConv::AMDGPU_HS: ++ case CallingConv::AMDGPU_ES: ++ case CallingConv::AMDGPU_GS: ++ case CallingConv::AMDGPU_VS: ++ case CallingConv::AMDGPU_PS: ++ case CallingConv::AMDGPU_CS: ++ case CallingConv::AMDGPU_KERNEL: ++ case CallingConv::SPIR_KERNEL: ++ break; ++ } + +- return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals); ++ unsigned ArgNo = Arg->getArgNo(); ++ /* On an argument, ReadOnly attribute indicates that the function does ++ not write through this pointer argument, even though it may write ++ to the memory that the pointer points to. ++ On an argument, ReadNone attribute indicates that the function does ++ not dereference that pointer argument, even though it may read or write ++ the memory that the pointer points to if accessed through other pointers. ++ */ ++ if (F->hasParamAttribute(ArgNo, Attribute::NoAlias) && ++ (F->hasParamAttribute(ArgNo, Attribute::ReadNone) || ++ F->hasParamAttribute(ArgNo, Attribute::ReadOnly))) { ++ return true; ++ } ++ } ++ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal); + } +diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +index 8ce7000222fa..140ed12a8f6d 100644 +--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h ++++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +@@ -38,8 +38,8 @@ public: + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, + AAQueryInfo &AAQI, const Instruction *CtxI); +- ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, +- bool IgnoreLocals); ++ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI, ++ bool OrLocal); + }; + + /// Analysis pass providing a never-invalidated alias analysis result. +diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +index 3f61dbe3354e..cfbceebf66d4 100644 +--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp ++++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +@@ -138,14 +138,13 @@ static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody, + ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef); + + auto AddLocAccess = [&](const MemoryLocation &Loc, ModRefInfo MR) { +- // Ignore accesses to known-invariant or local memory. +- MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true); +- if (isNoModRef(MR)) ++ // Ignore accesses to local memory. ++ if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + return; + + const Value *UO = getUnderlyingObject(Loc.Ptr); + assert(!isa<AllocaInst>(UO) && +- "Should have been handled by getModRefInfoMask()"); ++ "Should have been handled by pointsToConstantMemory()"); + if (isa<Argument>(UO)) { + ME |= MemoryEffects::argMemOnly(MR); + return; +diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +index bd3db4534a85..8fad7e8195c8 100644 +--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp ++++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +@@ -135,7 +135,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { + // If we have a store to a location which is known constant, we can conclude + // that the store must be storing the constant value (else the memory + // wouldn't be constant), and this must be a noop. +- if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) { ++ if (AA->pointsToConstantMemory(MI->getDest())) { + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(MI->getLength()->getType())); + return MI; +@@ -253,7 +253,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) { + // If we have a store to a location which is known constant, we can conclude + // that the store must be storing the constant value (else the memory + // wouldn't be constant), and this must be a noop. +- if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) { ++ if (AA->pointsToConstantMemory(MI->getDest())) { + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(MI->getLength()->getType())); + return MI; +diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +index c8226ce2816a..8d58241895b5 100644 +--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp ++++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +@@ -36,20 +36,20 @@ static cl::opt<unsigned> MaxCopiedFromConstantUsers( + cl::desc("Maximum users to visit in copy from constant transform"), + cl::Hidden); + +-/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived) ++/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) + /// pointer to an alloca. Ignore any reads of the pointer, return false if we + /// see any stores or other unknown uses. If we see pointer arithmetic, keep + /// track of whether it moves the pointer (with IsOffset) but otherwise traverse + /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +-/// the alloca, and if the source pointer is a pointer to a constant memory +-/// location, we can optimize this. ++/// the alloca, and if the source pointer is a pointer to a constant global, we ++/// can optimize this. + static bool + isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, + MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete) { + // We track lifetime intrinsics as we encounter them. If we decide to go +- // ahead and replace the value with the memory location, this lets the caller +- // quickly eliminate the markers. ++ // ahead and replace the value with the global, this lets the caller quickly ++ // eliminate the markers. + + using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>; + SmallVector<ValueAndIsOffset, 32> Worklist; +@@ -150,8 +150,8 @@ isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (U.getOperandNo() != 0) return false; + +- // If the source of the memcpy/move is not constant, reject it. +- if (isModSet(AA->getModRefInfoMask(MI->getSource()))) ++ // If the source of the memcpy/move is not a constant global, reject it. ++ if (!AA->pointsToConstantMemory(MI->getSource())) + return false; + + // Otherwise, the transform is safe. Remember the copy instruction. +@@ -161,10 +161,9 @@ isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, + return true; + } + +-/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only +-/// modified by a copy from a constant memory location. If we can prove this, we +-/// can replace any uses of the alloca with uses of the memory location +-/// directly. ++/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only ++/// modified by a copy from a constant global. If we can prove this, we can ++/// replace any uses of the alloca with uses of the global directly. + static MemTransferInst * + isOnlyCopiedFromConstantMemory(AAResults *AA, + AllocaInst *AI, +@@ -420,11 +419,11 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { + } + + // Check to see if this allocation is only modified by a memcpy/memmove from +- // a memory location whose alignment is equal to or exceeds that of the +- // allocation. If this is the case, we can change all users to use the +- // constant memory location instead. This is commonly produced by the CFE by +- // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' +- // is only subsequently read. ++ // a constant whose alignment is equal to or exceeds that of the allocation. ++ // If this is the case, we can change all users to use the constant global ++ // instead. This is commonly produced by the CFE by constructs like "void ++ // foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' is only subsequently ++ // read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) { + Value *TheSrc = Copy->getSource(); +@@ -1401,7 +1400,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) { + // If we have a store to a location which is known constant, we can conclude + // that the store must be storing the constant value (else the memory + // wouldn't be constant), and this must be a noop. +- if (!isModSet(AA->getModRefInfoMask(Ptr))) ++ if (AA->pointsToConstantMemory(Ptr)) + return eraseInstFromFunction(SI); + + // Do really simple DSE, to catch cases where there are several consecutive +diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp +index d3739f31bc57..11b96af0a30e 100644 +--- a/llvm/lib/Transforms/Scalar/LICM.cpp ++++ b/llvm/lib/Transforms/Scalar/LICM.cpp +@@ -1160,7 +1160,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, + + // Loads from constant memory are always safe to move, even if they end up + // in the same alias set as something that ends up being modified. +- if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0)))) ++ if (AA->pointsToConstantMemory(LI->getOperand(0))) + return true; + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) + return true; +diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp +index 0492ed5cdb1c..8dd9f0144941 100644 +--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp ++++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp +@@ -570,7 +570,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) { + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) + if (const auto *LI = dyn_cast<LoadInst>(U->getValue())) + if (LI->isUnordered() && L->hasLoopInvariantOperands(LI)) +- if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))) || ++ if (AA->pointsToConstantMemory(LI->getOperand(0)) || + LI->hasMetadata(LLVMContext::MD_invariant_load)) + return true; + return false; +diff --git a/llvm/test/Analysis/BasicAA/constant-memory.ll b/llvm/test/Analysis/BasicAA/constant-memory.ll +index 2b197d6dbc71..6ef875d9358e 100644 +--- a/llvm/test/Analysis/BasicAA/constant-memory.ll ++++ b/llvm/test/Analysis/BasicAA/constant-memory.ll +@@ -6,16 +6,18 @@ declare void @dummy() + + declare void @foo(ptr) + ++; FIXME: This could be NoModRef + ; CHECK-LABEL: Function: basic +-; CHECK: NoModRef: Ptr: i32* @c <-> call void @dummy() ++; CHECK: Just Ref: Ptr: i32* @c <-> call void @dummy() + define void @basic(ptr %p) { + call void @dummy() + load i32, ptr @c + ret void + } + ++; FIXME: This could be NoModRef + ; CHECK-LABEL: Function: recphi +-; CHECK: NoModRef: Ptr: i32* %p <-> call void @dummy() ++; CHECK: Just Ref: Ptr: i32* %p <-> call void @dummy() + define void @recphi() { + entry: + br label %loop +@@ -32,20 +34,20 @@ exit: + ret void + } + +-; Tests that readonly noalias implies !Mod. ++; Tests that readonly noalias doesn't imply !Mod yet. + ; + ; CHECK-LABEL: Function: readonly_noalias +-; CHECK: Just Ref: Ptr: i32* %p <-> call void @foo(ptr %p) ++; CHECK: Both ModRef: Ptr: i32* %p <-> call void @foo(ptr %p) + define void @readonly_noalias(ptr readonly noalias %p) { + call void @foo(ptr %p) + load i32, ptr %p + ret void + } + +-; Tests that readnone noalias implies !Mod. ++; Tests that readnone noalias doesn't imply !Mod yet. + ; + ; CHECK-LABEL: Function: readnone_noalias +-; CHECK: Just Ref: Ptr: i32* %p <-> call void @foo(ptr %p) ++; CHECK: Both ModRef: Ptr: i32* %p <-> call void @foo(ptr %p) + define void @readnone_noalias(ptr readnone noalias %p) { + call void @foo(ptr %p) + load i32, ptr %p +diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll +index b230c1488f75..cd6dfc4561b9 100644 +--- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll ++++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll +@@ -338,10 +338,12 @@ entry: + ret float %r + } + +-; Tests that we can eliminate allocas copied from readonly noalias pointers. ++; Tests that we can't eliminate allocas copied from readonly noalias pointers yet. + define void @memcpy_from_readonly_noalias(ptr readonly noalias align 8 dereferenceable(124) %arg) { + ; CHECK-LABEL: @memcpy_from_readonly_noalias( +-; CHECK-NEXT: call void @bar(ptr nonnull [[ARG:%.*]]) #[[ATTR3]] ++; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[T:%.*]], align 8 ++; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(124) [[ALLOCA]], ptr noundef nonnull align 8 dereferenceable(124) [[ARG:%.*]], i64 124, i1 false) ++; CHECK-NEXT: call void @bar(ptr nonnull [[ALLOCA]]) #[[ATTR3]] + ; CHECK-NEXT: ret void + ; + %alloca = alloca %T, align 8 +diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll +index 95ba64c9e640..7cbe91b44beb 100644 +--- a/llvm/test/Transforms/InstCombine/store.ll ++++ b/llvm/test/Transforms/InstCombine/store.ll +@@ -336,10 +336,12 @@ define void @store_to_constant() { + ret void + } + +-; Delete stores to readonly noalias pointers. ++; We can't delete stores to readonly noalias pointers yet. + define void @store_to_readonly_noalias(ptr readonly noalias %0) { + ; CHECK-LABEL: @store_to_readonly_noalias( ++; CHECK-NEXT: store i32 3, ptr [[TMP0:%.*]], align 4 + ; CHECK-NEXT: ret void ++; + store i32 3, ptr %0, align 4 + ret void + } +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/skip-3-stages.json b/build/build-clang/skip-3-stages.json new file mode 100644 index 0000000000..79b1bf193f --- /dev/null +++ b/build/build-clang/skip-3-stages.json @@ -0,0 +1,6 @@ +{ + "skip_stages": "3", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang" +} diff --git a/build/build-clang/skip-stage-1-win64.json b/build/build-clang/skip-stage-1-win64.json new file mode 100644 index 0000000000..8dee151003 --- /dev/null +++ b/build/build-clang/skip-stage-1-win64.json @@ -0,0 +1,7 @@ +{ + "skip_stages": "1", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl.exe", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl.exe", + "ml": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl.exe", + "lib": "{MOZ_FETCHES_DIR}/clang/bin/llvm-lib.exe" +} diff --git a/build/build-clang/skip-stage-1.json b/build/build-clang/skip-stage-1.json new file mode 100644 index 0000000000..aa1101b13b --- /dev/null +++ b/build/build-clang/skip-stage-1.json @@ -0,0 +1,6 @@ +{ + "skip_stages": "1", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang" +} diff --git a/build/build-clang/tsan-D101154.patch b/build/build-clang/tsan-D101154.patch new file mode 100644 index 0000000000..1b7c97520f --- /dev/null +++ b/build/build-clang/tsan-D101154.patch @@ -0,0 +1,240 @@ +diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +@@ -1976,7 +1976,8 @@ + // because in async signal processing case (when handler is called directly + // from rtl_generic_sighandler) we have not yet received the reraised + // signal; and it looks too fragile to intercept all ways to reraise a signal. +- if (flags()->report_bugs && !sync && sig != SIGTERM && errno != 99) { ++ if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM && ++ errno != 99) { + VarSizeStackTrace stack; + // StackTrace::GetNestInstructionPc(pc) is used because return address is + // expected, OutputReport() will undo this. +diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +@@ -145,7 +145,7 @@ + + static void SignalUnsafeCall(ThreadState *thr, uptr pc) { + if (atomic_load_relaxed(&thr->in_signal_handler) == 0 || +- !flags()->report_signal_unsafe) ++ !ShouldReport(thr, ReportTypeSignalUnsafe)) + return; + VarSizeStackTrace stack; + ObtainCurrentStack(thr, pc, &stack); +diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h +--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h ++++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h +@@ -624,6 +624,7 @@ + ScopedErrorReportLock lock_; + }; + ++bool ShouldReport(ThreadState *thr, ReportType typ); + ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack); + void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk, + MutexSet *mset, uptr *tag = nullptr); +diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +@@ -519,23 +519,22 @@ + void ForkBefore(ThreadState *thr, uptr pc) { + ctx->thread_registry->Lock(); + ctx->report_mtx.Lock(); +- // Ignore memory accesses in the pthread_atfork callbacks. +- // If any of them triggers a data race we will deadlock +- // on the report_mtx. ++ // Suppress all reports in the pthread_atfork callbacks. ++ // Reports will deadlock on the report_mtx. + // We could ignore interceptors and sync operations as well, + // but so far it's unclear if it will do more good or harm. + // Unnecessarily ignoring things can lead to false positives later. +- ThreadIgnoreBegin(thr, pc); ++ thr->suppress_reports++; + } + + void ForkParentAfter(ThreadState *thr, uptr pc) { +- ThreadIgnoreEnd(thr, pc); // Begin is in ForkBefore. ++ thr->suppress_reports--; // Enabled in ForkBefore. + ctx->report_mtx.Unlock(); + ctx->thread_registry->Unlock(); + } + + void ForkChildAfter(ThreadState *thr, uptr pc) { +- ThreadIgnoreEnd(thr, pc); // Begin is in ForkBefore. ++ thr->suppress_reports--; // Enabled in ForkBefore. + ctx->report_mtx.Unlock(); + ctx->thread_registry->Unlock(); + +diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp +--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp +@@ -51,6 +51,8 @@ + // or false positives (e.g. unlock in a different thread). + if (SANITIZER_GO) + return; ++ if (!ShouldReport(thr, typ)) ++ return; + ThreadRegistryLock l(ctx->thread_registry); + ScopedReport rep(typ); + rep.AddMutex(mid); +@@ -107,7 +109,7 @@ + if (!unlock_locked) + s->Reset(thr->proc()); // must not reset it before the report is printed + s->mtx.Unlock(); +- if (unlock_locked) { ++ if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) { + ThreadRegistryLock l(ctx->thread_registry); + ScopedReport rep(ReportTypeMutexDestroyLocked); + rep.AddMutex(mid); +@@ -534,7 +536,7 @@ + } + + void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { +- if (r == 0) ++ if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock)) + return; + ThreadRegistryLock l(ctx->thread_registry); + ScopedReport rep(ReportTypeDeadlock); +diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +@@ -142,6 +142,27 @@ + return stack; + } + ++bool ShouldReport(ThreadState *thr, ReportType typ) { ++ // We set thr->suppress_reports in the fork context. ++ // Taking any locking in the fork context can lead to deadlocks. ++ // If any locks are already taken, it's too late to do this check. ++ CheckNoLocks(thr); ++ if (SANITIZER_DEBUG) ++ ThreadRegistryLock l(ctx->thread_registry); ++ if (!flags()->report_bugs || thr->suppress_reports) ++ return false; ++ switch (typ) { ++ case ReportTypeSignalUnsafe: ++ return flags()->report_signal_unsafe; ++ case ReportTypeThreadLeak: ++ return flags()->report_thread_leaks; ++ case ReportTypeMutexDestroyLocked: ++ return flags()->report_destroy_locked; ++ default: ++ return true; ++ } ++} ++ + ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) { + ctx->thread_registry->CheckLocked(); + void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc)); +@@ -497,8 +518,10 @@ + } + + bool OutputReport(ThreadState *thr, const ScopedReport &srep) { +- if (!flags()->report_bugs || thr->suppress_reports) +- return false; ++ // These should have been checked in ShouldReport. ++ // It's too late to check them here, we have already taken locks. ++ CHECK(flags()->report_bugs); ++ CHECK(!thr->suppress_reports); + atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime()); + const ReportDesc *rep = srep.GetReport(); + CHECK_EQ(thr->current_report, nullptr); +@@ -589,7 +612,7 @@ + // at best it will cause deadlocks on internal mutexes. + ScopedIgnoreInterceptors ignore; + +- if (!flags()->report_bugs) ++ if (!ShouldReport(thr, ReportTypeRace)) + return; + if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr)) + return; +diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +@@ -210,7 +210,7 @@ + void ThreadFinalize(ThreadState *thr) { + ThreadCheckIgnore(thr); + #if !SANITIZER_GO +- if (!flags()->report_thread_leaks) ++ if (!ShouldReport(thr, ReportTypeThreadLeak)) + return; + ThreadRegistryLock l(ctx->thread_registry); + Vector<ThreadLeak> leaks; +diff --git a/compiler-rt/test/tsan/pthread_atfork_deadlock3.c b/compiler-rt/test/tsan/pthread_atfork_deadlock3.c +new file mode 100644 +--- /dev/null ++++ b/compiler-rt/test/tsan/pthread_atfork_deadlock3.c +@@ -0,0 +1,71 @@ ++// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s ++// Regression test for ++// https://groups.google.com/g/thread-sanitizer/c/TQrr4-9PRYo/m/HFR4FMi6AQAJ ++#include "test.h" ++#include <sys/types.h> ++#include <sys/wait.h> ++#include <errno.h> ++#include <string.h> ++#include <signal.h> ++ ++long glob = 0; ++ ++void *worker(void *main) { ++ glob++; ++ barrier_wait(&barrier); ++ barrier_wait(&barrier); ++ pthread_kill((pthread_t)main, SIGPROF); ++ barrier_wait(&barrier); ++ return NULL; ++} ++ ++void atfork() { ++ barrier_wait(&barrier); ++ barrier_wait(&barrier); ++ write(2, "in atfork\n", strlen("in atfork\n")); ++ static volatile long a; ++ __atomic_fetch_add(&a, 1, __ATOMIC_RELEASE); ++} ++ ++void handler(int sig) { ++ write(2, "in handler\n", strlen("in handler\n")); ++ glob++; ++} ++ ++int main() { ++ barrier_init(&barrier, 2); ++ struct sigaction act = {}; ++ act.sa_handler = &handler; ++ if (sigaction(SIGPROF, &act, 0)) { ++ perror("sigaction"); ++ exit(1); ++ } ++ pthread_atfork(atfork, NULL, NULL); ++ pthread_t t; ++ pthread_create(&t, NULL, worker, (void*)pthread_self()); ++ barrier_wait(&barrier); ++ pid_t pid = fork(); ++ if (pid < 0) { ++ fprintf(stderr, "fork failed: %d\n", errno); ++ return 1; ++ } ++ if (pid == 0) { ++ fprintf(stderr, "CHILD\n"); ++ return 0; ++ } ++ if (pid != waitpid(pid, NULL, 0)) { ++ fprintf(stderr, "waitpid failed: %d\n", errno); ++ return 1; ++ } ++ pthread_join(t, NULL); ++ fprintf(stderr, "PARENT\n"); ++ return 0; ++} ++ ++// CHECK: in atfork ++// CHECK: in handler ++// Note: There is a race, but we won't report it ++// to not deadlock. ++// CHECK-NOT: ThreadSanitizer: data race ++// CHECK: CHILD ++// CHECK: PARENT + diff --git a/build/build-clang/unpoison-thread-stacks_clang_10.patch b/build/build-clang/unpoison-thread-stacks_clang_10.patch new file mode 100644 index 0000000000..563fa1d7bf --- /dev/null +++ b/build/build-clang/unpoison-thread-stacks_clang_10.patch @@ -0,0 +1,64 @@ +[winasan] Unpoison the stack in NtTerminateThread + +In long-running builds we've seen some ASan complaints during thread creation +that we suspect are due to leftover poisoning from previous threads whose stacks +occupied that memory. This patch adds a hook that unpoisons the stack just +before the NtTerminateThread syscall. + +Differential Revision: https://reviews.llvm.org/D52091 + +** Update for clang 9 ** : After some backouts, this patch eventually landed +upstream in a different form, as the TLS handler `asan_thread_exit`, but that +variant causes failures in our test suite, so revert the TLS handler in favor of +the interceptor approach from the first patch. + +diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp +index 417892aaedd..5fe86db44f4 100644 +--- a/compiler-rt/lib/asan/asan_win.cpp ++++ b/compiler-rt/lib/asan/asan_win.cpp +@@ -154,6 +154,14 @@ INTERCEPTOR_WINAPI(HANDLE, CreateThread, LPSECURITY_ATTRIBUTES security, + thr_flags, tid); + } + ++INTERCEPTOR_WINAPI(void, NtTerminateThread, void *rcx) { ++ // Unpoison the terminating thread's stack because the memory may be re-used. ++ NT_TIB *tib = (NT_TIB *)NtCurrentTeb(); ++ uptr stackSize = (uptr)tib->StackBase - (uptr)tib->StackLimit; ++ __asan_unpoison_memory_region(tib->StackLimit, stackSize); ++ return REAL(NtTerminateThread(rcx)); ++} ++ + // }}} + + namespace __asan { +@@ -168,7 +176,9 @@ void InitializePlatformInterceptors() { + + ASAN_INTERCEPT_FUNC(CreateThread); + ASAN_INTERCEPT_FUNC(SetUnhandledExceptionFilter); +- ++ CHECK(::__interception::OverrideFunction("NtTerminateThread", ++ (uptr)WRAP(NtTerminateThread), ++ (uptr *)&REAL(NtTerminateThread))); + #ifdef _WIN64 + ASAN_INTERCEPT_FUNC(__C_specific_handler); + #else +@@ -380,19 +390,6 @@ __declspec(allocate(".CRT$XLAB")) void(NTAPI *__asan_tls_init)( + void *, unsigned long, void *) = asan_thread_init; + #endif + +-static void NTAPI asan_thread_exit(void *module, DWORD reason, void *reserved) { +- if (reason == DLL_THREAD_DETACH) { +- // Unpoison the thread's stack because the memory may be re-used. +- NT_TIB *tib = (NT_TIB *)NtCurrentTeb(); +- uptr stackSize = (uptr)tib->StackBase - (uptr)tib->StackLimit; +- __asan_unpoison_memory_region(tib->StackLimit, stackSize); +- } +-} +- +-#pragma section(".CRT$XLY", long, read) +-__declspec(allocate(".CRT$XLY")) void(NTAPI *__asan_tls_exit)( +- void *, unsigned long, void *) = asan_thread_exit; +- + WIN_FORCE_LINK(__asan_dso_reg_hook) + + // }}} diff --git a/build/build-clang/win64-no-symlink.patch b/build/build-clang/win64-no-symlink.patch new file mode 100644 index 0000000000..44d3de7585 --- /dev/null +++ b/build/build-clang/win64-no-symlink.patch @@ -0,0 +1,30 @@ +diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake +index b5c35f706cb7..d9f629be06a7 100644 +--- a/llvm/cmake/modules/LLVMInstallSymlink.cmake ++++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake +@@ -6,17 +6,18 @@ include(GNUInstallDirs) + + function(install_symlink name target outdir) + set(DESTDIR $ENV{DESTDIR}) ++ if(CMAKE_HOST_UNIX) ++ set(LINK_OR_COPY create_symlink) ++ else() ++ set(LINK_OR_COPY copy) ++ endif() ++ + set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}") + + message(STATUS "Creating ${name}") + + execute_process( +- COMMAND "${CMAKE_COMMAND}" -E create_symlink "${target}" "${name}" +- WORKING_DIRECTORY "${bindir}" ERROR_VARIABLE has_err) +- if(CMAKE_HOST_WIN32 AND has_err) +- execute_process( +- COMMAND "${CMAKE_COMMAND}" -E copy "${target}" "${name}" +- WORKING_DIRECTORY "${bindir}") +- endif() ++ COMMAND "${CMAKE_COMMAND}" -E ${LINK_OR_COPY} "${target}" "${name}" ++ WORKING_DIRECTORY "${bindir}") + + endfunction() diff --git a/build/build-clang/win64-no-symlink_clang_16.patch b/build/build-clang/win64-no-symlink_clang_16.patch new file mode 100644 index 0000000000..7fb34eae8c --- /dev/null +++ b/build/build-clang/win64-no-symlink_clang_16.patch @@ -0,0 +1,32 @@ +diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake +index e9be04aceb3d..3c0bdc25bd46 100644 +--- a/llvm/cmake/modules/LLVMInstallSymlink.cmake ++++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake +@@ -6,6 +6,12 @@ include(GNUInstallDirs) + + function(install_symlink name target outdir) + set(DESTDIR $ENV{DESTDIR}) ++ if(CMAKE_HOST_UNIX) ++ set(LINK_OR_COPY create_symlink) ++ else() ++ set(LINK_OR_COPY copy) ++ endif() ++ + if(NOT IS_ABSOLUTE "${outdir}") + set(outdir "${CMAKE_INSTALL_PREFIX}/${outdir}") + endif() +@@ -14,12 +19,7 @@ function(install_symlink name target outdir) + message(STATUS "Creating ${name}") + + execute_process( +- COMMAND "${CMAKE_COMMAND}" -E create_symlink "${target}" "${name}" +- WORKING_DIRECTORY "${outdir}" ERROR_VARIABLE has_err) +- if(CMAKE_HOST_WIN32 AND has_err) +- execute_process( +- COMMAND "${CMAKE_COMMAND}" -E copy "${target}" "${name}" +- WORKING_DIRECTORY "${outdir}") +- endif() ++ COMMAND "${CMAKE_COMMAND}" -E ${LINK_OR_COPY} "${target}" "${name}" ++ WORKING_DIRECTORY "${outdir}") + + endfunction() diff --git a/build/build-clang/win64-ret-null-on-commitment-limit_clang_14.patch b/build/build-clang/win64-ret-null-on-commitment-limit_clang_14.patch new file mode 100644 index 0000000000..23b001bc68 --- /dev/null +++ b/build/build-clang/win64-ret-null-on-commitment-limit_clang_14.patch @@ -0,0 +1,14 @@ +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +index 7c84cdc22ce4..e13fff03489e 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +@@ -157,7 +157,8 @@ void UnmapOrDie(void *addr, uptr size) { + static void *ReturnNullptrOnOOMOrDie(uptr size, const char *mem_type, + const char *mmap_type) { + error_t last_error = GetLastError(); +- if (last_error == ERROR_NOT_ENOUGH_MEMORY) ++ if (last_error == ERROR_NOT_ENOUGH_MEMORY || ++ last_error == ERROR_COMMITMENT_LIMIT) + return nullptr; + ReportMmapFailureAndDie(size, mem_type, mmap_type, last_error); + } diff --git a/build/build-clang/win64.json b/build/build-clang/win64.json new file mode 100644 index 0000000000..947c5ed1ef --- /dev/null +++ b/build/build-clang/win64.json @@ -0,0 +1,5 @@ +{ + "cc": "cl.exe", + "cxx": "cl.exe", + "ml": "ml64.exe" +} |