author    Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-19 01:47:29 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-19 01:47:29 +0000
commit    0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
tree      a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /build/build-clang
parent    Initial commit. (diff)
Adding upstream version 115.8.0esr. (upstream/115.8.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat: 58 files changed, 9766 insertions(+), 0 deletions(-)
diff --git a/build/build-clang/1stage.json b/build/build-clang/1stage.json new file mode 100644 index 0000000000..4633a3e93e --- /dev/null +++ b/build/build-clang/1stage.json @@ -0,0 +1,4 @@ +{ + "stages": "1", + "targets": "X86;ARM;AArch64;WebAssembly" +} diff --git a/build/build-clang/2stages.json b/build/build-clang/2stages.json new file mode 100644 index 0000000000..e34226758d --- /dev/null +++ b/build/build-clang/2stages.json @@ -0,0 +1,3 @@ +{ + "stages": "2" +} diff --git a/build/build-clang/4stages-pgo.json b/build/build-clang/4stages-pgo.json new file mode 100644 index 0000000000..8f0c5aa97d --- /dev/null +++ b/build/build-clang/4stages-pgo.json @@ -0,0 +1,7 @@ +{ + "stages": "4", + "targets": "X86;ARM;AArch64;WebAssembly", + "pgo": true, + "ranlib": "{MOZ_FETCHES_DIR}/clang/bin/llvm-ranlib", + "ar": "{MOZ_FETCHES_DIR}/clang/bin/llvm-ar" +} diff --git a/build/build-clang/D146664.patch b/build/build-clang/D146664.patch new file mode 100644 index 0000000000..9813c6c86e --- /dev/null +++ b/build/build-clang/D146664.patch @@ -0,0 +1,99 @@ +From b57ff6da9c8b281ae9312e245fd3372e7ffaff28 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 23 Mar 2023 06:52:28 +0900 +Subject: [PATCH] Apply the same fallbacks as runtimes search for stdlib search + +When building clang with e.g. LLVM_ENABLE_RUNTIMES=libcxx;libunwind, +those runtimes end up in the stdlib search directory, and when +LLVM_ENABLE_PER_TARGET_RUNTIME_DIR is set, that ends up in a +target-specific subdirectory. The stdlib search does handle the +situation, but when the target in question is Android, the same issues +as those that required fallbacks for runtimes search apply. + +Traditionally, those libraries are shipped as part of the Android NDK, +but when one builds their own clang for Android, they may want to use +the runtimes from the same version rather than the ones from the NDK. + +Differential Revision: https://reviews.llvm.org/D146664 +--- + clang/lib/Driver/ToolChain.cpp | 42 +++++++++++++++++++--------------- + 1 file changed, 24 insertions(+), 18 deletions(-) + +diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp +index 2dba975a5a8f..9052099cad5e 100644 +--- a/clang/lib/Driver/ToolChain.cpp ++++ b/clang/lib/Driver/ToolChain.cpp +@@ -569,15 +569,9 @@ const char *ToolChain::getCompilerRTArgString(const llvm::opt::ArgList &Args, + return Args.MakeArgString(getCompilerRT(Args, Component, Type)); + } + +-ToolChain::path_list ToolChain::getRuntimePaths() const { +- path_list Paths; +- auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { +- SmallString<128> P(D.ResourceDir); +- llvm::sys::path::append(P, "lib", Triple.str()); +- Paths.push_back(std::string(P.str())); +- }; +- +- addPathForTriple(getTriple()); ++template <typename F> ++static void fillPaths(const ToolChain &TC, F addPathForTriple) { ++ addPathForTriple(TC.getTriple()); + + // When building with per target runtime directories, various ways of naming + // the Arm architecture may have been normalised to simply "arm". +@@ -594,30 +588,42 @@ ToolChain::path_list ToolChain::getRuntimePaths() const { + // + // M profile Arm is bare metal and we know they will not be using the per + // target runtime directory layout. 
+- if (getTriple().getArch() == Triple::arm && !getTriple().isArmMClass()) { +- llvm::Triple ArmTriple = getTriple(); ++ if (TC.getTriple().getArch() == Triple::arm && ++ !TC.getTriple().isArmMClass()) { ++ llvm::Triple ArmTriple = TC.getTriple(); + ArmTriple.setArch(Triple::arm); + addPathForTriple(ArmTriple); + } + + // Android targets may include an API level at the end. We still want to fall + // back on a path without the API level. +- if (getTriple().isAndroid() && +- getTriple().getEnvironmentName() != "android") { +- llvm::Triple TripleWithoutLevel = getTriple(); ++ if (TC.getTriple().isAndroid() && ++ TC.getTriple().getEnvironmentName() != "android") { ++ llvm::Triple TripleWithoutLevel = TC.getTriple(); + TripleWithoutLevel.setEnvironmentName("android"); + addPathForTriple(TripleWithoutLevel); + } ++} + ++ToolChain::path_list ToolChain::getRuntimePaths() const { ++ path_list Paths; ++ auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { ++ SmallString<128> P(D.ResourceDir); ++ llvm::sys::path::append(P, "lib", Triple.str()); ++ Paths.push_back(std::string(P.str())); ++ }; ++ fillPaths(*this, addPathForTriple); + return Paths; + } + + ToolChain::path_list ToolChain::getStdlibPaths() const { + path_list Paths; +- SmallString<128> P(D.Dir); +- llvm::sys::path::append(P, "..", "lib", getTripleString()); +- Paths.push_back(std::string(P.str())); +- ++ auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { ++ SmallString<128> P(D.Dir); ++ llvm::sys::path::append(P, "..", "lib", Triple.str()); ++ Paths.push_back(std::string(P.str())); ++ }; ++ fillPaths(*this, addPathForTriple); + return Paths; + } + +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/D146664_clang_15.patch b/build/build-clang/D146664_clang_15.patch new file mode 100644 index 0000000000..e8b000ca39 --- /dev/null +++ b/build/build-clang/D146664_clang_15.patch @@ -0,0 +1,75 @@ +From: Mike Hommey <mh@glandium.org> +Date: Thu, 23 Mar 2023 06:52:28 +0900 +Subject: [PATCH] Apply the same fallbacks as runtimes search for stdlib search + +When building clang with e.g. LLVM_ENABLE_RUNTIMES=libcxx;libunwind, +those runtimes end up in the stdlib search directory, and when +LLVM_ENABLE_PER_TARGET_RUNTIME_DIR is set, that ends up in a +target-specific subdirectory. The stdlib search does handle the +situation, but when the target in question is Android, the same issues +as those that required fallbacks for runtimes search apply. + +Traditionally, those libraries are shipped as part of the Android NDK, +but when one builds their own clang for Android, they may want to use +the runtimes from the same version rather than the ones from the NDK. 
+ +diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp +index 7a4319ea680f..afbc7de8befd 100644 +--- a/clang/lib/Driver/ToolChain.cpp ++++ b/clang/lib/Driver/ToolChain.cpp +@@ -535,34 +535,39 @@ const char *ToolChain::getCompilerRTArgString(const llvm::opt::ArgList &Args, + return Args.MakeArgString(getCompilerRT(Args, Component, Type)); + } + +-ToolChain::path_list ToolChain::getRuntimePaths() const { +- path_list Paths; +- auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { +- SmallString<128> P(D.ResourceDir); +- llvm::sys::path::append(P, "lib", Triple.str()); +- Paths.push_back(std::string(P.str())); +- }; +- +- addPathForTriple(getTriple()); ++template <typename F> ++static void fillPaths(const ToolChain &TC, F addPathForTriple) { ++ addPathForTriple(TC.getTriple()); + + // Android targets may include an API level at the end. We still want to fall + // back on a path without the API level. +- if (getTriple().isAndroid() && +- getTriple().getEnvironmentName() != "android") { +- llvm::Triple TripleWithoutLevel = getTriple(); ++ if (TC.getTriple().isAndroid() && ++ TC.getTriple().getEnvironmentName() != "android") { ++ llvm::Triple TripleWithoutLevel = TC.getTriple(); + TripleWithoutLevel.setEnvironmentName("android"); + addPathForTriple(TripleWithoutLevel); + } ++} + ++ToolChain::path_list ToolChain::getRuntimePaths() const { ++ path_list Paths; ++ auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { ++ SmallString<128> P(D.ResourceDir); ++ llvm::sys::path::append(P, "lib", Triple.str()); ++ Paths.push_back(std::string(P.str())); ++ }; ++ fillPaths(*this, addPathForTriple); + return Paths; + } + + ToolChain::path_list ToolChain::getStdlibPaths() const { + path_list Paths; +- SmallString<128> P(D.Dir); +- llvm::sys::path::append(P, "..", "lib", getTripleString()); +- Paths.push_back(std::string(P.str())); +- ++ auto addPathForTriple = [this, &Paths](const llvm::Triple &Triple) { ++ SmallString<128> P(D.Dir); ++ llvm::sys::path::append(P, "..", "lib", Triple.str()); ++ Paths.push_back(std::string(P.str())); ++ }; ++ fillPaths(*this, addPathForTriple); + return Paths; + } + diff --git a/build/build-clang/D151864.patch b/build/build-clang/D151864.patch new file mode 100644 index 0000000000..000ee2b59d --- /dev/null +++ b/build/build-clang/D151864.patch @@ -0,0 +1,308 @@ +From 49abc88612014eb99580a2870257d2bc70b16333 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 1 Jun 2023 13:26:51 +0900 +Subject: [PATCH] Strip stabs symbols in Mach-O when stripping debug info + +Differential Revision: https://reviews.llvm.org/D151864 +--- + llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp | 3 + + .../MachO/Inputs/strip-stabs.yaml | 248 ++++++++++++++++++ + .../tools/llvm-objcopy/MachO/strip-stabs.test | 17 ++ + 3 files changed, 268 insertions(+) + create mode 100644 llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-stabs.yaml + create mode 100644 llvm/test/tools/llvm-objcopy/MachO/strip-stabs.test + +diff --git a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp +index d37241682efe..e26b363df21c 100644 +--- a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp ++++ b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp +@@ -112,6 +112,9 @@ static void updateAndRemoveSymbols(const CommonConfig &Config, + if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT)) + return true; + // This behavior is consistent with cctools' strip. 
++ if (Config.StripDebug && (N->n_type & MachO::N_STAB)) ++ return true; ++ // This behavior is consistent with cctools' strip. + if (MachOConfig.StripSwiftSymbols && + (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion && + *Obj.SwiftVersion && N->isSwiftSymbol()) +diff --git a/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-stabs.yaml b/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-stabs.yaml +new file mode 100644 +index 000000000000..3259aa228fed +--- /dev/null ++++ b/llvm/test/tools/llvm-objcopy/MachO/Inputs/strip-stabs.yaml +@@ -0,0 +1,248 @@ ++--- !mach-o ++FileHeader: ++ magic: 0xFEEDFACF ++ cputype: 0x1000007 ++ cpusubtype: 0x80000003 ++ filetype: 0x2 ++ ncmds: 13 ++ sizeofcmds: 808 ++ flags: 0x200085 ++ reserved: 0x0 ++LoadCommands: ++ - cmd: LC_SEGMENT_64 ++ cmdsize: 72 ++ segname: __PAGEZERO ++ vmaddr: 0 ++ vmsize: 4294967296 ++ fileoff: 0 ++ filesize: 0 ++ maxprot: 0 ++ initprot: 0 ++ nsects: 0 ++ flags: 0 ++ - cmd: LC_SEGMENT_64 ++ cmdsize: 312 ++ segname: __TEXT ++ vmaddr: 4294967296 ++ vmsize: 8192 ++ fileoff: 0 ++ filesize: 8192 ++ maxprot: 5 ++ initprot: 5 ++ nsects: 3 ++ flags: 0 ++ Sections: ++ - sectname: __text ++ segname: __TEXT ++ addr: 0x100000370 ++ size: 8 ++ offset: 0x370 ++ align: 4 ++ reloff: 0x0 ++ nreloc: 0 ++ flags: 0x80000400 ++ reserved1: 0x0 ++ reserved2: 0x0 ++ reserved3: 0x0 ++ content: 554889E531C05DC3 ++ - sectname: __unwind_info ++ segname: __TEXT ++ addr: 0x100000378 ++ size: 4152 ++ offset: 0x378 ++ align: 2 ++ reloff: 0x0 ++ nreloc: 0 ++ flags: 0x0 ++ reserved1: 0x0 ++ reserved2: 0x0 ++ reserved3: 0x0 ++ - sectname: __eh_frame ++ segname: __TEXT ++ addr: 0x1000013B0 ++ size: 24 ++ offset: 0x13B0 ++ align: 3 ++ reloff: 0x0 ++ nreloc: 0 ++ flags: 0x6000000B ++ reserved1: 0x0 ++ reserved2: 0x0 ++ reserved3: 0x0 ++ content: 1400000000000000017A520001781001100C070890010000 ++ - cmd: LC_SEGMENT_64 ++ cmdsize: 72 ++ segname: __LINKEDIT ++ vmaddr: 4294975488 ++ vmsize: 272 ++ fileoff: 8192 ++ filesize: 272 ++ maxprot: 1 ++ initprot: 1 ++ nsects: 0 ++ flags: 0 ++ - cmd: LC_DYLD_INFO_ONLY ++ cmdsize: 48 ++ rebase_off: 0 ++ rebase_size: 0 ++ bind_off: 0 ++ bind_size: 0 ++ weak_bind_off: 0 ++ weak_bind_size: 0 ++ lazy_bind_off: 0 ++ lazy_bind_size: 0 ++ export_off: 8192 ++ export_size: 48 ++ - cmd: LC_SYMTAB ++ cmdsize: 24 ++ symoff: 8248 ++ nsyms: 8 ++ stroff: 8376 ++ strsize: 88 ++ - cmd: LC_DYSYMTAB ++ cmdsize: 80 ++ ilocalsym: 0 ++ nlocalsym: 5 ++ iextdefsym: 5 ++ nextdefsym: 2 ++ iundefsym: 7 ++ nundefsym: 1 ++ tocoff: 0 ++ ntoc: 0 ++ modtaboff: 0 ++ nmodtab: 0 ++ extrefsymoff: 0 ++ nextrefsyms: 0 ++ indirectsymoff: 0 ++ nindirectsyms: 0 ++ extreloff: 0 ++ nextrel: 0 ++ locreloff: 0 ++ nlocrel: 0 ++ - cmd: LC_LOAD_DYLINKER ++ cmdsize: 32 ++ name: 12 ++ Content: '/usr/lib/dyld' ++ ZeroPadBytes: 7 ++ - cmd: LC_UUID ++ cmdsize: 24 ++ uuid: 4C4C44DE-5555-3144-A19D-79B149A02D5F ++ - cmd: LC_BUILD_VERSION ++ cmdsize: 32 ++ platform: 1 ++ minos: 852736 ++ sdk: 852736 ++ ntools: 1 ++ Tools: ++ - tool: 3 ++ version: 1048580 ++ - cmd: LC_MAIN ++ cmdsize: 24 ++ entryoff: 880 ++ stacksize: 0 ++ - cmd: LC_LOAD_DYLIB ++ cmdsize: 56 ++ dylib: ++ name: 24 ++ timestamp: 0 ++ current_version: 86467587 ++ compatibility_version: 65536 ++ Content: '/usr/lib/libSystem.B.dylib' ++ ZeroPadBytes: 6 ++ - cmd: LC_FUNCTION_STARTS ++ cmdsize: 16 ++ dataoff: 8240 ++ datasize: 8 ++ - cmd: LC_DATA_IN_CODE ++ cmdsize: 16 ++ dataoff: 8248 ++ datasize: 0 ++LinkEditData: ++ ExportTrie: ++ TerminalSize: 0 ++ NodeOffset: 0 ++ Name: '' ++ Flags: 0x0 ++ Address: 0x0 ++ Other: 0x0 ++ 
ImportName: '' ++ Children: ++ - TerminalSize: 0 ++ NodeOffset: 5 ++ Name: _ ++ Flags: 0x0 ++ Address: 0x0 ++ Other: 0x0 ++ ImportName: '' ++ Children: ++ - TerminalSize: 3 ++ NodeOffset: 33 ++ Name: main ++ Flags: 0x0 ++ Address: 0x370 ++ Other: 0x0 ++ ImportName: '' ++ - TerminalSize: 2 ++ NodeOffset: 38 ++ Name: _mh_execute_header ++ Flags: 0x0 ++ Address: 0x0 ++ Other: 0x0 ++ ImportName: '' ++ NameList: ++ - n_strx: 45 ++ n_type: 0x64 ++ n_sect: 0 ++ n_desc: 0 ++ n_value: 0 ++ - n_strx: 57 ++ n_type: 0x66 ++ n_sect: 3 ++ n_desc: 1 ++ n_value: 0 ++ - n_strx: 76 ++ n_type: 0x24 ++ n_sect: 1 ++ n_desc: 0 ++ n_value: 4294968176 ++ - n_strx: 1 ++ n_type: 0x24 ++ n_sect: 0 ++ n_desc: 0 ++ n_value: 8 ++ - n_strx: 1 ++ n_type: 0x64 ++ n_sect: 1 ++ n_desc: 0 ++ n_value: 0 ++ - n_strx: 2 ++ n_type: 0xF ++ n_sect: 1 ++ n_desc: 0 ++ n_value: 4294968176 ++ - n_strx: 25 ++ n_type: 0xF ++ n_sect: 1 ++ n_desc: 16 ++ n_value: 4294967296 ++ - n_strx: 8 ++ n_type: 0x1 ++ n_sect: 0 ++ n_desc: 256 ++ n_value: 0 ++ StringTable: ++ - ' ' ++ - _main ++ - dyld_stub_binder ++ - __mh_execute_header ++ - '/tmp/test.c' ++ - '/tmp/test-6aa924.o' ++ - _main ++ - '' ++ - '' ++ - '' ++ - '' ++ - '' ++ - '' ++ FunctionStarts: [ 0x370 ] ++... +diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-stabs.test b/llvm/test/tools/llvm-objcopy/MachO/strip-stabs.test +new file mode 100644 +index 000000000000..90c00f60a152 +--- /dev/null ++++ b/llvm/test/tools/llvm-objcopy/MachO/strip-stabs.test +@@ -0,0 +1,17 @@ ++## Show that llvm-objcopy/llvm-strip stabs symbols and debug sections. ++ ++# RUN: yaml2obj %p/Inputs/strip-stabs.yaml -o %t ++ ++# RUN: llvm-objcopy --strip-debug %t %t.stripped ++ ++## Make sure that stabs symbols are stripped. ++# RUN: llvm-readobj --symbols %t | FileCheck %s --check-prefix=SYM ++# RUN: llvm-readobj --symbols %t.stripped | FileCheck %s --check-prefix=SYM_STRIP ++ ++# SYM: Symbols [ ++# SYM-COUNT-5: Type: SymDebugTable ({{.*}}) ++# SYM: ] ++ ++# SYM_STRIP: Symbols [ ++# SYM_STRIP-NOT: Type: SymDebugTable ({{.*}}) ++# SYM_STRIP: ] +-- +2.40.0.1.gc689dad23e + diff --git a/build/build-clang/README b/build/build-clang/README new file mode 100644 index 0000000000..5b13edeeb9 --- /dev/null +++ b/build/build-clang/README @@ -0,0 +1,53 @@ +build-clang.py +============== + +A script to build clang from source. + +``` +usage: build-clang.py [-h] -c CONFIG [--clean] + +optional arguments: + -h, --help show this help message and exit + -c CONFIG, --config CONFIG + Clang configuration file + --clean Clean the build directory +``` + +Pre-requisites +-------------- +* Working build toolchain. +* git +* CMake +* Ninja +* Python 2.7 and 3 + +Please use the latest available CMake for your platform to avoid surprises. + +Config file format +------------------ + +build-clang.py accepts a JSON config format with the following fields: + +* stages: Use 1, 2, 3 or 4 to select different compiler stages. The default is 2. +* cc: Path to the bootsraping C Compiler. +* cxx: Path to the bootsraping C++ Compiler. +* as: Path to the assembler tool. +* ar: Path to the library archiver tool. +* ranlib: Path to the ranlib tool (optional). +* ld: Path to the linker. +* patches: Optional list of patches to apply. +* build_type: The type of build to make. Supported types: Release, Debug, RelWithDebInfo or MinSizeRel. +* targets: The targets supported by the final stage LLVM/clang. +* build_clang_tidy: Whether to build clang-tidy with the Mozilla checks imported. The default is false. 
+* osx_cross_compile: Whether to invoke CMake for OS X cross compile builds. +* assertions: Whether to enable LLVM assertions. The default is false. +* pgo: Whether to build with PGO (requires stages == 4). The default is false. + +The revisions are defined in taskcluster/ci/fetch/toolchains.yml. They are usually commit sha1s corresponding to upstream tags. + +Environment Variables +--------------------- + +The following environment variables are used for cross-compile builds targeting OS X on Linux. + +* CROSS_SYSROOT: Path to the OS X SDK directory for cross compile builds. diff --git a/build/build-clang/Remove-FlushViewOfFile-when-unmaping-gcda-files.patch b/build/build-clang/Remove-FlushViewOfFile-when-unmaping-gcda-files.patch new file mode 100644 index 0000000000..a3ea2d75f9 --- /dev/null +++ b/build/build-clang/Remove-FlushViewOfFile-when-unmaping-gcda-files.patch @@ -0,0 +1,31 @@ +From 78a6bcfed4b73f13b9973afd69b76067dd4a5dde Mon Sep 17 00:00:00 2001 +From: Calixte Denizet <calixte.denizet@gmail.com> +Date: Mon, 4 Oct 2021 11:07:56 +0200 +Subject: [PATCH] Remove FlushViewOfFile when unmaping gcda files - it can + causes bad performances with slow disks; - MS docs say that it's mainly + useful in case of hard failures (OS crash, electrical failure, ...): so it's + useless to call this function when ccov builds run on CI. + +--- + compiler-rt/lib/profile/GCDAProfiling.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c +index 4293e8f7b5bf..83650d33c95d 100644 +--- a/compiler-rt/lib/profile/GCDAProfiling.c ++++ b/compiler-rt/lib/profile/GCDAProfiling.c +@@ -286,11 +286,6 @@ static int map_file() { + + static void unmap_file() { + #if defined(_WIN32) +- if (!FlushViewOfFile(write_buffer, file_size)) { +- fprintf(stderr, "profiling: %s: cannot flush mapped view: %lu\n", filename, +- GetLastError()); +- } +- + if (!UnmapViewOfFile(write_buffer)) { + fprintf(stderr, "profiling: %s: cannot unmap mapped view: %lu\n", filename, + GetLastError()); +-- +2.33.0 + diff --git a/build/build-clang/android-mangling-error_clang_12.patch b/build/build-clang/android-mangling-error_clang_12.patch new file mode 100644 index 0000000000..315756d30b --- /dev/null +++ b/build/build-clang/android-mangling-error_clang_12.patch @@ -0,0 +1,24 @@ +Workaround segfault in clang's mangling code that is tickled when +attempting to mangle the declaration: + std:__ndk1::__find_detail::__find_exactly_one_checked::__matches +in the <tuple> header in the Android NDK. +This codepath is exercised by MozsearchIndexer.cpp (the searchfox +indexer) when indexing on Android. See also +https://bugs.llvm.org/show_bug.cgi?id=40747 + +diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp +index 4420f6a2c1c3..39792e6b7350 100644 +--- a/clang/lib/AST/ItaniumMangle.cpp ++++ b/clang/lib/AST/ItaniumMangle.cpp +@@ -3954,6 +3954,11 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, + // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need + // to be preserved. 
+ recurse: ++ if (!E) { ++ Out << "MOZ_WE_HACKED_AROUND_BUG_1500941"; ++ return; ++ } ++ + switch (E->getStmtClass()) { + case Expr::NoStmtClass: + #define ABSTRACT_STMT(Type) diff --git a/build/build-clang/build-clang.py b/build/build-clang/build-clang.py new file mode 100755 index 0000000000..3237d9c9b9 --- /dev/null +++ b/build/build-clang/build-clang.py @@ -0,0 +1,843 @@ +#!/usr/bin/python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Only necessary for flake8 to be happy... +import argparse +import errno +import fnmatch +import glob +import json +import os +import os.path +import platform +import re +import shutil +import subprocess +import sys +import tarfile +from contextlib import contextmanager +from shutil import which + +import zstandard + +SUPPORTED_TARGETS = { + "x86_64-unknown-linux-gnu": ("Linux", "x86_64"), + "x86_64-pc-windows-msvc": ("Windows", "AMD64"), + "x86_64-apple-darwin": ("Darwin", "x86_64"), + "aarch64-apple-darwin": ("Darwin", "arm64"), +} + + +def is_llvm_toolchain(cc, cxx): + return "clang" in cc and "clang" in cxx + + +def check_run(args): + print(" ".join(args), file=sys.stderr, flush=True) + if args[0] == "cmake": + # CMake `message(STATUS)` messages, as appearing in failed source code + # compiles, appear on stdout, so we only capture that. + p = subprocess.Popen(args, stdout=subprocess.PIPE) + lines = [] + for line in p.stdout: + lines.append(line) + sys.stdout.write(line.decode()) + sys.stdout.flush() + r = p.wait() + if r != 0 and os.environ.get("UPLOAD_DIR"): + cmake_output_re = re.compile(b'See also "(.*/CMakeOutput.log)"') + cmake_error_re = re.compile(b'See also "(.*/CMakeError.log)"') + + def find_first_match(re): + for l in lines: + match = re.search(l) + if match: + return match + + output_match = find_first_match(cmake_output_re) + error_match = find_first_match(cmake_error_re) + + upload_dir = os.environ["UPLOAD_DIR"].encode("utf-8") + if output_match or error_match: + mkdir_p(upload_dir) + if output_match: + shutil.copy2(output_match.group(1), upload_dir) + if error_match: + shutil.copy2(error_match.group(1), upload_dir) + else: + r = subprocess.call(args) + assert r == 0 + + +def run_in(path, args): + with chdir(path): + check_run(args) + + +@contextmanager +def chdir(path): + d = os.getcwd() + print('cd "%s"' % path, file=sys.stderr) + os.chdir(path) + try: + yield + finally: + print('cd "%s"' % d, file=sys.stderr) + os.chdir(d) + + +def patch(patch, srcdir): + patch = os.path.realpath(patch) + check_run(["patch", "-d", srcdir, "-p1", "-i", patch, "--fuzz=0", "-s"]) + + +def import_clang_tidy(source_dir, build_clang_tidy_alpha, build_clang_tidy_external): + clang_plugin_path = os.path.join(os.path.dirname(sys.argv[0]), "..", "clang-plugin") + clang_tidy_path = os.path.join(source_dir, "clang-tools-extra/clang-tidy") + sys.path.append(clang_plugin_path) + from import_mozilla_checks import do_import + + import_options = { + "alpha": build_clang_tidy_alpha, + "external": build_clang_tidy_external, + } + do_import(clang_plugin_path, clang_tidy_path, import_options) + + +def build_package(package_build_dir, cmake_args): + if not os.path.exists(package_build_dir): + os.mkdir(package_build_dir) + # If CMake has already been run, it may have been run with different + # arguments, so we need to re-run it. 
Make sure the cached copy of the + # previous CMake run is cleared before running it again. + if os.path.exists(package_build_dir + "/CMakeCache.txt"): + os.remove(package_build_dir + "/CMakeCache.txt") + if os.path.exists(package_build_dir + "/CMakeFiles"): + shutil.rmtree(package_build_dir + "/CMakeFiles") + + run_in(package_build_dir, ["cmake"] + cmake_args) + run_in(package_build_dir, ["ninja", "install", "-v"]) + + +@contextmanager +def updated_env(env): + old_env = os.environ.copy() + os.environ.update(env) + yield + os.environ.clear() + os.environ.update(old_env) + + +def build_tar_package(name, base, directory): + name = os.path.realpath(name) + print("tarring {} from {}/{}".format(name, base, directory), file=sys.stderr) + assert name.endswith(".tar.zst") + + cctx = zstandard.ZstdCompressor() + with open(name, "wb") as f, cctx.stream_writer(f) as z: + with tarfile.open(mode="w|", fileobj=z) as tf: + with chdir(base): + tf.add(directory) + + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST or not os.path.isdir(path): + raise + + +def delete(path): + if os.path.isdir(path): + shutil.rmtree(path) + else: + try: + os.unlink(path) + except Exception: + pass + + +def install_import_library(build_dir, clang_dir): + shutil.copy2( + os.path.join(build_dir, "lib", "clang.lib"), os.path.join(clang_dir, "lib") + ) + + +def is_darwin(target): + return "-apple-darwin" in target + + +def is_linux(target): + return "-linux-gnu" in target + + +def is_windows(target): + return "-windows-msvc" in target + + +def is_cross_compile(target): + return SUPPORTED_TARGETS[target] != (platform.system(), platform.machine()) + + +def build_one_stage( + cc, + cxx, + asm, + ar, + ranlib, + ldflags, + src_dir, + stage_dir, + package_name, + build_type, + assertions, + target, + targets, + is_final_stage=False, + profile=None, +): + if not os.path.exists(stage_dir): + os.mkdir(stage_dir) + + build_dir = stage_dir + "/build" + inst_dir = stage_dir + "/" + package_name + + # cmake doesn't deal well with backslashes in paths. 
+ def slashify_path(path): + return path.replace("\\", "/") + + def cmake_base_args(cc, cxx, asm, ar, ranlib, ldflags, inst_dir): + machine_targets = targets if is_final_stage and targets else "X86" + + cmake_args = [ + "-GNinja", + "-DCMAKE_C_COMPILER=%s" % slashify_path(cc[0]), + "-DCMAKE_CXX_COMPILER=%s" % slashify_path(cxx[0]), + "-DCMAKE_ASM_COMPILER=%s" % slashify_path(asm[0]), + "-DCMAKE_AR=%s" % slashify_path(ar), + "-DCMAKE_C_FLAGS=%s" % " ".join(cc[1:]), + "-DCMAKE_CXX_FLAGS=%s" % " ".join(cxx[1:]), + "-DCMAKE_ASM_FLAGS=%s" % " ".join(asm[1:]), + "-DCMAKE_EXE_LINKER_FLAGS=%s" % " ".join(ldflags), + "-DCMAKE_SHARED_LINKER_FLAGS=%s" % " ".join(ldflags), + "-DCMAKE_BUILD_TYPE=%s" % build_type, + "-DCMAKE_INSTALL_PREFIX=%s" % inst_dir, + "-DLLVM_TARGETS_TO_BUILD=%s" % machine_targets, + "-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF", + "-DLLVM_ENABLE_ASSERTIONS=%s" % ("ON" if assertions else "OFF"), + "-DLLVM_ENABLE_BINDINGS=OFF", + "-DLLVM_ENABLE_CURL=OFF", + "-DLLVM_INCLUDE_TESTS=OFF", + ] + if is_llvm_toolchain(cc[0], cxx[0]): + cmake_args += ["-DLLVM_ENABLE_LLD=ON"] + elif is_windows(target) and is_cross_compile(target): + raise Exception( + "Cannot cross-compile for Windows with a compiler that is not clang" + ) + + if "TASK_ID" in os.environ: + cmake_args += [ + "-DCLANG_REPOSITORY_STRING=taskcluster-%s" % os.environ["TASK_ID"], + ] + projects = ["clang", "lld"] + if is_final_stage: + projects.append("clang-tools-extra") + else: + cmake_args.append("-DLLVM_TOOL_LLI_BUILD=OFF") + + cmake_args.append("-DLLVM_ENABLE_PROJECTS=%s" % ";".join(projects)) + + # There is no libxml2 on Windows except if we build one ourselves. + # libxml2 is only necessary for llvm-mt, but Windows can just use the + # native MT tool. + if not is_windows(target) and is_final_stage: + cmake_args += ["-DLLVM_ENABLE_LIBXML2=FORCE_ON"] + if is_linux(target) and is_final_stage: + sysroot = os.path.join(os.environ.get("MOZ_FETCHES_DIR", ""), "sysroot") + if os.path.exists(sysroot): + cmake_args += ["-DLLVM_BINUTILS_INCDIR=/usr/include"] + cmake_args += ["-DCMAKE_SYSROOT=%s" % sysroot] + # Work around the LLVM build system not building the i386 compiler-rt + # because it doesn't allow to use a sysroot for that during the cmake + # checks. 
+ cmake_args += ["-DCAN_TARGET_i386=1"] + cmake_args += ["-DLLVM_ENABLE_TERMINFO=OFF"] + if is_windows(target): + cmake_args.insert(-1, "-DLLVM_EXPORT_SYMBOLS_FOR_PLUGINS=ON") + cmake_args.insert(-1, "-DLLVM_USE_CRT_RELEASE=MT") + if is_cross_compile(target): + cmake_args += [ + f"-DCMAKE_TOOLCHAIN_FILE={src_dir}/cmake/platforms/WinMsvc.cmake", + f"-DLLVM_NATIVE_TOOLCHAIN={os.path.dirname(os.path.dirname(cc[0]))}", + f"-DHOST_ARCH={target[: -len('-pc-windows-msvc')]}", + f"-DLLVM_WINSYSROOT={os.environ['VSINSTALLDIR']}", + "-DLLVM_DISABLE_ASSEMBLY_FILES=ON", + ] + else: + # libllvm as a shared library is not supported on Windows + cmake_args += ["-DLLVM_LINK_LLVM_DYLIB=ON"] + if ranlib is not None: + cmake_args += ["-DCMAKE_RANLIB=%s" % slashify_path(ranlib)] + if is_darwin(target) and is_cross_compile(target): + arch = "arm64" if target.startswith("aarch64") else "x86_64" + cmake_args += [ + "-DCMAKE_SYSTEM_NAME=Darwin", + "-DCMAKE_SYSTEM_VERSION=%s" % os.environ["MACOSX_DEPLOYMENT_TARGET"], + "-DCMAKE_OSX_SYSROOT=%s" % slashify_path(os.getenv("CROSS_SYSROOT")), + "-DCMAKE_FIND_ROOT_PATH=%s" % slashify_path(os.getenv("CROSS_SYSROOT")), + "-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER", + "-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY", + "-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY", + "-DCMAKE_MACOSX_RPATH=ON", + "-DCMAKE_OSX_ARCHITECTURES=%s" % arch, + "-DDARWIN_osx_ARCHS=%s" % arch, + "-DDARWIN_osx_SYSROOT=%s" % slashify_path(os.getenv("CROSS_SYSROOT")), + "-DLLVM_DEFAULT_TARGET_TRIPLE=%s" % target, + "-DCMAKE_C_COMPILER_TARGET=%s" % target, + "-DCMAKE_CXX_COMPILER_TARGET=%s" % target, + "-DCMAKE_ASM_COMPILER_TARGET=%s" % target, + ] + if arch == "arm64": + cmake_args += [ + "-DDARWIN_osx_BUILTIN_ARCHS=arm64", + ] + # Starting in LLVM 11 (which requires SDK 10.12) the build tries to + # detect the SDK version by calling xcrun. Cross-compiles don't have + # an xcrun, so we have to set the version explicitly. + cmake_args += [ + "-DDARWIN_macosx_OVERRIDE_SDK_VERSION=%s" + % os.environ["MACOSX_DEPLOYMENT_TARGET"], + ] + + if profile == "gen": + # Per https://releases.llvm.org/10.0.0/docs/HowToBuildWithPGO.html + cmake_args += [ + "-DLLVM_BUILD_INSTRUMENTED=IR", + "-DLLVM_BUILD_RUNTIME=No", + ] + elif profile: + cmake_args += [ + "-DLLVM_PROFDATA_FILE=%s" % profile, + ] + + # Using LTO for both profile generation and usage to avoid most + # "function control flow change detected (hash mismatch)" error. + if profile and not is_windows(target): + cmake_args.append("-DLLVM_ENABLE_LTO=Thin") + return cmake_args + + cmake_args = [] + cmake_args += cmake_base_args(cc, cxx, asm, ar, ranlib, ldflags, inst_dir) + cmake_args += [src_dir] + build_package(build_dir, cmake_args) + + # For some reasons the import library clang.lib of clang.exe is not + # installed, so we copy it by ourselves. + if is_windows(target) and is_final_stage: + install_import_library(build_dir, inst_dir) + + +# Return the absolute path of a build tool. We first look to see if the +# variable is defined in the config file, and if so we make sure it's an +# absolute path to an existing tool, otherwise we look for a program in +# $PATH named "key". +# +# This expects the name of the key in the config file to match the name of +# the tool in the default toolchain on the system (for example, "ld" on Unix +# and "link" on Windows). 
+def get_tool(config, key): + f = None + if key in config: + f = config[key].format(**os.environ) + if os.path.isabs(f): + if not os.path.exists(f): + raise ValueError("%s must point to an existing path" % key) + return f + + # Assume that we have the name of some program that should be on PATH. + tool = which(f) if f else which(key) + if not tool: + raise ValueError("%s not found on PATH" % (f or key)) + return tool + + +# This function is intended to be called on the final build directory when +# building clang-tidy. Also clang-format binaries are included that can be used +# in conjunction with clang-tidy. +# As a separate binary we also ship clangd for the language server protocol that +# can be used as a plugin in `vscode`. +# Its job is to remove all of the files which won't be used for clang-tidy or +# clang-format to reduce the download size. Currently when this function +# finishes its job, it will leave final_dir with a layout like this: +# +# clang/ +# bin/ +# clang-apply-replacements +# clang-format +# clang-tidy +# clangd +# run-clang-tidy +# include/ +# * (nothing will be deleted here) +# lib/ +# clang/ +# 4.0.0/ +# include/ +# * (nothing will be deleted here) +# share/ +# clang/ +# clang-format-diff.py +# clang-tidy-diff.py +# run-clang-tidy.py +def prune_final_dir_for_clang_tidy(final_dir, target): + # Make sure we only have what we expect. + dirs = [ + "bin", + "include", + "lib", + "lib32", + "libexec", + "msbuild-bin", + "share", + "tools", + ] + if is_linux(target): + dirs.append("x86_64-unknown-linux-gnu") + for f in glob.glob("%s/*" % final_dir): + if os.path.basename(f) not in dirs: + raise Exception("Found unknown file %s in the final directory" % f) + if not os.path.isdir(f): + raise Exception("Expected %s to be a directory" % f) + + kept_binaries = [ + "clang-apply-replacements", + "clang-format", + "clang-tidy", + "clangd", + "clang-query", + "run-clang-tidy", + ] + re_clang_tidy = re.compile(r"^(" + "|".join(kept_binaries) + r")(\.exe)?$", re.I) + for f in glob.glob("%s/bin/*" % final_dir): + if re_clang_tidy.search(os.path.basename(f)) is None: + delete(f) + + # Keep include/ intact. + + # Remove the target-specific files. + if is_linux(target): + if os.path.exists(os.path.join(final_dir, "x86_64-unknown-linux-gnu")): + shutil.rmtree(os.path.join(final_dir, "x86_64-unknown-linux-gnu")) + + # In lib/, only keep lib/clang/N.M.O/include and the LLVM shared library. + re_ver_num = re.compile(r"^\d+(?:\.\d+\.\d+)?$", re.I) + for f in glob.glob("%s/lib/*" % final_dir): + name = os.path.basename(f) + if name == "clang": + continue + if is_darwin(target) and name in ["libLLVM.dylib", "libclang-cpp.dylib"]: + continue + if is_linux(target) and ( + fnmatch.fnmatch(name, "libLLVM*.so") + or fnmatch.fnmatch(name, "libclang-cpp.so*") + ): + continue + delete(f) + for f in glob.glob("%s/lib/clang/*" % final_dir): + if re_ver_num.search(os.path.basename(f)) is None: + delete(f) + for f in glob.glob("%s/lib/clang/*/*" % final_dir): + if os.path.basename(f) != "include": + delete(f) + + # Completely remove libexec/, msbuild-bin and tools, if it exists. 
+ shutil.rmtree(os.path.join(final_dir, "libexec")) + for d in ("msbuild-bin", "tools"): + d = os.path.join(final_dir, d) + if os.path.exists(d): + shutil.rmtree(d) + + # In share/, only keep share/clang/*tidy* + re_clang_tidy = re.compile(r"format|tidy", re.I) + for f in glob.glob("%s/share/*" % final_dir): + if os.path.basename(f) != "clang": + delete(f) + for f in glob.glob("%s/share/clang/*" % final_dir): + if re_clang_tidy.search(os.path.basename(f)) is None: + delete(f) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-c", + "--config", + action="append", + required=True, + type=argparse.FileType("r"), + help="Clang configuration file", + ) + parser.add_argument( + "--clean", required=False, action="store_true", help="Clean the build directory" + ) + parser.add_argument( + "--skip-tar", + required=False, + action="store_true", + help="Skip tar packaging stage", + ) + parser.add_argument( + "--skip-patch", + required=False, + action="store_true", + help="Do not patch source", + ) + + args = parser.parse_args() + + if not os.path.exists("llvm/README.txt"): + raise Exception( + "The script must be run from the root directory of the llvm-project tree" + ) + source_dir = os.getcwd() + build_dir = source_dir + "/build" + + if args.clean: + shutil.rmtree(build_dir) + os.sys.exit(0) + + llvm_source_dir = source_dir + "/llvm" + + config = {} + # Merge all the configs we got from the command line. + for c in args.config: + this_config_dir = os.path.dirname(c.name) + this_config = json.load(c) + patches = this_config.get("patches") + if patches: + this_config["patches"] = [os.path.join(this_config_dir, p) for p in patches] + for key, value in this_config.items(): + old_value = config.get(key) + if old_value is None: + config[key] = value + elif value is None: + if key in config: + del config[key] + elif type(old_value) != type(value): + raise Exception( + "{} is overriding `{}` with a value of the wrong type".format( + c.name, key + ) + ) + elif isinstance(old_value, list): + for v in value: + if v not in old_value: + old_value.append(v) + elif isinstance(old_value, dict): + raise Exception("{} is setting `{}` to a dict?".format(c.name, key)) + else: + config[key] = value + + stages = 2 + if "stages" in config: + stages = int(config["stages"]) + if stages not in (1, 2, 3, 4): + raise ValueError("We only know how to build 1, 2, 3, or 4 stages.") + skip_stages = 0 + if "skip_stages" in config: + # The assumption here is that the compiler given in `cc` and other configs + # is the result of the last skip stage, built somewhere else. 
+ skip_stages = int(config["skip_stages"]) + if skip_stages >= stages: + raise ValueError("Cannot skip more stages than are built.") + pgo = False + if "pgo" in config: + pgo = config["pgo"] + if pgo not in (True, False): + raise ValueError("Only boolean values are accepted for pgo.") + build_type = "Release" + if "build_type" in config: + build_type = config["build_type"] + if build_type not in ("Release", "Debug", "RelWithDebInfo", "MinSizeRel"): + raise ValueError( + "We only know how to do Release, Debug, RelWithDebInfo or " + "MinSizeRel builds" + ) + targets = config.get("targets") + build_clang_tidy = False + if "build_clang_tidy" in config: + build_clang_tidy = config["build_clang_tidy"] + if build_clang_tidy not in (True, False): + raise ValueError("Only boolean values are accepted for build_clang_tidy.") + build_clang_tidy_alpha = False + # check for build_clang_tidy_alpha only if build_clang_tidy is true + if build_clang_tidy and "build_clang_tidy_alpha" in config: + build_clang_tidy_alpha = config["build_clang_tidy_alpha"] + if build_clang_tidy_alpha not in (True, False): + raise ValueError( + "Only boolean values are accepted for build_clang_tidy_alpha." + ) + build_clang_tidy_external = False + # check for build_clang_tidy_external only if build_clang_tidy is true + if build_clang_tidy and "build_clang_tidy_external" in config: + build_clang_tidy_external = config["build_clang_tidy_external"] + if build_clang_tidy_external not in (True, False): + raise ValueError( + "Only boolean values are accepted for build_clang_tidy_external." + ) + assertions = False + if "assertions" in config: + assertions = config["assertions"] + if assertions not in (True, False): + raise ValueError("Only boolean values are accepted for assertions.") + + for t in SUPPORTED_TARGETS: + if not is_cross_compile(t): + host = t + break + else: + raise Exception( + f"Cannot use this script on {platform.system()} {platform.machine()}" + ) + + target = config.get("target", host) + if target not in SUPPORTED_TARGETS: + raise ValueError(f"{target} is not a supported target.") + + if is_cross_compile(target) and not is_linux(host): + raise Exception("Cross-compilation is only supported on Linux") + + if is_darwin(target): + os.environ["MACOSX_DEPLOYMENT_TARGET"] = ( + "11.0" if target.startswith("aarch64") else "10.12" + ) + + if is_windows(target): + exe_ext = ".exe" + cc_name = "clang-cl" + cxx_name = "clang-cl" + else: + exe_ext = "" + cc_name = "clang" + cxx_name = "clang++" + + cc = get_tool(config, "cc") + cxx = get_tool(config, "cxx") + asm = get_tool(config, "ml" if is_windows(target) else "as") + # Not using lld here as default here because it's not in PATH. But clang + # knows how to find it when they are installed alongside each others. 
+ ar = get_tool(config, "lib" if is_windows(target) else "ar") + ranlib = None if is_windows(target) else get_tool(config, "ranlib") + + if not os.path.exists(source_dir): + os.makedirs(source_dir) + + if not args.skip_patch: + for p in config.get("patches", []): + patch(p, source_dir) + + package_name = "clang" + if build_clang_tidy: + package_name = "clang-tidy" + if not args.skip_patch: + import_clang_tidy( + source_dir, build_clang_tidy_alpha, build_clang_tidy_external + ) + + if not os.path.exists(build_dir): + os.makedirs(build_dir) + + stage1_dir = build_dir + "/stage1" + stage1_inst_dir = stage1_dir + "/" + package_name + + final_stage_dir = stage1_dir + + if is_darwin(target): + extra_cflags = [] + extra_cxxflags = [] + extra_cflags2 = [] + extra_cxxflags2 = [] + extra_asmflags = [] + # It's unfortunately required to specify the linker used here because + # the linker flags are used in LLVM's configure step before + # -DLLVM_ENABLE_LLD is actually processed. + extra_ldflags = [ + "-fuse-ld=lld", + "-Wl,-dead_strip", + ] + elif is_linux(target): + extra_cflags = [] + extra_cxxflags = [] + extra_cflags2 = ["-fPIC"] + # Silence clang's warnings about arguments not being used in compilation. + extra_cxxflags2 = [ + "-fPIC", + "-Qunused-arguments", + ] + extra_asmflags = [] + # Avoid libLLVM internal function calls going through the PLT. + extra_ldflags = ["-Wl,-Bsymbolic-functions"] + # For whatever reason, LLVM's build system will set things up to turn + # on -ffunction-sections and -fdata-sections, but won't turn on the + # corresponding option to strip unused sections. We do it explicitly + # here. LLVM's build system is also picky about turning on ICF, so + # we do that explicitly here, too. + + # It's unfortunately required to specify the linker used here because + # the linker flags are used in LLVM's configure step before + # -DLLVM_ENABLE_LLD is actually processed. + if is_llvm_toolchain(cc, cxx): + extra_ldflags += ["-fuse-ld=lld", "-Wl,--icf=safe"] + extra_ldflags += ["-Wl,--gc-sections"] + elif is_windows(target): + extra_cflags = [] + extra_cxxflags = [] + # clang-cl would like to figure out what it's supposed to be emulating + # by looking at an MSVC install, but we don't really have that here. + # Force things on based on WinMsvc.cmake. + # Ideally, we'd just use WinMsvc.cmake as a toolchain file, but it only + # really works for cross-compiles, which this is not. 
+ with open(os.path.join(llvm_source_dir, "cmake/platforms/WinMsvc.cmake")) as f: + compat = [ + item + for line in f + for item in line.split() + if "-fms-compatibility-version=" in item + ][0] + extra_cflags2 = [compat] + extra_cxxflags2 = [compat] + extra_asmflags = [] + extra_ldflags = [] + + upload_dir = os.getenv("UPLOAD_DIR") + if assertions and upload_dir: + extra_cflags2 += ["-fcrash-diagnostics-dir=%s" % upload_dir] + extra_cxxflags2 += ["-fcrash-diagnostics-dir=%s" % upload_dir] + + if skip_stages < 1: + build_one_stage( + [cc] + extra_cflags, + [cxx] + extra_cxxflags, + [asm] + extra_asmflags, + ar, + ranlib, + extra_ldflags, + llvm_source_dir, + stage1_dir, + package_name, + build_type, + assertions, + target, + targets, + is_final_stage=(stages == 1), + ) + + if stages >= 2 and skip_stages < 2: + stage2_dir = build_dir + "/stage2" + stage2_inst_dir = stage2_dir + "/" + package_name + final_stage_dir = stage2_dir + if skip_stages < 1: + cc = stage1_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + cxx = stage1_inst_dir + "/bin/%s%s" % (cxx_name, exe_ext) + asm = stage1_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + build_one_stage( + [cc] + extra_cflags2, + [cxx] + extra_cxxflags2, + [asm] + extra_asmflags, + ar, + ranlib, + extra_ldflags, + llvm_source_dir, + stage2_dir, + package_name, + build_type, + assertions, + target, + targets, + is_final_stage=(stages == 2), + profile="gen" if pgo else None, + ) + + if stages >= 3 and skip_stages < 3: + stage3_dir = build_dir + "/stage3" + stage3_inst_dir = stage3_dir + "/" + package_name + final_stage_dir = stage3_dir + if skip_stages < 2: + cc = stage2_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + cxx = stage2_inst_dir + "/bin/%s%s" % (cxx_name, exe_ext) + asm = stage2_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + build_one_stage( + [cc] + extra_cflags2, + [cxx] + extra_cxxflags2, + [asm] + extra_asmflags, + ar, + ranlib, + extra_ldflags, + llvm_source_dir, + stage3_dir, + package_name, + build_type, + assertions, + target, + targets, + (stages == 3), + ) + if pgo: + llvm_profdata = stage2_inst_dir + "/bin/llvm-profdata%s" % exe_ext + merge_cmd = [llvm_profdata, "merge", "-o", "merged.profdata"] + profraw_files = glob.glob( + os.path.join(stage2_dir, "build", "profiles", "*.profraw") + ) + run_in(stage3_dir, merge_cmd + profraw_files) + if stages == 3: + mkdir_p(upload_dir) + shutil.copy2(os.path.join(stage3_dir, "merged.profdata"), upload_dir) + return + + if stages >= 4 and skip_stages < 4: + stage4_dir = build_dir + "/stage4" + final_stage_dir = stage4_dir + profile = None + if pgo: + if skip_stages == 3: + profile_dir = os.environ.get("MOZ_FETCHES_DIR", "") + else: + profile_dir = stage3_dir + profile = os.path.join(profile_dir, "merged.profdata") + if skip_stages < 3: + cc = stage3_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + cxx = stage3_inst_dir + "/bin/%s%s" % (cxx_name, exe_ext) + asm = stage3_inst_dir + "/bin/%s%s" % (cc_name, exe_ext) + build_one_stage( + [cc] + extra_cflags2, + [cxx] + extra_cxxflags2, + [asm] + extra_asmflags, + ar, + ranlib, + extra_ldflags, + llvm_source_dir, + stage4_dir, + package_name, + build_type, + assertions, + target, + targets, + (stages == 4), + profile=profile, + ) + + if build_clang_tidy: + prune_final_dir_for_clang_tidy( + os.path.join(final_stage_dir, package_name), target + ) + + if not args.skip_tar: + build_tar_package("%s.tar.zst" % package_name, final_stage_dir, package_name) + + +if __name__ == "__main__": + main() diff --git a/build/build-clang/clang-14.json 
b/build/build-clang/clang-14.json new file mode 100644 index 0000000000..43227689c1 --- /dev/null +++ b/build/build-clang/clang-14.json @@ -0,0 +1,15 @@ +{ + "patches": [ + "find_symbolizer_linux_clang_10.patch", + "android-mangling-error_clang_12.patch", + "unpoison-thread-stacks_clang_10.patch", + "downgrade-mangling-error_clang_12.patch", + "Remove-FlushViewOfFile-when-unmaping-gcda-files.patch", + "fuzzing_ccov_build_clang_12.patch", + "llvmorg-15-init-16512-g4b1e3d193706.patch", + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "revert-llvmorg-14-init-11890-gf86deb18cab6.patch", + "win64-ret-null-on-commitment-limit_clang_14.patch", + "compiler-rt-rss-limit-heap-profile.patch" + ] +} diff --git a/build/build-clang/clang-16.json b/build/build-clang/clang-16.json new file mode 100644 index 0000000000..ae94126f63 --- /dev/null +++ b/build/build-clang/clang-16.json @@ -0,0 +1,30 @@ +{ + "patches": [ + "find_symbolizer_linux_clang_15.patch", + "android-mangling-error_clang_12.patch", + "unpoison-thread-stacks_clang_10.patch", + "downgrade-mangling-error_clang_12.patch", + "fuzzing_ccov_build_clang_12.patch", + "partial-revert-llvmorg-16-init-15775-g1ae7d83803e4.patch", + "revert-llvmorg-16-init-11301-g163bb6d64e5f.patch", + "revert-llvmorg-16-init-7598-g54bfd0484615.patch", + "revert-llvmorg-15-init-13446-g7524fe962e47.patch", + "revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch", + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch", + "llvmorg-17-init-237-g1b9fbc81ff15.patch", + "llvmorg-17-init-994-g1e72920c8859.patch", + "llvmorg-17-init-1242-g5de5f66b984a.patch", + "llvmorg-17-init-2171-g8198f30f7e75.patch", + "llvmorg-17-init-4170-g5c602c46b1ef.patch", + "llvmorg-17-init-6897-g415b1cfd57de.patch", + "llvmorg-17-init-6905-gc81a322476a1.patch", + "llvmorg-17-init-6909-gd644ab022a7b.patch", + "llvmorg-17-init-8140-gb1bd52cd0d86.patch", + "llvmorg-17-init-11952-g2f0a1699eab7.patch", + "D146664_clang_15.patch", + "D151864.patch", + "win64-ret-null-on-commitment-limit_clang_14.patch", + "compiler-rt-rss-limit-heap-profile.patch" + ] +} diff --git a/build/build-clang/clang-7.0.json b/build/build-clang/clang-7.0.json new file mode 100644 index 0000000000..703774b6f0 --- /dev/null +++ b/build/build-clang/clang-7.0.json @@ -0,0 +1,5 @@ +{ + "cc": "/usr/bin/gcc", + "cxx": "/usr/bin/g++", + "as": "/usr/bin/gcc" +} diff --git a/build/build-clang/clang-tidy-ci.patch b/build/build-clang/clang-tidy-ci.patch new file mode 100644 index 0000000000..6c31752136 --- /dev/null +++ b/build/build-clang/clang-tidy-ci.patch @@ -0,0 +1,34 @@ +diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp +index 7de313ad4da6..697f98c362d1 100644 +--- a/clang-tools-extra/clang-tidy/ClangTidy.cpp ++++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp +@@ -432,6 +432,7 @@ ClangTidyASTConsumerFactory::createASTConsumer( + + for (auto &Check : Checks) { + Check->registerMatchers(&*Finder); ++ Check->registerPPCallbacks(Compiler); + Check->registerPPCallbacks(*SM, PP, ModuleExpanderPP); + } + +diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h +index 9b41e5836de7..d8938b8fe05e 100644 +--- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h ++++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h +@@ -20,6 +20,7 @@ + + namespace clang { + ++class CompilerInstance; + class SourceManager; + + namespace tidy { +@@ -69,6 +70,9 @@ public: + return true; + } + ++ /// 
This has been deprecated in clang 9 - needed by mozilla-must-override ++ virtual void registerPPCallbacks(CompilerInstance &Compiler) {} ++ + /// Override this to register ``PPCallbacks`` in the preprocessor. + /// + /// This should be used for clang-tidy checks that analyze preprocessor- diff --git a/build/build-clang/clang-tidy-external-linux64.json b/build/build-clang/clang-tidy-external-linux64.json new file mode 100644 index 0000000000..897911d3e3 --- /dev/null +++ b/build/build-clang/clang-tidy-external-linux64.json @@ -0,0 +1,11 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "patches": [ + "clang-tidy-ci.patch" + ], + "build_clang_tidy_external": true +} diff --git a/build/build-clang/clang-tidy-linux64.json b/build/build-clang/clang-tidy-linux64.json new file mode 100644 index 0000000000..e654aeef92 --- /dev/null +++ b/build/build-clang/clang-tidy-linux64.json @@ -0,0 +1,10 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "patches": [ + "clang-tidy-ci.patch" + ] +} diff --git a/build/build-clang/clang-tidy-macosx64.json b/build/build-clang/clang-tidy-macosx64.json new file mode 100644 index 0000000000..6da85803f5 --- /dev/null +++ b/build/build-clang/clang-tidy-macosx64.json @@ -0,0 +1,7 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "patches": [ + "clang-tidy-ci.patch" + ] +} diff --git a/build/build-clang/clang-tidy-win64.json b/build/build-clang/clang-tidy-win64.json new file mode 100644 index 0000000000..19f5e428ad --- /dev/null +++ b/build/build-clang/clang-tidy-win64.json @@ -0,0 +1,8 @@ +{ + "stages": "1", + "build_clang_tidy": true, + "patches": [ + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "clang-tidy-ci.patch" + ] +} diff --git a/build/build-clang/clang-trunk.json b/build/build-clang/clang-trunk.json new file mode 100644 index 0000000000..f3da908a6f --- /dev/null +++ b/build/build-clang/clang-trunk.json @@ -0,0 +1,22 @@ +{ + "patches": [ + "find_symbolizer_linux_clang_15.patch", + "android-mangling-error_clang_12.patch", + "unpoison-thread-stacks_clang_10.patch", + "downgrade-mangling-error_clang_12.patch", + "fuzzing_ccov_build_clang_12.patch", + "partial-revert-llvmorg-17-init-7686-g244be0b0de19.patch", + "revert-llvmorg-17-init-4120-g02e8eb1a438b.patch", + "partial-revert-llvmorg-16-init-15775-g1ae7d83803e4_clang_17.patch", + "revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_17.patch", + "revert-llvmorg-16-init-7598-g54bfd0484615.patch", + "revert-llvmorg-15-init-13446-g7524fe962e47.patch", + "revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch", + "revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch", + "revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch", + "D146664.patch", + "D151864.patch", + "win64-ret-null-on-commitment-limit_clang_14.patch", + "compiler-rt-rss-limit-heap-profile.patch" + ] +} diff --git a/build/build-clang/clang_include_cleaner.patch b/build/build-clang/clang_include_cleaner.patch new file mode 100644 index 0000000000..0bc3b5b019 --- /dev/null +++ b/build/build-clang/clang_include_cleaner.patch @@ -0,0 +1,2235 @@ +Ported from clangd, this still can be improved over time, but it can be landed. +This was based on the work from https://bit.ly/3TkV2N1 + + The utility makes the assumption that all header are self contained! 
+ It only checkes Decls from the main translation file, where SourceLocarion is the passed cpp file. + It builds a list with all of the includes from the translation unit. + It matches all of the Decls from the main translation units with definitions from the included header files and builds a list with used header files. + All of the includes that are not part of the matched used header files are considered to be unused. Of course this is correct if the first assumption if followed by the coding guide, where all of the header are self contained. Since the mozilla code base doesn't follow this approach false positives might appear where the is the following situation: + +FOO.cpp + +#include <A> +#Include <B> + +If header A defines a symbol that is used by header B and B doesn't include A nor +it has symbols defined that are used by FOO.cpp then B it will be marked as potentially to be removed +by the tool. +This is the limitation determined by header that are not self contained. + +The limitation presented above can be fixed in the future with extra work, but it's very time expensive +during the runtime of the checker. + +diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt +index 6a3f741721ee..ff17c8e8472a 100644 +--- a/clang-tools-extra/CMakeLists.txt ++++ b/clang-tools-extra/CMakeLists.txt +@@ -16,6 +16,7 @@ endif() + add_subdirectory(clang-apply-replacements) + add_subdirectory(clang-reorder-fields) + add_subdirectory(modularize) ++add_subdirectory(include-cleaner) + add_subdirectory(clang-tidy) + + add_subdirectory(clang-change-namespace) +@@ -23,7 +24,6 @@ add_subdirectory(clang-doc) + add_subdirectory(clang-include-fixer) + add_subdirectory(clang-move) + add_subdirectory(clang-query) +-add_subdirectory(include-cleaner) + add_subdirectory(pp-trace) + add_subdirectory(pseudo) + add_subdirectory(tool-template) +diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt +index 8a953eeea275..f2edc509acaf 100644 +--- a/clang-tools-extra/clang-tidy/CMakeLists.txt ++++ b/clang-tools-extra/clang-tidy/CMakeLists.txt +@@ -50,6 +50,7 @@ endif() + + # Checks. + # If you add a check, also add it to ClangTidyForceLinker.h in this directory. ++add_subdirectory(alpha) + add_subdirectory(android) + add_subdirectory(abseil) + add_subdirectory(altera) +@@ -77,6 +78,7 @@ add_subdirectory(portability) + add_subdirectory(readability) + add_subdirectory(zircon) + set(ALL_CLANG_TIDY_CHECKS ++ clangTidyAlphaModule + clangTidyAndroidModule + clangTidyAbseilModule + clangTidyAlteraModule +diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +index 2691d90fa521..2fa064cff22a 100644 +--- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h ++++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +@@ -20,6 +20,11 @@ extern volatile int AbseilModuleAnchorSource; + static int LLVM_ATTRIBUTE_UNUSED AbseilModuleAnchorDestination = + AbseilModuleAnchorSource; + ++// This anchor is used to force the linker to link the AlphaModule. ++extern volatile int AlphaModuleAnchorSource; ++static int LLVM_ATTRIBUTE_UNUSED AlphaModuleAnchorDestination = ++ AlphaModuleAnchorSource; ++ + // This anchor is used to force the linker to link the AlteraModule. 
+ extern volatile int AlteraModuleAnchorSource; + static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination = +diff --git a/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp b/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp +new file mode 100644 +index 000000000000..b598a36cebf7 +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp +@@ -0,0 +1,38 @@ ++//===--- AlphaTidyModule.cpp - clang-tidy ----------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "../ClangTidy.h" ++#include "../ClangTidyModule.h" ++#include "../ClangTidyModuleRegistry.h" ++#include "UnusedIncludesCheck.h" ++ ++ ++namespace clang { ++namespace tidy { ++namespace alpha { ++ ++class AlphaModule : public ClangTidyModule { ++public: ++ void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { ++ ++ CheckFactories.registerCheck<UnusedIncludesCheck>("alpha-unused-includes"); ++ } ++}; ++ ++} // namespace alpha ++ ++// Register the AlphaTidyModule using this statically initialized variable. ++static ClangTidyModuleRegistry::Add<alpha::AlphaModule> ++ X("alpha-module", "Adds alpha lint checks."); ++ ++// This anchor is used to force the linker to link in the generated object file ++// and thus register the AlphaModule. ++volatile int AlphaModuleAnchorSource = 0; ++ ++} // namespace tidy ++} // namespace clang +diff --git a/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt b/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt +new file mode 100644 +index 000000000000..b50576868645 +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt +@@ -0,0 +1,32 @@ ++include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include-cleaner/include) ++ ++set(LLVM_LINK_COMPONENTS ++ Support ++ ) ++ ++add_clang_library(clangTidyAlphaModule ++ ++ AlphaTidyModule.cpp ++ UnusedIncludesCheck.cpp ++ ++ LINK_LIBS ++ clangAnalysis ++ clangIncludeCleaner ++ clangTidy ++ clangTidyUtils ++ ++ DEPENDS ++ omp_gen ++ ) ++ ++clang_target_link_libraries(clangTidyAlphaModule ++ PRIVATE ++ clangAnalysis ++ clangAST ++ clangASTMatchers ++ clangBasic ++ clangIncludeCleaner ++ clangLex ++ clangSerialization ++ clangTooling ++ ) +diff --git a/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp +new file mode 100644 +index 000000000000..0d6a6bf7a367 +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp +@@ -0,0 +1,76 @@ ++//===--- UnusedIncludesCheck.cpp - clang-tidy------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "UnusedIncludesCheck.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Hooks.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Basic/LLVM.h" ++#include "clang/Basic/SourceLocation.h" ++#include "clang/Lex/Preprocessor.h" ++ ++using namespace clang::ast_matchers; ++ ++namespace clang { ++namespace tidy { ++namespace alpha { ++ ++UnusedIncludesCheck::UnusedIncludesCheck(StringRef Name, ++ ClangTidyContext *Context) ++ : ClangTidyCheck(Name, Context) {} ++ ++void UnusedIncludesCheck::registerPPCallbacks(const SourceManager &SM, ++ Preprocessor *PP, ++ Preprocessor *) { ++ Ctx = std::make_unique<include_cleaner::AnalysisContext>( ++ include_cleaner::Policy{}, *PP); ++ RecordedPP = std::make_unique<include_cleaner::RecordedPP>(); ++ PP->addPPCallbacks(RecordedPP->record(*Ctx)); ++} ++ ++void UnusedIncludesCheck::registerMatchers(MatchFinder *Finder) { ++ Finder->addMatcher( ++ translationUnitDecl(forEach(decl(isExpansionInMainFile()).bind("top"))), ++ this); ++} ++ ++void UnusedIncludesCheck::check(const MatchFinder::MatchResult &Result) { ++ Top.push_back(const_cast<Decl *>(Result.Nodes.getNodeAs<Decl>("top"))); ++} ++ ++void UnusedIncludesCheck::onEndOfTranslationUnit() { ++ llvm::DenseSet<const include_cleaner::RecordedPP::Include *> Used; ++ llvm::DenseSet<include_cleaner::Header> Seen; ++ include_cleaner::walkUsed( ++ *Ctx, Top, RecordedPP->MacroReferences, ++ [&](SourceLocation Loc, include_cleaner::Symbol Sym, ++ llvm::ArrayRef<include_cleaner::Header> Headers) { ++ for (const auto &Header : Headers) { ++ if (!Seen.insert(Header).second) ++ continue; ++ const auto& HeadersToInsert = RecordedPP->Includes.match(Header); ++ Used.insert(HeadersToInsert.begin(), HeadersToInsert.end()); ++ } ++ }); ++ for (const auto &I : RecordedPP->Includes.all()) { ++ if (!Used.contains(&I)) { ++ const auto &SM = Ctx->sourceManager(); ++ FileID FID = SM.getFileID(I.Location); ++ diag(I.Location, "there is a high probability that include is unused") ++ << FixItHint::CreateRemoval(CharSourceRange::getCharRange( ++ SM.translateLineCol(FID, I.Line, 1), ++ SM.translateLineCol(FID, I.Line + 1, 1))); ++ } ++ } ++} ++ ++UnusedIncludesCheck::~UnusedIncludesCheck() = default; ++ ++} // namespace alpha ++} // namespace tidy ++} // namespace clang +diff --git a/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h +new file mode 100644 +index 000000000000..f67c46e6cc3e +--- /dev/null ++++ b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h +@@ -0,0 +1,42 @@ ++//===--- UnusedIncludesCheck.h - clang-tidy----------------------*- C++ -*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H ++#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H ++ ++#include "../ClangTidyCheck.h" ++ ++namespace clang { ++namespace include_cleaner { ++class AnalysisContext; ++struct RecordedPP; ++} // namespace include_cleaner ++namespace tidy { ++namespace alpha { ++ ++class UnusedIncludesCheck : public ClangTidyCheck { ++public: ++ UnusedIncludesCheck(StringRef Name, ClangTidyContext *Context); ++ ~UnusedIncludesCheck(); ++ void registerPPCallbacks(const SourceManager &SM, Preprocessor *, ++ Preprocessor *) override; ++ void registerMatchers(ast_matchers::MatchFinder *Finder) override; ++ void check(const ast_matchers::MatchFinder::MatchResult &Result) override; ++ void onEndOfTranslationUnit() override; ++ ++private: ++ std::unique_ptr<include_cleaner::AnalysisContext> Ctx; ++ std::unique_ptr<include_cleaner::RecordedPP> RecordedPP; ++ std::vector<Decl *> Top; ++}; ++ ++} // namespace misc ++} // namespace tidy ++} // namespace clang ++ ++#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H +diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt +index de8f087a52a5..14f605b1efaf 100644 +--- a/clang-tools-extra/clangd/CMakeLists.txt ++++ b/clang-tools-extra/clangd/CMakeLists.txt +@@ -2,6 +2,8 @@ + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + include_directories(${CMAKE_CURRENT_BINARY_DIR}) + ++include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include-cleaner/include) ++ + add_subdirectory(support) + + # Configure the Features.inc file. +@@ -153,6 +155,7 @@ clang_target_link_libraries(clangDaemon + clangDriver + clangFormat + clangFrontend ++ clangIncludeCleaner + clangIndex + clangLex + clangSema +diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp +index 26eb2574195d..a3cbc8894f6d 100644 +--- a/clang-tools-extra/clangd/Hover.cpp ++++ b/clang-tools-extra/clangd/Hover.cpp +@@ -12,9 +12,11 @@ + #include "CodeCompletionStrings.h" + #include "Config.h" + #include "FindTarget.h" ++#include "IncludeCleaner.h" + #include "ParsedAST.h" + #include "Selection.h" + #include "SourceCode.h" ++#include "clang-include-cleaner/Analysis.h" + #include "index/SymbolCollector.h" + #include "support/Markup.h" + #include "clang/AST/ASTContext.h" +@@ -985,6 +987,23 @@ llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos, + // FIXME: We don't have a fitting value for Kind. + HI.Definition = + URIForFile::canonicalize(Inc.Resolved, *MainFilePath).file().str(); ++ ++ // FIXME: share code, macros too... 
++ include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, ++ AST.getPreprocessor()); ++ std::vector<std::string> Provides; ++ include_cleaner::walkUsed( ++ Ctx, AST.getLocalTopLevelDecls(), /*Macros=*/{}, ++ [&](SourceLocation Loc, include_cleaner::Symbol S, ++ llvm::ArrayRef<include_cleaner::Header> Headers) { ++ for (const auto &H : Headers) ++ if (match(H, Inc, AST.getIncludeStructure())) ++ Provides.push_back(S.name()); ++ }); ++ llvm::sort(Provides); ++ Provides.erase(std::unique(Provides.begin(), Provides.end()), ++ Provides.end()); ++ HI.Documentation = "provides " + llvm::join(Provides, ", "); + HI.DefinitionLanguage = ""; + return HI; + } +diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp +index e5b5187e030c..3c0ba06316ac 100644 +--- a/clang-tools-extra/clangd/IncludeCleaner.cpp ++++ b/clang-tools-extra/clangd/IncludeCleaner.cpp +@@ -12,6 +12,8 @@ + #include "ParsedAST.h" + #include "Protocol.h" + #include "SourceCode.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Types.h" + #include "index/CanonicalIncludes.h" + #include "support/Logger.h" + #include "support/Trace.h" +@@ -40,181 +42,6 @@ void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; } + + namespace { + +-/// Crawler traverses the AST and feeds in the locations of (sometimes +-/// implicitly) used symbols into \p Result. +-class ReferencedLocationCrawler +- : public RecursiveASTVisitor<ReferencedLocationCrawler> { +-public: +- ReferencedLocationCrawler(ReferencedLocations &Result, +- const SourceManager &SM) +- : Result(Result), SM(SM) {} +- +- bool VisitDeclRefExpr(DeclRefExpr *DRE) { +- add(DRE->getDecl()); +- add(DRE->getFoundDecl()); +- return true; +- } +- +- bool VisitMemberExpr(MemberExpr *ME) { +- add(ME->getMemberDecl()); +- add(ME->getFoundDecl().getDecl()); +- return true; +- } +- +- bool VisitTagType(TagType *TT) { +- add(TT->getDecl()); +- return true; +- } +- +- bool VisitFunctionDecl(FunctionDecl *FD) { +- // Function definition will require redeclarations to be included. +- if (FD->isThisDeclarationADefinition()) +- add(FD); +- return true; +- } +- +- bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { +- add(CCE->getConstructor()); +- return true; +- } +- +- bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { +- // Using templateName case is handled by the override TraverseTemplateName. +- if (TST->getTemplateName().getKind() == TemplateName::UsingTemplate) +- return true; +- add(TST->getAsCXXRecordDecl()); // Specialization +- return true; +- } +- +- // There is no VisitTemplateName in RAV, thus we override the Traverse version +- // to handle the Using TemplateName case. +- bool TraverseTemplateName(TemplateName TN) { +- VisitTemplateName(TN); +- return Base::TraverseTemplateName(TN); +- } +- // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName! +- bool VisitTemplateName(TemplateName TN) { +- if (const auto *USD = TN.getAsUsingShadowDecl()) { +- add(USD); +- return true; +- } +- add(TN.getAsTemplateDecl()); // Primary template. +- return true; +- } +- +- bool VisitUsingType(UsingType *UT) { +- add(UT->getFoundDecl()); +- return true; +- } +- +- bool VisitTypedefType(TypedefType *TT) { +- add(TT->getDecl()); +- return true; +- } +- +- // Consider types of any subexpression used, even if the type is not named. +- // This is helpful in getFoo().bar(), where Foo must be complete. +- // FIXME(kirillbobyrev): Should we tweak this? 
It may not be desirable to +- // consider types "used" when they are not directly spelled in code. +- bool VisitExpr(Expr *E) { +- TraverseType(E->getType()); +- return true; +- } +- +- bool TraverseType(QualType T) { +- if (isNew(T.getTypePtrOrNull())) // don't care about quals +- Base::TraverseType(T); +- return true; +- } +- +- bool VisitUsingDecl(UsingDecl *D) { +- for (const auto *Shadow : D->shadows()) +- add(Shadow->getTargetDecl()); +- return true; +- } +- +- // Enums may be usefully forward-declared as *complete* types by specifying +- // an underlying type. In this case, the definition should see the declaration +- // so they can be checked for compatibility. +- bool VisitEnumDecl(EnumDecl *D) { +- if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) +- add(D); +- return true; +- } +- +- // When the overload is not resolved yet, mark all candidates as used. +- bool VisitOverloadExpr(OverloadExpr *E) { +- for (const auto *ResolutionDecl : E->decls()) +- add(ResolutionDecl); +- return true; +- } +- +-private: +- using Base = RecursiveASTVisitor<ReferencedLocationCrawler>; +- +- void add(const Decl *D) { +- if (!D || !isNew(D->getCanonicalDecl())) +- return; +- if (auto SS = StdRecognizer(D)) { +- Result.Stdlib.insert(*SS); +- return; +- } +- // Special case RecordDecls, as it is common for them to be forward +- // declared multiple times. The most common cases are: +- // - Definition available in TU, only mark that one as usage. The rest is +- // likely to be unnecessary. This might result in false positives when an +- // internal definition is visible. +- // - There's a forward declaration in the main file, no need for other +- // redecls. +- if (const auto *RD = llvm::dyn_cast<RecordDecl>(D)) { +- if (const auto *Definition = RD->getDefinition()) { +- Result.User.insert(Definition->getLocation()); +- return; +- } +- if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) +- return; +- } +- for (const Decl *Redecl : D->redecls()) +- Result.User.insert(Redecl->getLocation()); +- } +- +- bool isNew(const void *P) { return P && Visited.insert(P).second; } +- +- ReferencedLocations &Result; +- llvm::DenseSet<const void *> Visited; +- const SourceManager &SM; +- tooling::stdlib::Recognizer StdRecognizer; +-}; +- +-// Given a set of referenced FileIDs, determines all the potentially-referenced +-// files and macros by traversing expansion/spelling locations of macro IDs. +-// This is used to map the referenced SourceLocations onto real files. +-struct ReferencedFilesBuilder { +- ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} +- llvm::DenseSet<FileID> Files; +- llvm::DenseSet<FileID> Macros; +- const SourceManager &SM; +- +- void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } +- +- void add(FileID FID, SourceLocation Loc) { +- if (FID.isInvalid()) +- return; +- assert(SM.isInFileID(Loc, FID)); +- if (Loc.isFileID()) { +- Files.insert(FID); +- return; +- } +- // Don't process the same macro FID twice. +- if (!Macros.insert(FID).second) +- return; +- const auto &Exp = SM.getSLocEntry(FID).getExpansion(); +- add(Exp.getSpellingLoc()); +- add(Exp.getExpansionLocStart()); +- add(Exp.getExpansionLocEnd()); +- } +-}; +- + // Returns the range starting at '#' and ending at EOL. Escaped newlines are not + // handled. 
+ clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { +@@ -231,10 +58,10 @@ clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { + + // Finds locations of macros referenced from within the main file. That includes + // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. +-void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, +- const syntax::TokenBuffer *Tokens, +- ReferencedLocations &Result) { ++std::vector<include_cleaner::SymbolReference> ++findReferencedMacros(ParsedAST &AST, include_cleaner::AnalysisContext &Ctx) { + trace::Span Tracer("IncludeCleaner::findReferencedMacros"); ++ std::vector<include_cleaner::SymbolReference> Result; + // FIXME(kirillbobyrev): The macros from the main file are collected in + // ParsedAST's MainFileMacros. However, we can't use it here because it + // doesn't handle macro references that were not expanded, e.g. in macro +@@ -244,15 +71,19 @@ void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, + // this mechanism (as opposed to iterating through all tokens) will improve + // the performance of findReferencedMacros and also improve other features + // relying on MainFileMacros. +- for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) { +- auto Macro = locateMacroAt(Tok, PP); ++ for (const syntax::Token &Tok : ++ AST.getTokens().spelledTokens(AST.getSourceManager().getMainFileID())) { ++ auto Macro = locateMacroAt(Tok, AST.getPreprocessor()); + if (!Macro) + continue; + auto Loc = Macro->Info->getDefinitionLoc(); + if (Loc.isValid()) +- Result.User.insert(Loc); +- // FIXME: support stdlib macros ++ Result.push_back(include_cleaner::SymbolReference{ ++ Tok.location(), ++ Ctx.macro(AST.getPreprocessor().getIdentifierInfo(Macro->Name), ++ Loc)}); + } ++ return Result; + } + + static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, +@@ -296,110 +127,8 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, + } + return true; + } +- +-// In case symbols are coming from non self-contained header, we need to find +-// its first includer that is self-contained. This is the header users can +-// include, so it will be responsible for bringing the symbols from given +-// header into the scope. +-FileID headerResponsible(FileID ID, const SourceManager &SM, +- const IncludeStructure &Includes) { +- // Unroll the chain of non self-contained headers until we find the one that +- // can be included. +- for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); +- FE = SM.getFileEntryForID(ID)) { +- // If FE is nullptr, we consider it to be the responsible header. +- if (!FE) +- break; +- auto HID = Includes.getID(FE); +- assert(HID && "We're iterating over headers already existing in " +- "IncludeStructure"); +- if (Includes.isSelfContained(*HID)) +- break; +- // The header is not self-contained: put the responsibility for its symbols +- // on its includer. 
+- ID = SM.getFileID(SM.getIncludeLoc(ID)); +- } +- return ID; +-} +- + } // namespace + +-ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, +- const syntax::TokenBuffer *Tokens) { +- trace::Span Tracer("IncludeCleaner::findReferencedLocations"); +- ReferencedLocations Result; +- const auto &SM = Ctx.getSourceManager(); +- ReferencedLocationCrawler Crawler(Result, SM); +- Crawler.TraverseAST(Ctx); +- if (Tokens) +- findReferencedMacros(SM, PP, Tokens, Result); +- return Result; +-} +- +-ReferencedLocations findReferencedLocations(ParsedAST &AST) { +- return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(), +- &AST.getTokens()); +-} +- +-ReferencedFiles findReferencedFiles( +- const ReferencedLocations &Locs, const SourceManager &SM, +- llvm::function_ref<FileID(FileID)> HeaderResponsible, +- llvm::function_ref<Optional<StringRef>(FileID)> UmbrellaHeader) { +- std::vector<SourceLocation> Sorted{Locs.User.begin(), Locs.User.end()}; +- llvm::sort(Sorted); // Group by FileID. +- ReferencedFilesBuilder Builder(SM); +- for (auto It = Sorted.begin(); It < Sorted.end();) { +- FileID FID = SM.getFileID(*It); +- Builder.add(FID, *It); +- // Cheaply skip over all the other locations from the same FileID. +- // This avoids lots of redundant Loc->File lookups for the same file. +- do +- ++It; +- while (It != Sorted.end() && SM.isInFileID(*It, FID)); +- } +- +- // If a header is not self-contained, we consider its symbols a logical part +- // of the including file. Therefore, mark the parents of all used +- // non-self-contained FileIDs as used. Perform this on FileIDs rather than +- // HeaderIDs, as each inclusion of a non-self-contained file is distinct. +- llvm::DenseSet<FileID> UserFiles; +- llvm::StringSet<> PublicHeaders; +- for (FileID ID : Builder.Files) { +- UserFiles.insert(HeaderResponsible(ID)); +- if (auto PublicHeader = UmbrellaHeader(ID)) { +- PublicHeaders.insert(*PublicHeader); +- } +- } +- +- llvm::DenseSet<tooling::stdlib::Header> StdlibFiles; +- for (const auto &Symbol : Locs.Stdlib) +- for (const auto &Header : Symbol.headers()) +- StdlibFiles.insert(Header); +- +- return {std::move(UserFiles), std::move(StdlibFiles), +- std::move(PublicHeaders)}; +-} +- +-ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, +- const IncludeStructure &Includes, +- const CanonicalIncludes &CanonIncludes, +- const SourceManager &SM) { +- return findReferencedFiles( +- Locs, SM, +- [&SM, &Includes](FileID ID) { +- return headerResponsible(ID, SM, Includes); +- }, +- [&SM, &CanonIncludes](FileID ID) -> Optional<StringRef> { +- auto Entry = SM.getFileEntryRefForID(ID); +- if (!Entry) +- return llvm::None; +- auto PublicHeader = CanonIncludes.mapHeader(*Entry); +- if (PublicHeader.empty()) +- return llvm::None; +- return PublicHeader; +- }); +-} +- + std::vector<const Inclusion *> + getUnused(ParsedAST &AST, + const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles, +@@ -426,51 +155,50 @@ getUnused(ParsedAST &AST, + return Unused; + } + +-#ifndef NDEBUG +-// Is FID a <built-in>, <scratch space> etc? 
+-static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { +- const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); +- return FI.getName().startswith("<"); +-} +-#endif +- +-llvm::DenseSet<IncludeStructure::HeaderID> +-translateToHeaderIDs(const ReferencedFiles &Files, +- const IncludeStructure &Includes, +- const SourceManager &SM) { +- trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); +- llvm::DenseSet<IncludeStructure::HeaderID> TranslatedHeaderIDs; +- TranslatedHeaderIDs.reserve(Files.User.size()); +- for (FileID FID : Files.User) { +- const FileEntry *FE = SM.getFileEntryForID(FID); +- if (!FE) { +- assert(isSpecialBuffer(FID, SM)); +- continue; +- } +- const auto File = Includes.getID(FE); +- assert(File); +- TranslatedHeaderIDs.insert(*File); +- } +- for (tooling::stdlib::Header StdlibUsed : Files.Stdlib) +- for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) +- TranslatedHeaderIDs.insert(HID); +- return TranslatedHeaderIDs; ++bool match(const include_cleaner::Header &H, const Inclusion &I, ++ const IncludeStructure &S) { ++ switch (H.kind()) { ++ case include_cleaner::Header::Physical: ++ if (auto HID = S.getID(H.getPhysical())) ++ if (static_cast<unsigned>(*HID) == I.HeaderID) ++ return true; ++ break; ++ case include_cleaner::Header::StandardLibrary: ++ return I.Written == H.getStandardLibrary().name(); ++ case include_cleaner::Header::Verbatim: ++ return llvm::StringRef(I.Written).trim("\"<>") == H.getVerbatimSpelling(); ++ case include_cleaner::Header::Builtin: ++ case include_cleaner::Header::MainFile: ++ break; ++ } ++ return false; + } + + std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) { +- const auto &SM = AST.getSourceManager(); +- +- auto Refs = findReferencedLocations(AST); +- auto ReferencedFiles = +- findReferencedFiles(Refs, AST.getIncludeStructure(), +- AST.getCanonicalIncludes(), AST.getSourceManager()); +- auto ReferencedHeaders = +- translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM); +- return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas); ++ include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, ++ AST.getPreprocessor()); ++ llvm::DenseSet<const Inclusion *> Used; ++ include_cleaner::walkUsed( ++ Ctx, AST.getLocalTopLevelDecls(), ++ /*MacroRefs=*/findReferencedMacros(AST, Ctx), ++ [&](SourceLocation Loc, include_cleaner::Symbol Sym, ++ llvm::ArrayRef<include_cleaner::Header> Headers) { ++ for (const auto &I : AST.getIncludeStructure().MainFileIncludes) ++ for (const auto &H : Headers) ++ if (match(H, I, AST.getIncludeStructure())) ++ Used.insert(&I); ++ }); ++ std::vector<const Inclusion *> Unused; ++ const Config &Cfg = Config::current(); ++ for (const auto &I : AST.getIncludeStructure().MainFileIncludes) { ++ if (!Used.contains(&I) && mayConsiderUnused(I, AST, Cfg)) ++ Unused.push_back(&I); ++ } ++ return Unused; + } + +-std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST, +- llvm::StringRef Code) { ++auto issueUnusedIncludesDiagnostics(ParsedAST &AST, ++ llvm::StringRef Code) -> std::vector<Diag> { + const Config &Cfg = Config::current(); + if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict || + Cfg.Diagnostics.SuppressAll || +diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h +index 4ce31baaa067..c858a60c5db7 100644 +--- a/clang-tools-extra/clangd/IncludeCleaner.h ++++ b/clang-tools-extra/clangd/IncludeCleaner.h +@@ -23,6 +23,7 @@ + #include "index/CanonicalIncludes.h" + 
#include "clang/Basic/SourceLocation.h" + #include "clang/Tooling/Inclusions/StandardLibrary.h" ++#include "clang-include-cleaner/Types.h" + #include "llvm/ADT/DenseSet.h" + #include "llvm/ADT/STLFunctionalExtras.h" + #include "llvm/ADT/StringSet.h" +@@ -100,6 +101,10 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST); + std::vector<Diag> issueUnusedIncludesDiagnostics(ParsedAST &AST, + llvm::StringRef Code); + ++// Does an include-cleaner header spec match a clangd recorded inclusion? ++bool match(const include_cleaner::Header &H, const Inclusion &I, ++ const IncludeStructure &S); ++ + /// Affects whether standard library includes should be considered for + /// removal. This is off by default for now due to implementation limitations: + /// - macros are not tracked +diff --git a/clang-tools-extra/include-cleaner/CMakeLists.txt b/clang-tools-extra/include-cleaner/CMakeLists.txt +index 0550b02f603b..325186879a47 100644 +--- a/clang-tools-extra/include-cleaner/CMakeLists.txt ++++ b/clang-tools-extra/include-cleaner/CMakeLists.txt +@@ -1,4 +1,8 @@ ++include_directories(include) ++include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) + add_subdirectory(lib) ++add_subdirectory(tool) ++ + if(CLANG_INCLUDE_TESTS) + add_subdirectory(test) + add_subdirectory(unittests) +diff --git a/clang-tools-extra/include-cleaner/README.md b/clang-tools-extra/include-cleaner/README.md +deleted file mode 100644 +index e69de29bb2d1..000000000000 +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +new file mode 100644 +index 000000000000..4e5cc8d03814 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +@@ -0,0 +1,77 @@ ++//===--- Analysis.h - Analyze used files --------------------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_ANALYSIS_H ++#define CLANG_INCLUDE_CLEANER_ANALYSIS_H ++ ++#include "clang-include-cleaner/Policy.h" ++#include "clang-include-cleaner/Types.h" ++ ++namespace clang { ++namespace include_cleaner { ++class Cache; ++ ++// Bundles the policy, compiler state, and caches for one include-cleaner run. ++// (This is needed everywhere, but shouldn't be used to propagate state around!) ++class AnalysisContext { ++public: ++ AnalysisContext(const Policy &, const Preprocessor &); ++ AnalysisContext(AnalysisContext &&) = delete; ++ AnalysisContext &operator=(AnalysisContext &&) = delete; ++ ~AnalysisContext(); ++ ++ const Policy &policy() const { return P; } ++ ++ const SourceManager &sourceManager() const { return *SM; } ++ const Preprocessor &preprocessor() const { return *PP; } ++ ++ // Only for internal use (the Cache class definition is not exposed). ++ // This allows us to reuse e.g. mappings from symbols to their locations. ++ Cache &cache() { return *C; } ++ // FIXME: does this need to be public? ++ Symbol macro(const IdentifierInfo *, SourceLocation); ++ ++private: ++ Policy P; ++ const SourceManager *SM; ++ const Preprocessor *PP; ++ std::unique_ptr<Cache> C; ++}; ++ ++// A UsedSymbolVisitor is a callback invoked for each symbol reference seen. 
++// ++// References occur at a particular location, refer to a single symbol, and ++// that symbol may be provided by any of several headers. ++// ++// The first element of ProvidedBy is the *preferred* header, e.g. to insert. ++using UsedSymbolVisitor = ++ llvm::function_ref<void(SourceLocation UsedAt, Symbol UsedSymbol, ++ llvm::ArrayRef<Header> ProvidedBy)>; ++ ++// Find and report all references to symbols in a region of code. ++// ++// The AST traversal is rooted at ASTRoots - typically top-level declarations ++// of a single source file. MacroRefs are additional recorded references to ++// macros, which do not appear in the AST. ++// ++// This is the main entrypoint of the include-cleaner library, and can be used: ++// - to diagnose missing includes: a referenced symbol is provided by ++// headers which don't match any #include in the main file ++// - to diagnose unused includes: an #include in the main file does not match ++// the headers for any referenced symbol ++// ++// Mapping between Header and #include directives is not provided here, but see ++// RecordedPP::Includes::match() in Hooks.h. ++void walkUsed(AnalysisContext &, llvm::ArrayRef<Decl *> ASTRoots, ++ llvm::ArrayRef<SymbolReference> MacroRefs, ++ UsedSymbolVisitor Callback); ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h +new file mode 100644 +index 000000000000..39e11653b210 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h +@@ -0,0 +1,87 @@ ++//===--- Hooks.h - Record compiler events -------------------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Where Analysis.h analyzes AST nodes and recorded preprocessor events, this ++// file defines ways to capture AST and preprocessor information from a parse. ++// ++// These are the simplest way to connect include-cleaner logic to the parser, ++// but other ways are possible (for example clangd records includes separately). ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_HOOKS_H ++#define CLANG_INCLUDE_CLEANER_HOOKS_H ++ ++#include "Analysis.h" ++#include "Types.h" ++#include "clang/Basic/FileEntry.h" ++#include "clang/Basic/SourceLocation.h" ++#include "llvm/ADT/DenseMap.h" ++#include "llvm/ADT/SmallVector.h" ++#include "llvm/ADT/StringMap.h" ++#include "llvm/ADT/StringRef.h" ++#include <vector> ++ ++namespace clang { ++class FileEntry; ++class PPCallbacks; ++namespace include_cleaner { ++class PPRecorder; ++ ++// Contains recorded preprocessor events relevant to include-cleaner. ++struct RecordedPP { ++ // The callback (when installed into clang) tracks macros/includes in this. ++ std::unique_ptr<PPCallbacks> record(AnalysisContext &Ctx); ++ // FIXME: probably also want a comment handler to capture IWYU pragmas. ++ ++ // Describes where macros were used from the main file. ++ std::vector<SymbolReference> MacroReferences; ++ ++ // A single #include directive from the main file. ++ struct Include { ++ llvm::StringRef Spelled; // e.g. vector ++ const FileEntry *Resolved; // e.g. 
/path/to/c++/v1/vector ++ SourceLocation Location; // of hash in #include <vector> ++ unsigned Line; // 1-based line number for #include ++ }; ++ // The set of includes recorded from the main file. ++ class RecordedIncludes { ++ public: ++ // All #includes seen, in the order they appear. ++ llvm::ArrayRef<Include> all() const { return All; } ++ // Determine #includes that match a header (that provides a used symbol). ++ // ++ // Matching is based on the type of Header specified: ++ // - for a physical file like /path/to/foo.h, we check Resolved ++ // - for a logical file like <vector>, we check Spelled ++ llvm::SmallVector<const Include *> match(Header H) const; ++ ++ private: ++ std::vector<Include> All; ++ llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling; ++ llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile; ++ friend PPRecorder; ++ } Includes; ++}; ++ ++// Contains recorded parser events relevant to include-cleaner. ++struct RecordedAST { ++ // The consumer (when installed into clang) tracks declarations in this. ++ std::unique_ptr<ASTConsumer> record(AnalysisContext &Ctx); ++ ++ // The set of declarations written at file scope inside the main file. ++ // ++ // These are the roots of the subtrees that should be traversed to find uses. ++ // (Traversing the TranslationUnitDecl would find uses inside headers!) ++ std::vector<Decl *> TopLevelDecls; ++}; ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h +new file mode 100644 +index 000000000000..142887b85529 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h +@@ -0,0 +1,35 @@ ++//===--- Policy.h - Tuning what is considered used ----------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_POLICY_H ++#define CLANG_INCLUDE_CLEANER_POLICY_H ++ ++namespace clang { ++namespace include_cleaner { ++ ++// Provides some fine-tuning of include-cleaner's choices about what is used. ++// ++// Changing the policy serves two purposes: ++// - marking more things used reduces the false-positives for "unused include", ++// while marking fewer things improves "missing include" in the same way. ++// - different coding styles may make different decisions about which includes ++// are required. ++struct Policy { ++ // Does construction count as use of the type, when the type is not named? ++ // e.g. printVector({x, y, z}); - is std::vector used? ++ bool Construction = false; ++ // Is member access tracked as a reference? ++ bool Members = false; ++ // Are operator calls tracked as references? 
++ bool Operators = false; ++}; ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +new file mode 100644 +index 000000000000..2a91473b926e +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +@@ -0,0 +1,219 @@ ++//===--- Types.h - Data structures for used-symbol analysis -------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// Find referenced files is mostly a matter of translating: ++// AST Node => declaration => source location => file ++// ++// clang has types for these (DynTypedNode, Decl, SourceLocation, FileID), but ++// there are special cases: macros are not declarations, the concrete file where ++// a standard library symbol was defined doesn't matter, etc. ++// ++// We define some slightly more abstract sum types to handle these cases while ++// keeping the API clean. For example, Symbol is Decl+DefinedMacro. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_TYPES_H ++#define CLANG_INCLUDE_CLEANER_TYPES_H ++ ++#include "clang/AST/DeclBase.h" ++#include "clang/Tooling/Inclusions/StandardLibrary.h" ++#include "llvm/ADT/BitmaskEnum.h" ++#include "llvm/ADT/PointerSumType.h" ++ ++namespace clang { ++class IdentifierInfo; ++class MacroDirective; ++namespace include_cleaner { ++ ++// Identifies a macro, along with a particular definition of it. ++// We generally consider redefined macros to be different symbols. ++struct DefinedMacro { ++ const IdentifierInfo *Name; ++ const SourceLocation Definition; ++}; ++ ++// A Symbol is an entity that can be referenced. ++// It is either a declaration (NamedDecl) or a macro (DefinedMacro). ++class Symbol { ++public: ++ enum Kind { ++ Macro, ++ Declaration, ++ }; ++ Symbol(NamedDecl *ND) : Target(ND) {} ++ Symbol(const DefinedMacro *M) : Target(M) {} ++ ++ std::string name() const; ++ std::string nodeName() const; ++ Kind kind() const { return Target.is<NamedDecl *>() ? Declaration : Macro; } ++ ++ NamedDecl *getDeclaration() const { return Target.get<NamedDecl *>(); } ++ const DefinedMacro *getMacro() const { ++ return Target.get<const DefinedMacro *>(); ++ } ++ ++private: ++ llvm::PointerUnion<const DefinedMacro *, NamedDecl *> Target; ++}; ++ ++// A usage of a Symbol seen in our source code. ++struct SymbolReference { ++ // The point in the code where the reference occurred. ++ // We could track the DynTypedNode we found it in if it's important. ++ SourceLocation Location; ++ Symbol Target; ++}; ++ ++// A Location is a place where a symbol can be provided. ++// It is either a physical part of the TU (SourceLocation) or a logical location ++// in the standard library (stdlib::Symbol). 
++class Location { ++public: ++ enum Kind : uint8_t { ++ Physical, ++ StandardLibrary, ++ }; ++ ++ Location(SourceLocation S) : K(Physical), SrcLoc(S) {} ++ Location(tooling::stdlib::Symbol S) : K(StandardLibrary), StdlibSym(S) {} ++ ++ std::string name(const SourceManager &SM) const; ++ Kind kind() const { return K; } ++ ++ SourceLocation getPhysical() const { ++ assert(kind() == Physical); ++ return SrcLoc; ++ }; ++ tooling::stdlib::Symbol getStandardLibrary() const { ++ assert(kind() == StandardLibrary); ++ return StdlibSym; ++ }; ++ ++private: ++ Kind K; ++ union { ++ SourceLocation SrcLoc; ++ tooling::stdlib::Symbol StdlibSym; ++ }; ++}; ++ ++// A Header is an includable file that can provide access to Locations. ++// It is either a physical file (FileEntry), a logical location in the standard ++// library (stdlib::Header), or a verbatim header spelling (StringRef). ++class Header { ++public: ++ enum Kind : uint8_t { ++ Physical, ++ StandardLibrary, ++ Verbatim, ++ Builtin, ++ MainFile, ++ }; ++ ++ Header(const FileEntry *FE) : K(Physical), PhysicalFile(FE) {} ++ Header(tooling::stdlib::Header H) : K(StandardLibrary), StdlibHeader(H) {} ++ Header(const char *V) : K(Verbatim), VerbatimSpelling(V) {} ++ static Header builtin() { return Header{Builtin}; }; ++ static Header mainFile() { return Header{MainFile}; }; ++ ++ std::string name() const; ++ Kind kind() const { return K; } ++ ++ const FileEntry *getPhysical() const { ++ assert(kind() == Physical); ++ return PhysicalFile; ++ }; ++ tooling::stdlib::Header getStandardLibrary() const { ++ assert(kind() == StandardLibrary); ++ return StdlibHeader; ++ }; ++ llvm::StringRef getVerbatimSpelling() const { ++ assert(kind() == Verbatim); ++ return VerbatimSpelling; ++ }; ++ ++private: ++ Header(Kind K) : K(K) {} ++ ++ Kind K; ++ union { ++ const FileEntry *PhysicalFile; ++ tooling::stdlib::Header StdlibHeader; ++ const char *VerbatimSpelling; ++ }; ++ ++ friend bool operator==(const Header &L, const Header &R) { ++ if (L.kind() != R.kind()) ++ return false; ++ switch (L.kind()) { ++ case Physical: ++ return L.getPhysical() == R.getPhysical(); ++ case StandardLibrary: ++ return L.getStandardLibrary() == R.getStandardLibrary(); ++ case Verbatim: ++ return L.getVerbatimSpelling() == R.getVerbatimSpelling(); ++ case Builtin: ++ case MainFile: ++ return true; // no payload ++ } ++ llvm_unreachable("unhandled Header kind"); ++ } ++ ++ friend bool operator<(const Header &L, const Header &R) { ++ if (L.kind() != R.kind()) ++ return L.kind() < R.kind(); ++ switch (L.kind()) { ++ case Physical: ++ return L.getPhysical() == R.getPhysical(); ++ case StandardLibrary: ++ return L.getStandardLibrary() < R.getStandardLibrary(); ++ case Verbatim: ++ return L.getVerbatimSpelling() < R.getVerbatimSpelling(); ++ case Builtin: ++ case MainFile: ++ return false; // no payload ++ } ++ llvm_unreachable("unhandled Header kind"); ++ } ++ ++ friend llvm::hash_code hash_value(const Header &H) { ++ switch (H.K) { ++ case Header::Physical: ++ return llvm::hash_combine(H.K, H.getPhysical()); ++ case Header::StandardLibrary: ++ // FIXME: make StdlibHeader hashable instead. 
++ return llvm::hash_combine(H.K, H.getStandardLibrary().name()); ++ case Header::Verbatim: ++ return llvm::hash_combine(H.K, llvm::StringRef(H.VerbatimSpelling)); ++ case Header::Builtin: ++ case Header::MainFile: ++ return llvm::hash_value(H.K); ++ } ++ } ++}; ++ ++template <typename T> struct DefaultDenseMapInfo { ++ static T isEqual(const T &L, const T &R) { return L == R; } ++ static unsigned getHashValue(const T &V) { return hash_value(V); } ++}; ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++namespace llvm { ++template <> struct DenseMapInfo<clang::include_cleaner::Header> { ++ using Header = clang::include_cleaner::Header; ++ static Header getTombstoneKey() { return Header("__tombstone__"); } ++ static Header getEmptyKey() { return Header("__empty__"); } ++ static bool isEqual(const Header &L, const Header &R) { return L == R; } ++ static unsigned getHashValue(const Header &V) { return hash_value(V); } ++}; ++} // namespace llvm ++ ++#endif +diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp +new file mode 100644 +index 000000000000..5ac0008b07e8 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp +@@ -0,0 +1,101 @@ ++//===--- Analysis.cpp - Analyze used files --------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Analysis.h" ++#include "AnalysisInternal.h" ++#include "clang/Lex/Preprocessor.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++AnalysisContext::AnalysisContext(const Policy &P, const Preprocessor &PP) ++ : P(P), SM(&PP.getSourceManager()), PP(&PP), C(std::make_unique<Cache>()) {} ++AnalysisContext::~AnalysisContext() = default; ++ ++static bool prefer(AnalysisContext &Ctx, Hint L, Hint R) { ++ return std::make_tuple(bool(L & Hint::NameMatch), bool(L & Hint::Complete)) > ++ std::make_tuple(bool(R & Hint::NameMatch), bool(R & Hint::Complete)); ++} ++ ++// Is this hint actually useful? ++static void addNameMatchHint(const IdentifierInfo *II, ++ llvm::SmallVector<Hinted<Header>> &H) { ++ if (!II) ++ return; ++ for (auto &HH : H) ++ if (HH->kind() == Header::Physical && ++ II->getName().equals_insensitive(HH->getPhysical()->getName())) ++ HH.Hint |= Hint::NameMatch; ++} ++ ++static llvm::SmallVector<Header> ++rank(AnalysisContext &Ctx, llvm::SmallVector<Hinted<Header>> &Candidates) { ++ // Sort by Header, so we can deduplicate (and combine flags). ++ llvm::stable_sort(Candidates, ++ [&](const Hinted<Header> &L, const Hinted<Header> &R) { ++ return *L < *R; ++ }); ++ // Like unique(), but merge hints. ++ auto *Write = Candidates.begin(); ++ for (auto *Read = Candidates.begin(); Read != Candidates.end(); ++Write) { ++ *Write = *Read; ++ for (++Read; Read != Candidates.end() && Read->Value == Write->Value; ++ ++Read) ++ Write->Hint |= Read->Hint; ++ } ++ Candidates.erase(Write, Candidates.end()); ++ // Now sort by hints. ++ llvm::stable_sort(Candidates, ++ [&](const Hinted<Header> &L, const Hinted<Header> &R) { ++ return prefer(Ctx, L.Hint, R.Hint); ++ }); ++ // Drop hints to return clean result list. 
++ llvm::SmallVector<Header> Result; ++ for (const auto &H : Candidates) ++ Result.push_back(*H); ++ return Result; ++} ++ ++template <typename T> void addHint(Hint H, T &Items) { ++ for (auto &Item : Items) ++ Item.Hint |= H; ++} ++ ++void walkUsed(AnalysisContext &Ctx, llvm::ArrayRef<Decl *> ASTRoots, ++ llvm::ArrayRef<SymbolReference> MacroRefs, ++ UsedSymbolVisitor Callback) { ++ for (Decl *Root : ASTRoots) { ++ walkAST(Ctx, *Root, [&](SourceLocation RefLoc, Hinted<NamedDecl &> ND) { ++ auto Locations = locateDecl(Ctx, *ND); ++ llvm::SmallVector<Hinted<Header>> Headers; ++ for (const auto &Loc : Locations) { ++ auto LocHeaders = includableHeader(Ctx, *Loc); ++ addHint(Loc.Hint, LocHeaders); ++ Headers.append(std::move(LocHeaders)); ++ } ++ addHint(ND.Hint, Headers); ++ addNameMatchHint(ND.Value.getDeclName().getAsIdentifierInfo(), Headers); ++ Callback(RefLoc, &ND.Value, rank(Ctx, Headers)); ++ }); ++ } ++ for (const SymbolReference &MacroRef : MacroRefs) { ++ assert(MacroRef.Target.kind() == Symbol::Macro); ++ auto Loc = locateMacro(Ctx, *MacroRef.Target.getMacro()); ++ auto Headers = includableHeader(Ctx, *Loc); ++ addHint(Loc.Hint, Headers); ++ addNameMatchHint(MacroRef.Target.getMacro()->Name, Headers); ++ Callback(MacroRef.Location, MacroRef.Target, rank(Ctx, Headers)); ++ } ++} ++ ++Symbol AnalysisContext::macro(const IdentifierInfo *II, SourceLocation Loc) { ++ return cache().macro(II, Loc); ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h +index 8b0c73fe7997..31b1ad8039d8 100644 +--- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h ++++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h +@@ -21,6 +21,95 @@ + #ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H + #define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H + ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Types.h" ++#include "clang/Tooling/Inclusions/StandardLibrary.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++// FIXME: Right now we cache nothing, this is just used as an arena for macros. ++// Verify we're burning time in repeated analysis and cache partial operations. ++class Cache { ++public: ++ Symbol macro(const IdentifierInfo *Name, const SourceLocation Def) { ++ auto &DMS = DefinedMacros[Name->getName()]; ++ // Linear search. We probably only saw ~1 definition of each macro name. ++ for (const DefinedMacro &DM : DMS) ++ if (DM.Definition == Def) ++ return &DM; ++ DMS.push_back(DefinedMacro{Name, Def}); ++ return &DMS.back(); ++ } ++ ++ tooling::stdlib::Recognizer StdlibRecognizer; ++ ++private: ++ llvm::StringMap<llvm::SmallVector<DefinedMacro>> DefinedMacros; ++}; ++ ++enum class Hint : uint16_t { ++ None = 0, ++ Complete = 1, // Provides a complete definition that is often needed. ++ // e.g. classes, templates. ++ NameMatch = 1, // Header name matches the symbol name. ++ LLVM_MARK_AS_BITMASK_ENUM(Hint::Complete) ++}; ++LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); ++ ++template <typename T> struct Hinted { ++ Hinted(T Value, Hint H = Hint::None) : Value(Value), Hint(H) {} ++ T Value; ++ include_cleaner::Hint Hint; ++ ++ T &operator*() { return Value; } ++ const T &operator*() const { return Value; } ++ std::remove_reference_t<T> *operator->() { return &Value; } ++ const std::remove_reference_t<T> *operator->() const { return &Value; } ++}; ++ ++// Traverses a subtree of the AST, reporting declarations referenced. 
++void walkAST(AnalysisContext &, Decl &Root, ++ llvm::function_ref<void(SourceLocation, Hinted<NamedDecl &>)>); ++ ++// Finds the locations where a declaration is provided. ++llvm::SmallVector<Hinted<Location>> locateDecl(AnalysisContext &, ++ const NamedDecl &); ++ ++// Finds the locations where a macro is provided. ++Hinted<Location> locateMacro(AnalysisContext &, const DefinedMacro &); ++ ++// Finds the headers that provide a location. ++llvm::SmallVector<Hinted<Header>> includableHeader(AnalysisContext &, ++ const Location &); ++ ++} // namespace include_cleaner ++} // namespace clang ++ ++#endif ++//===--- AnalysisInternal.h - Analysis building blocks ------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file provides smaller, testable pieces of the used-header analysis. ++// We find the headers by chaining together several mappings. ++// ++// AST => AST node => Symbol => Location => Header ++// / ++// Macro expansion => ++// ++// The individual steps are declared here. ++// (AST => AST Node => Symbol is one API to avoid materializing DynTypedNodes). ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H ++#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H ++ + #include "clang/Basic/SourceLocation.h" + #include "llvm/ADT/STLFunctionalExtras.h" + +diff --git a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +index 5e2807332f94..25d66b4f30df 100644 +--- a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt ++++ b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +@@ -1,10 +1,15 @@ + set(LLVM_LINK_COMPONENTS Support) + + add_clang_library(clangIncludeCleaner ++ Analysis.cpp ++ Headers.cpp ++ Hooks.cpp ++ Locations.cpp ++ Types.cpp + WalkAST.cpp + + LINK_LIBS + clangBasic ++ clangLex + clangAST + ) +- +diff --git a/clang-tools-extra/include-cleaner/lib/Headers.cpp b/clang-tools-extra/include-cleaner/lib/Headers.cpp +new file mode 100644 +index 000000000000..f41bbe4c59c8 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Headers.cpp +@@ -0,0 +1,46 @@ ++//===--- Headers.cpp - Find headers that provide locations ----------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "AnalysisInternal.h" ++#include "clang/Basic/SourceManager.h" ++#include "clang/Lex/Preprocessor.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++llvm::SmallVector<Hinted<Header>> includableHeader(AnalysisContext &Ctx, ++ const Location &Loc) { ++ switch (Loc.kind()) { ++ case Location::Physical: { ++ FileID FID = Ctx.sourceManager().getFileID( ++ Ctx.sourceManager().getExpansionLoc(Loc.getPhysical())); ++ if (FID == Ctx.sourceManager().getMainFileID()) ++ return {Header::mainFile()}; ++ if (FID == Ctx.preprocessor().getPredefinesFileID()) ++ return {Header::builtin()}; ++ // FIXME: if the file is not self-contained, find its umbrella header: ++ // - files that lack header guards (e.g. 
*.def) ++ // - IWYU private pragmas (and maybe export?) ++ // - #pragma clang include_instead ++ // - headers containing "#error ... include" clangd isDontIncludeMeHeader ++ // - apple framework header layout ++ if (auto *FE = Ctx.sourceManager().getFileEntryForID(FID)) ++ return {{FE}}; ++ return {}; ++ } ++ case Location::StandardLibrary: ++ // FIXME: some symbols are provided by multiple stdlib headers: ++ // - for historical reasons, like size_t ++ // - some headers are guaranteed to include others (<initializer_list>) ++ // - ::printf is de facto provided by cstdio and stdio.h, etc ++ return {{Loc.getStandardLibrary().header()}}; ++ } ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/Hooks.cpp b/clang-tools-extra/include-cleaner/lib/Hooks.cpp +new file mode 100644 +index 000000000000..decb83110c65 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Hooks.cpp +@@ -0,0 +1,166 @@ ++//===--- Hooks.cpp - Record events from the compiler --------------- C++-*-===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Hooks.h" ++#include "AnalysisInternal.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang/AST/ASTConsumer.h" ++#include "clang/AST/DeclCXX.h" ++#include "clang/AST/DeclGroup.h" ++#include "clang/AST/DeclObjC.h" ++#include "clang/Lex/MacroInfo.h" ++#include "clang/Lex/PPCallbacks.h" ++#include "clang/Lex/Preprocessor.h" ++#include "clang/Lex/Token.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++class PPRecorder : public PPCallbacks { ++public: ++ PPRecorder(AnalysisContext &Ctx, RecordedPP &Recorded) ++ : Ctx(Ctx), Recorded(Recorded) {} ++ ++ virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, ++ SrcMgr::CharacteristicKind FileType, ++ FileID PrevFID) override { ++ Active = Ctx.sourceManager().isWrittenInMainFile(Loc); ++ } ++ ++ void InclusionDirective(SourceLocation Hash, const Token &IncludeTok, ++ StringRef SpelledFilename, bool IsAngled, ++ CharSourceRange FilenameRange, Optional<FileEntryRef> File, ++ StringRef SearchPath, StringRef RelativePath, ++ const Module *, SrcMgr::CharacteristicKind) override { ++ if (!Active) ++ return; ++ ++ unsigned Index = Recorded.Includes.All.size(); ++ Recorded.Includes.All.emplace_back(); ++ RecordedPP::Include &I = Recorded.Includes.All.back(); ++ const auto *const RawFile = &(*File).getFileEntry(); ++ I.Location = Hash; ++ I.Resolved = RawFile; ++ I.Line = Ctx.sourceManager().getSpellingLineNumber(Hash); ++ auto BySpellingIt = ++ Recorded.Includes.BySpelling.try_emplace(SpelledFilename).first; ++ I.Spelled = BySpellingIt->first(); ++ ++ BySpellingIt->second.push_back(Index); ++ Recorded.Includes.ByFile[RawFile].push_back(Index); ++ } ++ ++ void MacroExpands(const Token &MacroName, const MacroDefinition &MD, ++ SourceRange Range, const MacroArgs *Args) override { ++ if (!Active) ++ return; ++ recordMacroRef(MacroName, *MD.getMacroInfo()); ++ } ++ ++ void MacroDefined(const Token &MacroName, const MacroDirective *MD) override { ++ if (!Active) ++ return; ++ ++ const auto *MI = MD->getMacroInfo(); ++ // The tokens of a macro definition could refer to a macro. 
++ // Formally this reference isn't resolved until this macro is expanded, ++ // but we want to treat it as a reference anyway. ++ for (const auto &Tok : MI->tokens()) { ++ auto *II = Tok.getIdentifierInfo(); ++ // Could this token be a reference to a macro? (Not param to this macro). ++ if (!II || !II->hadMacroDefinition() || ++ llvm::is_contained(MI->params(), II)) ++ continue; ++ if (const MacroInfo *MI = Ctx.preprocessor().getMacroInfo(II)) ++ recordMacroRef(Tok, *MI); ++ } ++ } ++ ++private: ++ void recordMacroRef(const Token &Tok, const MacroInfo &MI) { ++ if (MI.isBuiltinMacro()) ++ return; // __FILE__ is not a reference. ++ Recorded.MacroReferences.push_back(SymbolReference{ ++ Tok.getLocation(), ++ Ctx.cache().macro(Tok.getIdentifierInfo(), MI.getDefinitionLoc())}); ++ } ++ ++ bool Active = false; ++ AnalysisContext &Ctx; ++ RecordedPP &Recorded; ++}; ++ ++llvm::SmallVector<const RecordedPP::Include *> ++RecordedPP::RecordedIncludes::match(Header H) const { ++ llvm::SmallVector<const Include *> Result; ++ switch (H.kind()) { ++ case Header::Physical: ++ for (unsigned I : ByFile.lookup(H.getPhysical())) ++ Result.push_back(&All[I]); ++ break; ++ case Header::StandardLibrary: ++ for (unsigned I : ++ BySpelling.lookup(H.getStandardLibrary().name().trim("<>"))) ++ Result.push_back(&All[I]); ++ break; ++ case Header::Verbatim: ++ for (unsigned I : BySpelling.lookup(H.getVerbatimSpelling())) ++ Result.push_back(&All[I]); ++ break; ++ case Header::Builtin: ++ case Header::MainFile: ++ break; ++ } ++ llvm::sort(Result); ++ Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); ++ return Result; ++} ++ ++class ASTRecorder : public ASTConsumer { ++public: ++ ASTRecorder(AnalysisContext &Ctx, RecordedAST &Recorded) ++ : Ctx(Ctx), Recorded(Recorded) {} ++ ++ bool HandleTopLevelDecl(DeclGroupRef DG) override { ++ for (Decl *D : DG) { ++ if (!Ctx.sourceManager().isWrittenInMainFile( ++ Ctx.sourceManager().getExpansionLoc(D->getLocation()))) ++ continue; ++ if (const auto *T = llvm::dyn_cast<FunctionDecl>(D)) ++ if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) ++ continue; ++ if (const auto *T = llvm::dyn_cast<CXXRecordDecl>(D)) ++ if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) ++ continue; ++ if (const auto *T = llvm::dyn_cast<VarDecl>(D)) ++ if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) ++ continue; ++ // ObjCMethodDecl are not actually top-level! ++ if (isa<ObjCMethodDecl>(D)) ++ continue; ++ ++ Recorded.TopLevelDecls.push_back(D); ++ } ++ return true; ++ } ++ ++private: ++ AnalysisContext &Ctx; ++ RecordedAST &Recorded; ++}; ++ ++std::unique_ptr<PPCallbacks> RecordedPP::record(AnalysisContext &Ctx) { ++ return std::make_unique<PPRecorder>(Ctx, *this); ++} ++ ++std::unique_ptr<ASTConsumer> RecordedAST::record(AnalysisContext &Ctx) { ++ return std::make_unique<ASTRecorder>(Ctx, *this); ++} ++ ++} // namespace include_cleaner ++} // namespace clang +\ No newline at end of file +diff --git a/clang-tools-extra/include-cleaner/lib/Locations.cpp b/clang-tools-extra/include-cleaner/lib/Locations.cpp +new file mode 100644 +index 000000000000..7e23c56c1dfc +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Locations.cpp +@@ -0,0 +1,60 @@ ++//===--- Locations.cpp - Find the locations that provide symbols ----------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "AnalysisInternal.h" ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Types.h" ++#include "clang/AST/Decl.h" ++#include "clang/AST/DeclBase.h" ++#include "clang/AST/DeclTemplate.h" ++#include "clang/Basic/SourceLocation.h" ++#include "llvm/ADT/SmallVector.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++Hint declHint(const NamedDecl &D) { ++ Hint H = Hint::None; ++ if (auto *TD = llvm::dyn_cast<TagDecl>(&D)) ++ if (TD->isThisDeclarationADefinition()) ++ H |= Hint::Complete; ++ if (auto *CTD = llvm::dyn_cast<ClassTemplateDecl>(&D)) ++ if (CTD->isThisDeclarationADefinition()) ++ H |= Hint::Complete; ++ // A function template being defined is similar to a class being defined. ++ if (auto *FTD = llvm::dyn_cast<FunctionTemplateDecl>(&D)) ++ if (FTD->isThisDeclarationADefinition()) ++ H |= Hint::Complete; ++ return H; ++} ++ ++llvm::SmallVector<Hinted<Location>> locateDecl(AnalysisContext &Ctx, ++ const NamedDecl &ND) { ++ if (auto StdlibSym = Ctx.cache().StdlibRecognizer(&ND)) ++ return {{*StdlibSym}}; ++ ++ llvm::SmallVector<Hinted<Location>> Result; ++ // Is accepting all the redecls too naive? ++ for (const Decl *RD : ND.redecls()) { ++ // `friend X` is not an interesting location for X unless it's acting as a ++ // forward-declaration. ++ if (RD->getFriendObjectKind() == Decl::FOK_Declared) ++ continue; ++ SourceLocation Loc = RD->getLocation(); ++ if (Loc.isValid()) ++ Result.push_back({Loc, declHint(*cast<NamedDecl>(RD))}); ++ } ++ return Result; ++} ++ ++Hinted<Location> locateMacro(AnalysisContext &Ctx, const DefinedMacro &M) { ++ return {M.Definition}; ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp +new file mode 100644 +index 000000000000..6b79c603a70d +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/lib/Types.cpp +@@ -0,0 +1,61 @@ ++//===--- Types.cpp - Data structures for used-symbol analysis -------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. 
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Types.h" ++#include "clang/AST/Decl.h" ++#include "clang/Basic/FileEntry.h" ++#include "clang/Basic/IdentifierTable.h" ++#include "clang/Tooling/Inclusions/StandardLibrary.h" ++ ++namespace clang { ++namespace include_cleaner { ++ ++std::string Symbol::name() const { ++ switch (kind()) { ++ case Macro: ++ return getMacro()->Name->getName().str(); ++ case Declaration: ++ return getDeclaration()->getNameAsString(); ++ } ++ llvm_unreachable("Unhandled Symbol kind"); ++} ++ ++std::string Symbol::nodeName() const { ++ if (kind() == Macro) ++ return "macro"; ++ return getDeclaration()->getDeclKindName(); ++} ++ ++std::string Location::name(const SourceManager &SM) const { ++ switch (K) { ++ case Physical: ++ return SrcLoc.printToString(SM); ++ case StandardLibrary: ++ return StdlibSym.name().str(); ++ } ++ llvm_unreachable("Unhandled Location kind"); ++} ++ ++std::string Header::name() const { ++ switch (K) { ++ case Physical: ++ return PhysicalFile->getName().str(); ++ case StandardLibrary: ++ return StdlibHeader.name().str(); ++ case Verbatim: ++ return VerbatimSpelling; ++ case Builtin: ++ return "<built-in>"; ++ case MainFile: ++ return "<main-file>"; ++ } ++ llvm_unreachable("Unhandled Header kind"); ++} ++ ++} // namespace include_cleaner ++} // namespace clang +diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +index b7354fe300e0..02a27977005f 100644 +--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp ++++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +@@ -7,40 +7,132 @@ + //===----------------------------------------------------------------------===// + + #include "AnalysisInternal.h" ++#include "clang-include-cleaner/Analysis.h" + #include "clang/AST/RecursiveASTVisitor.h" ++#include "clang/Basic/SourceManager.h" ++#include "llvm/Support/SaveAndRestore.h" + + namespace clang { + namespace include_cleaner { + namespace { +-using DeclCallback = llvm::function_ref<void(SourceLocation, NamedDecl &)>; + ++using DeclCallback = ++ llvm::function_ref<void(SourceLocation, Hinted<NamedDecl &>)>; ++ ++// Traverses part of the AST, looking for references and reporting them. + class ASTWalker : public RecursiveASTVisitor<ASTWalker> { +- DeclCallback Callback; ++public: ++ ASTWalker(AnalysisContext &Ctx, DeclCallback Callback) ++ : Ctx(Ctx), Callback(Callback) {} + +- void report(SourceLocation Loc, NamedDecl *ND) { +- if (!ND || Loc.isInvalid()) +- return; +- Callback(Loc, *cast<NamedDecl>(ND->getCanonicalDecl())); ++ bool VisitDeclRefExpr(DeclRefExpr *E) { ++ if (!Ctx.policy().Operators) ++ if (auto *FD = E->getDecl()->getAsFunction()) ++ if (FD->isOverloadedOperator()) ++ return true; ++ report(E->getLocation(), E->getFoundDecl()); ++ return true; + } + +-public: +- ASTWalker(DeclCallback Callback) : Callback(Callback) {} ++ bool VisitMemberExpr(MemberExpr *ME) { ++ if (Ctx.policy().Members) ++ report(ME->getMemberLoc(), ME->getFoundDecl().getDecl()); ++ return true; ++ } ++ ++ bool VisitTagType(TagType *TT) { ++ report(LocationOfType, TT->getDecl()); ++ return true; ++ } ++ ++ bool VisitFunctionDecl(FunctionDecl *FD) { ++ // Count function definitions as a reference to their declarations. 
++ if (FD->isThisDeclarationADefinition() && FD->getCanonicalDecl() != FD) ++ report(FD->getLocation(), FD->getCanonicalDecl()); ++ return true; ++ } ++ ++ bool VisitCXXConstructExpr(CXXConstructExpr *E) { ++ if (!Ctx.policy().Construction) ++ return true; ++ SaveAndRestore<SourceLocation> Loc(LocationOfType, E->getLocation()); ++ LocationOfType = E->getLocation(); ++ return TraverseType(E->getType()); ++ } ++ ++ // We handle TypeLocs by saving their loc and consuming it in Visit*Type(). ++ // ++ // Handling Visit*TypeLoc() directly would be simpler, but sometimes unwritten ++ // types count as references (e.g. implicit conversions, with no TypeLoc). ++ // Stashing the location and visiting the contained type lets us handle both ++ // cases in VisitTagType() etc. ++ bool TraverseTypeLoc(TypeLoc TL) { ++ SaveAndRestore<SourceLocation> Loc(LocationOfType, TL.getBeginLoc()); ++ // The base implementation calls: ++ // - Visit*TypeLoc() - does nothing ++ // - Visit*Type() - where we handle type references ++ // - TraverseTypeLoc for each lexically nested type. ++ return Base::TraverseTypeLoc(TL); ++ } + +- bool VisitTagTypeLoc(TagTypeLoc TTL) { +- report(TTL.getNameLoc(), TTL.getDecl()); ++ bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { ++ report(LocationOfType, ++ TST->getTemplateName().getAsTemplateDecl()); // Primary template. ++ report(LocationOfType, TST->getAsCXXRecordDecl()); // Specialization + return true; + } + +- bool VisitDeclRefExpr(DeclRefExpr *DRE) { +- report(DRE->getLocation(), DRE->getFoundDecl()); ++ bool VisitUsingType(UsingType *UT) { ++ report(LocationOfType, UT->getFoundDecl()); + return true; + } ++ ++ bool VisitTypedefType(TypedefType *TT) { ++ report(LocationOfType, TT->getDecl()); ++ return true; ++ } ++ ++ bool VisitUsingDecl(UsingDecl *UD) { ++ for (const auto *USD : UD->shadows()) ++ report(UD->getLocation(), USD->getTargetDecl()); ++ return true; ++ } ++ ++ bool VisitOverloadExpr(OverloadExpr *E) { ++ if (llvm::isa<UnresolvedMemberExpr>(E) && !Ctx.policy().Members) ++ return true; ++ for (auto *Candidate : E->decls()) ++ report(E->getExprLoc(), Candidate); ++ return true; ++ } ++ ++private: ++ void report(SourceLocation Loc, NamedDecl *ND) { ++ while (Loc.isMacroID()) { ++ auto DecLoc = Ctx.sourceManager().getDecomposedLoc(Loc); ++ const SrcMgr::ExpansionInfo &Expansion = ++ Ctx.sourceManager().getSLocEntry(DecLoc.first).getExpansion(); ++ if (!Expansion.isMacroArgExpansion()) ++ return; // Names within macro bodies are not considered references. ++ Loc = Expansion.getSpellingLoc().getLocWithOffset(DecLoc.second); ++ } ++ // FIXME: relevant ranking hints? 
++ if (ND) ++ Callback(Loc, *cast<NamedDecl>(ND->getCanonicalDecl())); ++ } ++ ++ using Base = RecursiveASTVisitor; ++ ++ AnalysisContext &Ctx; ++ DeclCallback Callback; ++ ++ SourceLocation LocationOfType; + }; + + } // namespace + +-void walkAST(Decl &Root, DeclCallback Callback) { +- ASTWalker(Callback).TraverseDecl(&Root); ++void walkAST(AnalysisContext &Ctx, Decl &Root, DeclCallback Callback) { ++ ASTWalker(Ctx, Callback).TraverseDecl(&Root); + } + + } // namespace include_cleaner +diff --git a/clang-tools-extra/include-cleaner/tool/CMakeLists.txt b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt +new file mode 100644 +index 000000000000..f8f7c81c761b +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt +@@ -0,0 +1,17 @@ ++set(LLVM_LINK_COMPONENTS support) ++ ++add_clang_tool(clang-include-cleaner ++ ClangIncludeCleaner.cpp ++ ) ++ ++clang_target_link_libraries(clang-include-cleaner ++ PRIVATE ++ clangBasic ++ clangFrontend ++ clangTooling ++ ) ++ ++target_link_libraries(clang-include-cleaner ++ PRIVATE ++ clangIncludeCleaner ++ ) +\ No newline at end of file +diff --git a/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp +new file mode 100644 +index 000000000000..aad70eabdae9 +--- /dev/null ++++ b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp +@@ -0,0 +1,187 @@ ++//===--- ClangIncludeCleaner.cpp - Standalone used-header analysis --------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// clang-include-cleaner finds violations of include-what-you-use policy. ++// ++// It scans a file, finding referenced symbols and headers providing them. ++// - if a reference is satisfied only by indirect #include dependencies, ++// this violates the policy and direct #includes are suggested. ++// - if some #include directive doesn't satisfy any references, this violates ++// the policy (don't include what you don't use!) and removal is suggested. ++// ++// With the -satisfied flag, it will also explain things that were OK: ++// satisfied references and used #includes. ++// ++// This tool doesn't fix broken code where missing #includes prevent parsing, ++// try clang-include-fixer for this instead. 
++// ++//===----------------------------------------------------------------------===// ++ ++#include "clang-include-cleaner/Analysis.h" ++#include "clang-include-cleaner/Hooks.h" ++#include "clang/Basic/Diagnostic.h" ++#include "clang/Frontend/CompilerInstance.h" ++#include "clang/Frontend/FrontendAction.h" ++#include "clang/Tooling/CommonOptionsParser.h" ++#include "clang/Tooling/Tooling.h" ++#include "llvm/Support/CommandLine.h" ++#include "llvm/Support/InitLLVM.h" ++ ++llvm::cl::OptionCategory OptionsCat{"clang-include-cleaner"}; ++llvm::cl::opt<bool> ShowSatisfied{ ++ "satisfied", ++ llvm::cl::cat(OptionsCat), ++ llvm::cl::desc( ++ "Show references whose header is included, and used includes"), ++ llvm::cl::init(false), ++}; ++llvm::cl::opt<bool> Recover{ ++ "recover", ++ llvm::cl::cat(OptionsCat), ++ llvm::cl::desc("Suppress further errors for the same header"), ++ llvm::cl::init(true), ++}; ++ ++namespace clang { ++namespace include_cleaner { ++namespace { ++ ++class Action : public clang::ASTFrontendAction { ++public: ++ bool BeginSourceFileAction(CompilerInstance &CI) override { ++ Diag = &CI.getDiagnostics(); ++ ID.emplace(Diag); ++ Ctx.emplace(Policy{}, CI.getPreprocessor()); ++ CI.getPreprocessor().addPPCallbacks(PP.record(*Ctx)); ++ return true; ++ } ++ ++ void EndSourceFile() override { ++ llvm::DenseSet<Header> Recovered; ++ llvm::DenseMap<const RecordedPP::Include *, Symbol> Used; ++ walkUsed(*Ctx, AST.TopLevelDecls, PP.MacroReferences, ++ [&](SourceLocation Loc, Symbol Sym, ArrayRef<Header> Headers) { ++ diagnoseReference(Loc, Sym, Headers, Recovered, Used); ++ }); ++ diagnoseIncludes(PP.Includes.all(), Used); ++ Ctx.reset(); ++ ++ ASTFrontendAction::EndSourceFile(); ++ } ++ ++ virtual std::unique_ptr<ASTConsumer> ++ CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { ++ return AST.record(*Ctx); ++ } ++ ++private: ++ // The diagnostics that we issue. ++ struct CustomDiagnosticIDs { ++ // References ++ unsigned Satisfied; ++ unsigned Unsatisfied; ++ unsigned NoHeader; ++ unsigned NoteHeader; ++ // #includes ++ unsigned Used; ++ unsigned Unused; ++ ++ CustomDiagnosticIDs(DiagnosticsEngine *D) { ++ auto SatisfiedLevel = ShowSatisfied ? 
DiagnosticsEngine::Remark ++ : DiagnosticsEngine::Ignored; ++ auto Error = DiagnosticsEngine::Error; ++ auto Note = DiagnosticsEngine::Note; ++ auto Warn = DiagnosticsEngine::Warning; ++ ++ Satisfied = D->getCustomDiagID(SatisfiedLevel, "%0 '%1' provided by %2"); ++ Unsatisfied = D->getCustomDiagID(Error, "no header included for %0 '%1'"); ++ NoHeader = D->getCustomDiagID(Warn, "unknown header provides %0 '%1'"); ++ NoteHeader = D->getCustomDiagID(Note, "provided by %0"); ++ Used = D->getCustomDiagID(SatisfiedLevel, "include provides %0 '%1'"); ++ Unused = D->getCustomDiagID(Error, "include is unused"); ++ } ++ }; ++ ++ void ++ diagnoseReference(SourceLocation Loc, Symbol Sym, ArrayRef<Header> Headers, ++ llvm::DenseSet<Header> &Recovered, ++ llvm::DenseMap<const RecordedPP::Include *, Symbol> &Used) { ++ bool Diagnosed = false; ++ for (const auto &H : Headers) { ++ if (H.kind() == Header::Builtin || H.kind() == Header::MainFile) { ++ if (!Diagnosed) { ++ Diag->Report(Loc, ID->Satisfied) ++ << Sym.nodeName() << Sym.name() << H.name(); ++ Diagnosed = true; ++ } ++ } ++ for (const auto *I : PP.Includes.match(H)) { ++ Used.try_emplace(I, Sym); ++ if (!Diagnosed) { ++ Diag->Report(Loc, ID->Satisfied) ++ << Sym.nodeName() << Sym.name() << I->Spelled; ++ Diagnosed = true; ++ } ++ } ++ } ++ if (Diagnosed) ++ return; ++ for (const auto &H : Headers) { ++ if (Recovered.contains(H)) { ++ Diag->Report(Loc, ID->Satisfied) ++ << Sym.nodeName() << Sym.name() << H.name(); ++ return; ++ } ++ } ++ Diag->Report(Loc, Headers.empty() ? ID->NoHeader : ID->Unsatisfied) ++ << Sym.nodeName() << Sym.name(); ++ for (const auto &H : Headers) { ++ Recovered.insert(H); ++ Diag->Report(ID->NoteHeader) << H.name(); ++ } ++ } ++ ++ void diagnoseIncludes( ++ ArrayRef<RecordedPP::Include> Includes, ++ const llvm::DenseMap<const RecordedPP::Include *, Symbol> &Used) { ++ for (const auto &I : Includes) { ++ auto It = Used.find(&I); ++ if (It == Used.end()) ++ Diag->Report(I.Location, ID->Unused); ++ else ++ Diag->Report(I.Location, ID->Used) ++ << It->second.nodeName() << It->second.name(); ++ } ++ } ++ ++ llvm::Optional<AnalysisContext> Ctx; ++ RecordedPP PP; ++ RecordedAST AST; ++ DiagnosticsEngine *Diag; ++ llvm::Optional<CustomDiagnosticIDs> ID; ++}; ++ ++} // namespace ++} // namespace include_cleaner ++} // namespace clang ++ ++int main(int Argc, const char **Argv) { ++ llvm::InitLLVM X(Argc, Argv); ++ auto OptionsParser = ++ clang::tooling::CommonOptionsParser::create(Argc, Argv, OptionsCat); ++ if (!OptionsParser) { ++ llvm::errs() << toString(OptionsParser.takeError()); ++ return 1; ++ } ++ ++ return clang::tooling::ClangTool(OptionsParser->getCompilations(), ++ OptionsParser->getSourcePathList()) ++ .run(clang::tooling::newFrontendActionFactory< ++ clang::include_cleaner::Action>() ++ .get()); ++} +diff --git a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h +index c6ce2780dae6..e94a7fb9304a 100644 +--- a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h ++++ b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h +@@ -49,6 +49,9 @@ private: + friend bool operator==(const Header &L, const Header &R) { + return L.ID == R.ID; + } ++ friend bool operator<(const Header &L, const Header &R) { ++ return L.ID < R.ID; ++ } + }; + + // A top-level standard library symbol, such as std::vector diff --git a/build/build-clang/compiler-rt-rss-limit-heap-profile.patch b/build/build-clang/compiler-rt-rss-limit-heap-profile.patch new file mode 100644 
index 0000000000..f7dfdfcdae --- /dev/null +++ b/build/build-clang/compiler-rt-rss-limit-heap-profile.patch @@ -0,0 +1,49 @@ +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp +index 8fd398564280..b7c4820971bb 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp +@@ -29,6 +29,7 @@ void *BackgroundThread(void *arg) { + const uptr hard_rss_limit_mb = common_flags()->hard_rss_limit_mb; + const uptr soft_rss_limit_mb = common_flags()->soft_rss_limit_mb; + const bool heap_profile = common_flags()->heap_profile; ++ const bool rss_limit_heap_profile = common_flags()->rss_limit_heap_profile; + uptr prev_reported_rss = 0; + uptr prev_reported_stack_depot_size = 0; + bool reached_soft_rss_limit = false; +@@ -56,6 +57,10 @@ void *BackgroundThread(void *arg) { + Report("%s: hard rss limit exhausted (%zdMb vs %zdMb)\n", + SanitizerToolName, hard_rss_limit_mb, current_rss_mb); + DumpProcessMap(); ++ if (rss_limit_heap_profile) { ++ Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb); ++ __sanitizer_print_memory_profile(90, 20); ++ } + Die(); + } + if (soft_rss_limit_mb) { +@@ -63,6 +68,11 @@ void *BackgroundThread(void *arg) { + reached_soft_rss_limit = true; + Report("%s: soft rss limit exhausted (%zdMb vs %zdMb)\n", + SanitizerToolName, soft_rss_limit_mb, current_rss_mb); ++ if (rss_limit_heap_profile) { ++ Printf("\n\nHEAP PROFILE at RSS %zdMb\n", current_rss_mb); ++ __sanitizer_print_memory_profile(90, 20); ++ rss_during_last_reported_profile = current_rss_mb; ++ } + SetRssLimitExceeded(true); + } else if (soft_rss_limit_mb >= current_rss_mb && + reached_soft_rss_limit) { +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +index 6148ae56067c..a0fbb8e14bd5 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +@@ -147,6 +147,9 @@ COMMON_FLAG(uptr, max_allocation_size_mb, 0, + "If non-zero, malloc/new calls larger than this size will return " + "nullptr (or crash if allocator_may_return_null=false).") + COMMON_FLAG(bool, heap_profile, false, "Experimental heap profiler, asan-only") ++COMMON_FLAG(bool, rss_limit_heap_profile, false, ++ "Experimental heap profiler (only when hard/soft rss limit " ++ "exceeded, asan-only") + COMMON_FLAG(s32, allocator_release_to_os_interval_ms, + ((bool)SANITIZER_FUCHSIA || (bool)SANITIZER_WINDOWS) ? -1 : 5000, + "Only affects a 64-bit allocator. If set, tries to release unused " diff --git a/build/build-clang/downgrade-mangling-error_clang_12.patch b/build/build-clang/downgrade-mangling-error_clang_12.patch new file mode 100644 index 0000000000..ad31306ff3 --- /dev/null +++ b/build/build-clang/downgrade-mangling-error_clang_12.patch @@ -0,0 +1,23 @@ +Downgrade unimplemented mangling diagnostic from error to note. +This codepath is exercised by MozsearchIndexer.cpp (the searchfox +indexer) when indexing on Windows. We can do without having the +unimplemented bits for now as long the compiler doesn't fail the +build. 
See also https://bugs.llvm.org/show_bug.cgi?id=39294 + +diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp +index 4420f6a2c1c3..4d9a6434d245 100644 +--- a/clang/lib/AST/ItaniumMangle.cpp ++++ b/clang/lib/AST/ItaniumMangle.cpp +@@ -4028,10 +4028,11 @@ recurse: + if (!NullOut) { + // As bad as this diagnostic is, it's better than crashing. + DiagnosticsEngine &Diags = Context.getDiags(); +- unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, ++ unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Remark, + "cannot yet mangle expression type %0"); + Diags.Report(E->getExprLoc(), DiagID) + << E->getStmtClassName() << E->getSourceRange(); ++ Out << "MOZ_WE_HACKED_AROUND_BUG_1418415"; + return; + } + break; diff --git a/build/build-clang/find_symbolizer_linux_clang_10.patch b/build/build-clang/find_symbolizer_linux_clang_10.patch new file mode 100644 index 0000000000..1ddb02024d --- /dev/null +++ b/build/build-clang/find_symbolizer_linux_clang_10.patch @@ -0,0 +1,58 @@ +We currently need this patch because ASan only searches PATH to find the +llvm-symbolizer binary to symbolize ASan traces. On testing machines, this +can be installed in PATH easily. However, for e.g. the ASan Nightly Project, +where we ship an ASan build, including llvm-symbolizer, to the user, we +cannot expect llvm-symbolizer to be on PATH. Instead, we should try to look +it up next to the binary. This patch implements the functionality for Linux +only until there is similar functionality provided upstream. + +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +index 79930d79425..cfb4f90c0d5 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +@@ -20,6 +20,10 @@ + #include "sanitizer_common.h" + #include "sanitizer_file.h" + ++#if SANITIZER_LINUX ++#include "sanitizer_posix.h" ++#endif ++ + namespace __sanitizer { + + void CatastrophicErrorWrite(const char *buffer, uptr length) { +@@ -194,6 +198,34 @@ char *FindPathToBinary(const char *name) { + if (*end == '\0') break; + beg = end + 1; + } ++ ++#if SANITIZER_LINUX ++ // If we cannot find the requested binary in PATH, we should try to locate ++ // it next to the binary, in case it is shipped with the build itself ++ // (e.g. llvm-symbolizer shipped with sanitizer build to symbolize on client. 
++ if (internal_readlink("/proc/self/exe", buffer.data(), kMaxPathLength) < 0) ++ return nullptr; ++ ++ uptr buf_len = internal_strlen(buffer.data()); ++ ++ /* Avoid using dirname() here */ ++ while (buf_len > 0) { ++ if (buffer[buf_len - 1] == '/') ++ break; ++ buf_len--; ++ } ++ ++ if (!buf_len) ++ return nullptr; ++ ++ if (buf_len + name_len + 1 <= kMaxPathLength) { ++ internal_memcpy(&buffer[buf_len], name, name_len); ++ buffer[buf_len + name_len] = '\0'; ++ if (FileExists(buffer.data())) ++ return internal_strdup(buffer.data()); ++ } ++#endif ++ + return nullptr; + } + diff --git a/build/build-clang/find_symbolizer_linux_clang_15.patch b/build/build-clang/find_symbolizer_linux_clang_15.patch new file mode 100644 index 0000000000..63309e8f00 --- /dev/null +++ b/build/build-clang/find_symbolizer_linux_clang_15.patch @@ -0,0 +1,53 @@ +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +index 7ef499ce07b1..8fd682f943fe 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp +@@ -21,6 +21,10 @@ + #include "sanitizer_file.h" + # include "sanitizer_interface_internal.h" + ++#if SANITIZER_LINUX ++#include "sanitizer_posix.h" ++#endif ++ + namespace __sanitizer { + + void CatastrophicErrorWrite(const char *buffer, uptr length) { +@@ -206,11 +210,35 @@ char *FindPathToBinary(const char *name) { + return internal_strdup(name); + } + ++ uptr name_len = internal_strlen(name); ++ InternalMmapVector<char> buffer(kMaxPathLength); ++ ++#if SANITIZER_LINUX ++ // If we cannot find the requested binary in PATH, we should try to locate ++ // it next to the binary, in case it is shipped with the build itself ++ // (e.g. llvm-symbolizer shipped with sanitizer build to symbolize on client. 
++ if (internal_readlink("/proc/self/exe", buffer.data(), kMaxPathLength) >= 0) { ++ uptr buf_len = internal_strlen(buffer.data()); ++ ++ /* Avoid using dirname() here */ ++ while (buf_len > 0) { ++ if (buffer[buf_len - 1] == '/') ++ break; ++ buf_len--; ++ } ++ ++ if (buf_len && buf_len + name_len + 1 <= kMaxPathLength) { ++ internal_memcpy(&buffer[buf_len], name, name_len); ++ buffer[buf_len + name_len] = '\0'; ++ if (FileExists(buffer.data())) ++ return internal_strdup(buffer.data()); ++ } ++ } ++#endif ++ + const char *path = GetEnv("PATH"); + if (!path) + return nullptr; +- uptr name_len = internal_strlen(name); +- InternalMmapVector<char> buffer(kMaxPathLength); + const char *beg = path; + while (true) { + const char *end = internal_strchrnul(beg, kPathSeparator); diff --git a/build/build-clang/fuzzing_ccov_build_clang_12.patch b/build/build-clang/fuzzing_ccov_build_clang_12.patch new file mode 100644 index 0000000000..1b60a95b91 --- /dev/null +++ b/build/build-clang/fuzzing_ccov_build_clang_12.patch @@ -0,0 +1,27 @@ +From 98bf90ef5ea3dd848ce7d81a662eb7499d11c91c Mon Sep 17 00:00:00 2001 +From: Calixte Denizet <calixte.denizet@gmail.com> +Date: Fri, 16 Apr 2021 10:05:34 +0200 +Subject: [PATCH] [Gcov] Don't run global destructor in ccov builds when env + MOZ_FUZZING_CCOV is existing + +--- + compiler-rt/lib/profile/GCDAProfiling.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c +index 4293e8f7b5bf..6cda4bc7601f 100644 +--- a/compiler-rt/lib/profile/GCDAProfiling.c ++++ b/compiler-rt/lib/profile/GCDAProfiling.c +@@ -586,6 +586,9 @@ void llvm_writeout_files(void) { + __attribute__((destructor(100))) + #endif + static void llvm_writeout_and_clear(void) { ++ if (getenv("MOZ_FUZZING_CCOV")) { ++ return; ++ } + llvm_writeout_files(); + fn_list_remove(&writeout_fn_list); + } +-- +2.30.2 + diff --git a/build/build-clang/linux64.json b/build/build-clang/linux64.json new file mode 100644 index 0000000000..48690dbdaa --- /dev/null +++ b/build/build-clang/linux64.json @@ -0,0 +1,5 @@ +{ + "cc": "/usr/lib/llvm-13/bin/clang", + "cxx": "/usr/lib/llvm-13/bin/clang++", + "as": "/usr/lib/llvm-13/bin/clang" +} diff --git a/build/build-clang/llvmorg-15-init-16512-g4b1e3d193706.patch b/build/build-clang/llvmorg-15-init-16512-g4b1e3d193706.patch new file mode 100644 index 0000000000..5ddb6a52de --- /dev/null +++ b/build/build-clang/llvmorg-15-init-16512-g4b1e3d193706.patch @@ -0,0 +1,138 @@ +From 8482662676a4b6ef79a718c8c09943cb15241664 Mon Sep 17 00:00:00 2001 +From: Tom Stellard <tstellar@redhat.com> +Date: Tue, 21 Jun 2022 22:22:11 -0700 +Subject: [PATCH] [gold] Ignore bitcode from sections inside object files + +-fembed-bitcode will put bitcode into special sections within object +files, but this is not meant to be used by LTO, so the gold plugin +should ignore it. 
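(Illustrative aside, not part of the upstream commit message: an object built with something like "clang -c -fembed-bitcode foo.c" is still an ordinary native object file; the serialized module merely rides along in a .llvmbc section. A pure bitcode input, by contrast, begins with the bitcode file magic, and that distinction is what the gold-plugin.cpp hunk further down keys on, via identify_magic(), before deciding whether to claim a file for LTO.)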
+ +https://github.com/llvm/llvm-project/issues/47216 + +Reviewed By: tejohnson, MaskRay + +Differential Revision: https://reviews.llvm.org/D116995 +--- + llvm/docs/BitCodeFormat.rst | 3 ++- + llvm/docs/GoldPlugin.rst | 4 ++++ + .../tools/gold/X86/Inputs/bcsection-lib.ll | 6 +++++ + llvm/test/tools/gold/X86/Inputs/bcsection.s | 5 ++++ + llvm/test/tools/gold/X86/bcsection.ll | 23 +++++++++++++++---- + llvm/tools/gold/gold-plugin.cpp | 8 +++++++ + 6 files changed, 43 insertions(+), 6 deletions(-) + create mode 100644 llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll + +diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst +index 8e81a7daa459..df1f6915d7d5 100644 +--- a/llvm/docs/BitCodeFormat.rst ++++ b/llvm/docs/BitCodeFormat.rst +@@ -475,7 +475,8 @@ formats. This wrapper format is useful for accommodating LTO in compilation + pipelines where intermediate objects must be native object files which contain + metadata in other sections. + +-Not all tools support this format. ++Not all tools support this format. For example, lld and the gold plugin will ++ignore these sections when linking object files. + + .. _encoding of LLVM IR: + +diff --git a/llvm/docs/GoldPlugin.rst b/llvm/docs/GoldPlugin.rst +index ce310bc2cf3c..07d2fc203eba 100644 +--- a/llvm/docs/GoldPlugin.rst ++++ b/llvm/docs/GoldPlugin.rst +@@ -17,6 +17,10 @@ and above also supports LTO via plugins. However, usage of the LLVM + gold plugin with ld.bfd is not tested and therefore not officially + supported or recommended. + ++As of LLVM 15, the gold plugin will ignore bitcode from the ``.llvmbc`` ++section inside of ELF object files. However, LTO with bitcode files ++is still supported. ++ + .. _`gold linker`: http://sourceware.org/binutils + .. _`GCC LTO`: http://gcc.gnu.org/wiki/LinkTimeOptimization + .. 
_`gold plugin interface`: http://gcc.gnu.org/wiki/whopr/driver +diff --git a/llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll b/llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll +new file mode 100644 +index 000000000000..ef3557c19cdc +--- /dev/null ++++ b/llvm/test/tools/gold/X86/Inputs/bcsection-lib.ll +@@ -0,0 +1,6 @@ ++declare void @elf_func() ++ ++define i32 @lib_func() { ++ call void @elf_func() ++ ret i32 0 ++} +diff --git a/llvm/test/tools/gold/X86/Inputs/bcsection.s b/llvm/test/tools/gold/X86/Inputs/bcsection.s +index ede1e5c532dd..c523612563b4 100644 +--- a/llvm/test/tools/gold/X86/Inputs/bcsection.s ++++ b/llvm/test/tools/gold/X86/Inputs/bcsection.s +@@ -1,2 +1,7 @@ ++.global elf_func ++ ++elf_func: ++ ret ++ + .section .llvmbc + .incbin "bcsection.bc" +diff --git a/llvm/test/tools/gold/X86/bcsection.ll b/llvm/test/tools/gold/X86/bcsection.ll +index 6d3481f8f966..09882d83fe91 100644 +--- a/llvm/test/tools/gold/X86/bcsection.ll ++++ b/llvm/test/tools/gold/X86/bcsection.ll +@@ -2,16 +2,29 @@ + ; RUN: llvm-as -o %t/bcsection.bc %s + + ; RUN: llvm-mc -I=%t -filetype=obj -triple=x86_64-unknown-unknown -o %t/bcsection.bco %p/Inputs/bcsection.s +-; RUN: llvm-nm --no-llvm-bc %t/bcsection.bco 2>&1 | FileCheck %s -check-prefix=NO-SYMBOLS +-; NO-SYMBOLS: no symbols ++; RUN: llc -filetype=obj -mtriple=x86_64-unknown-unknown -o %t/bcsection-lib.o %p/Inputs/bcsection-lib.ll + +-; RUN: %gold -r -o %t/bcsection.o -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext %t/bcsection.bco +-; RUN: llvm-nm --no-llvm-bc %t/bcsection.o | FileCheck %s ++; RUN: %gold -shared --no-undefined -o %t/bcsection.so -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext %t/bcsection.bco %t/bcsection-lib.o ++ ++; This test checks that the gold plugin does not attempt to use the bitcode ++; in the .llvmbc section for LTO. bcsection-lib.o calls a function that is ++; present the symbol table of bcsection.bco, but not included in the embedded ++; bitcode. If the linker were to use the bitcode, then the symbols in the ++; symbol table of bcsection.bco will be ignored and the link will fail. ++; ++; bcsection.bco: ++; .text: ++; elf_func ++; .llvmbc: ++; bitcode_func ++; ++; bcsection-lib.o: ++; calls elf_func() + + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-unknown" + + ; CHECK: main +-define i32 @main() { ++define i32 @bitcode_func() { + ret i32 0 + } +diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp +index 180c181368e3..294c7a3d6178 100644 +--- a/llvm/tools/gold/gold-plugin.cpp ++++ b/llvm/tools/gold/gold-plugin.cpp +@@ -540,6 +540,14 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, + BufferRef = Buffer->getMemBufferRef(); + } + ++ // Only use bitcode files for LTO. InputFile::create() will load bitcode ++ // from the .llvmbc section within a binary object, this bitcode is typically ++ // generated by -fembed-bitcode and is not to be used by LLVMgold.so for LTO. 
++ if (identify_magic(BufferRef.getBuffer()) != file_magic::bitcode) { ++ *claimed = 0; ++ return LDPS_OK; ++ } ++ + *claimed = 1; + + Expected<std::unique_ptr<InputFile>> ObjOrErr = InputFile::create(BufferRef); +-- +2.37.1.1.g659da70093 + diff --git a/build/build-clang/llvmorg-17-init-11952-g2f0a1699eab7.patch b/build/build-clang/llvmorg-17-init-11952-g2f0a1699eab7.patch new file mode 100644 index 0000000000..5d22cc4a8b --- /dev/null +++ b/build/build-clang/llvmorg-17-init-11952-g2f0a1699eab7.patch @@ -0,0 +1,49 @@ +This is an incremental version of the patch, against 16.0.4, which +includes an earlier version of the patch. + +From 3b0fad683523315e0fcd14039326fc0ce5eb350b Mon Sep 17 00:00:00 2001 +From: Phoebe Wang <phoebe.wang@intel.com> +Date: Thu, 18 May 2023 12:38:12 +0800 +Subject: [PATCH] Reland "[Driver] Support multi /guard: options" + +Fixes unexpected warning. + +Differential Revision: https://reviews.llvm.org/D150645 +--- + clang/lib/Driver/ToolChains/Clang.cpp | 1 + + clang/test/Driver/cl-options.c | 5 ++++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 238507e06335..77554aa2c462 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -7801,6 +7801,7 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, + } else { + D.Diag(diag::err_drv_invalid_value) << A->getSpelling() << GuardArgs; + } ++ A->claim(); + } + } + +diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c +index d96b887b7d48..326bc1162714 100644 +--- a/clang/test/Driver/cl-options.c ++++ b/clang/test/Driver/cl-options.c +@@ -647,9 +647,12 @@ + // RUN: %clang_cl /guard:ehcont -### -- %s 2>&1 | FileCheck -check-prefix=EHCONTGUARD %s + // EHCONTGUARD: -ehcontguard + +-// RUN: %clang_cl /guard:cf /guard:ehcont -### -- %s 2>&1 | FileCheck -check-prefix=BOTHGUARD %s ++// RUN: %clang_cl /guard:cf /guard:ehcont -Wall -Wno-msvc-not-found -### -- %s 2>&1 | \ ++// RUN: FileCheck -check-prefix=BOTHGUARD %s --implicit-check-not=warning: + // BOTHGUARD: -cfguard + // BOTHGUARD-SAME: -ehcontguard ++// BOTHGUARD: -guard:cf ++// BOTHGUARD-SAME: -guard:ehcont + + // RUN: %clang_cl /guard:foo -### -- %s 2>&1 | FileCheck -check-prefix=CFGUARDINVALID %s + // CFGUARDINVALID: invalid value 'foo' in '/guard:' +-- +2.40.0.1.gc689dad23e + diff --git a/build/build-clang/llvmorg-17-init-1242-g5de5f66b984a.patch b/build/build-clang/llvmorg-17-init-1242-g5de5f66b984a.patch new file mode 100644 index 0000000000..c56bc93f04 --- /dev/null +++ b/build/build-clang/llvmorg-17-init-1242-g5de5f66b984a.patch @@ -0,0 +1,317 @@ +From beb699370963cb347f636435efc8409219c58f5f Mon Sep 17 00:00:00 2001 +From: John Brawn <john.brawn@arm.com> +Date: Mon, 30 Jan 2023 14:34:14 +0000 +Subject: [PATCH] [extract_symbols.py] Better handling of templates + +Since commit 846b676 SmallVectorBase<uint32_t> has been explicitly +instantiated, which means that clang.exe must export it for a plugin +to be able to link against it, but the constructor is not exported as +currently no template constructors or destructors are exported. + +We can't just export all constructors and destructors, as that puts us +over the symbol limit on Windows, so instead rewrite how we decide +which templates need to be exported to be more precise. 
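(Background sketch, hedged and not taken from the patch itself: the reason a reference to a template member in some object file implies an explicit instantiation is the usual extern-template pattern. With invented names, roughly:

    // header (sketch): the member is defined here, but users of Vec<unsigned>
    // only *reference* it, because the extern declaration below suppresses
    // implicit instantiation in their translation units.
    template <class T> struct Vec { void push(T); };
    template <class T> void Vec<T>::push(T) { /* ... */ }
    extern template struct Vec<unsigned>;

    // one .cpp inside the binary (sketch): the explicit instantiation
    // definition, i.e. the single copy of Vec<unsigned>::push that an
    // out-of-tree plugin would link against, so it has to be exported.
    template struct Vec<unsigned>;

Had the template only ever been implicitly instantiated, every user would carry its own copy and no object file would contain an undefined reference to it; spotting such a reference is therefore a reliable sign of an explicit instantiation, as with the SmallVectorBase<uint32_t> case mentioned above.)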
Currently we +assume that templates instantiated many times have no explicit +instantiations, but this isn't necessarily true and results also in +exporting implicit template instantiations that we don't need +to. Instead check for references to template members, as this +indicates that the template must be explicitly instantiated (as if it +weren't the template would just be implicitly instantiated on use). + +Doing this reduces the number of symbols exported from clang from +66011 to 53993 (in the build configuration that I've been testing). It +also lets us get rid of the special-case handling of Type::getAs, as +its explicit instantiations are now being detected as such. + +Differential Revision: https://reviews.llvm.org/D142989 +--- + llvm/utils/extract_symbols.py | 200 ++++++++++++++++++---------------- + 1 file changed, 104 insertions(+), 96 deletions(-) + +diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py +index 298ee6ba4eeb..f64e3d1eebb9 100755 +--- a/llvm/utils/extract_symbols.py ++++ b/llvm/utils/extract_symbols.py +@@ -23,11 +23,11 @@ import subprocess + import multiprocessing + import argparse + +-# Define functions which extract a list of symbols from a library using several +-# different tools. We use subprocess.Popen and yield a symbol at a time instead +-# of using subprocess.check_output and returning a list as, especially on +-# Windows, waiting for the entire output to be ready can take a significant +-# amount of time. ++# Define functions which extract a list of pairs of (symbols, is_def) from a ++# library using several different tools. We use subprocess.Popen and yield a ++# symbol at a time instead of using subprocess.check_output and returning a list ++# as, especially on Windows, waiting for the entire output to be ready can take ++# a significant amount of time. + + def dumpbin_get_symbols(lib): + process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1, +@@ -35,10 +35,10 @@ def dumpbin_get_symbols(lib): + universal_newlines=True) + process.stdin.close() + for line in process.stdout: +- # Look for external symbols that are defined in some section +- match = re.match("^.+SECT.+External\s+\|\s+(\S+).*$", line) ++ # Look for external symbols ++ match = re.match("^.+(SECT|UNDEF).+External\s+\|\s+(\S+).*$", line) + if match: +- yield match.group(1) ++ yield (match.group(2), match.group(1) != "UNDEF") + process.wait() + + def nm_get_symbols(lib): +@@ -60,7 +60,11 @@ def nm_get_symbols(lib): + # but \s+ match newline also, so \s+\S* will match the optional size field. + match = re.match("^(\S+)\s+[BDGRSTVW]\s+\S+\s+\S*$", line) + if match: +- yield match.group(1) ++ yield (match.group(1), True) ++ # Look for undefined symbols, which have only name and type (which is U). ++ match = re.match("^(\S+)\s+U\s+$", line) ++ if match: ++ yield (match.group(1), False) + process.wait() + + def readobj_get_symbols(lib): +@@ -71,7 +75,7 @@ def readobj_get_symbols(lib): + for line in process.stdout: + # When looking through the output of llvm-readobj we expect to see Name, + # Section, then StorageClass, so record Name and Section when we see +- # them and decide if this is a defined external symbol when we see ++ # them and decide if this is an external symbol when we see + # StorageClass. 
+ match = re.search('Name: (\S+)', line) + if match: +@@ -83,9 +87,8 @@ def readobj_get_symbols(lib): + if match: + storageclass = match.group(1) + if section != 'IMAGE_SYM_ABSOLUTE' and \ +- section != 'IMAGE_SYM_UNDEFINED' and \ + storageclass == 'External': +- yield name ++ yield (name, section != 'IMAGE_SYM_UNDEFINED') + process.wait() + + # Define functions which determine if the target is 32-bit Windows (as that's +@@ -146,23 +149,11 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration): + if symbol.startswith(("__xmm@", "__ymm@", "__real@")): + return None + return symbol +- # Function template instantiations start with ?$; keep the instantiations of +- # clang::Type::getAs, as some of them are explipict specializations that are +- # defined in clang's lib/AST/Type.cpp; discard the rest as it's assumed that +- # the definition is public +- elif re.match('\?\?\$getAs@.+@Type@clang@@', symbol): +- return symbol +- elif symbol.startswith('??$'): +- return None + # Deleting destructors start with ?_G or ?_E and can be discarded because + # link.exe gives you a warning telling you they can't be exported if you + # don't + elif symbol.startswith('??_G') or symbol.startswith('??_E'): + return None +- # Constructors (?0) and destructors (?1) of templates (?$) are assumed to be +- # defined in headers and not required to be kept +- elif symbol.startswith('??0?$') or symbol.startswith('??1?$'): +- return None + # An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol + # that mentions an anonymous namespace can be discarded, as the anonymous + # namespace doesn't exist outside of that translation unit. +@@ -216,18 +207,6 @@ def should_keep_itanium_symbol(symbol, calling_convention_decoration): + return None + if not names: + return symbol +- # Constructors and destructors of templates classes are assumed to be +- # defined in headers and not required to be kept +- if re.match('[CD][123]', names[-1][0]) and names[-2][1]: +- return None +- # Keep the instantiations of clang::Type::getAs, as some of them are +- # explipict specializations that are defined in clang's lib/AST/Type.cpp; +- # discard any other function template instantiations as it's assumed that +- # the definition is public +- elif symbol.startswith('_ZNK5clang4Type5getAs'): +- return symbol +- elif names[-1][1]: +- return None + # Keep llvm:: and clang:: names + elif names[0][0] == '4llvm' or names[0][0] == '5clang': + return symbol +@@ -338,14 +317,79 @@ def parse_itanium_nested_name(arg): + # If we get here then something went wrong + return None, None + ++# Parse a microsoft mangled symbol and return a list of pairs of ++# (name, is_template). This is very rudimentary and does just enough ++# in order to determine if the first or second component is a template. ++def parse_microsoft_mangling(arg): ++ # If the name doesn't start with ? 
this isn't a mangled name ++ if not arg.startswith('?'): ++ return [(arg, False)] ++ arg = arg[1:] ++ components = [] ++ while len(arg) > 0: ++ # If we see an empty component we've reached the end ++ if arg.startswith('@'): ++ return components ++ # Check for a simple name ++ match = re.match('(\w+)@(.+)', arg) ++ if match: ++ components.append((match.group(1), False)) ++ arg = match.group(2) ++ continue ++ # Check for a special function name ++ match = re.match('(\?_?\w)(.+)', arg) ++ if match: ++ components.append((match.group(1), False)) ++ arg = match.group(2) ++ continue ++ # Check for a template name ++ match = re.match('\?\$(\w+)@[^@]+@(.+)', arg) ++ if match: ++ components.append((match.group(1), True)) ++ arg = match.group(2) ++ continue ++ # Some other kind of name that we can't handle ++ components.append((arg, False)) ++ return components ++ return components ++ + def extract_symbols(arg): + get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg +- symbols = dict() +- for symbol in get_symbols(lib): ++ symbol_defs = dict() ++ symbol_refs = set() ++ for (symbol, is_def) in get_symbols(lib): + symbol = should_keep_symbol(symbol, calling_convention_decoration) + if symbol: +- symbols[symbol] = 1 + symbols.setdefault(symbol,0) +- return symbols ++ if is_def: ++ symbol_defs[symbol] = 1 + symbol_defs.setdefault(symbol,0) ++ else: ++ symbol_refs.add(symbol) ++ return (symbol_defs, symbol_refs) ++ ++def get_template_name(sym, mangling): ++ # Parse the mangling into a list of (name, is_template) ++ try: ++ if mangling == 'microsoft': ++ names = parse_microsoft_mangling(sym) ++ else: ++ match = re.match('_Z(T[VTIS])?(N.+)', sym) ++ if match: ++ names, _ = parse_itanium_nested_name(match.group(2)) ++ else: ++ names = None ++ except TooComplexName: ++ return None ++ ++ if not names: ++ return None ++ ++ # If any component is a template then return it ++ for name, is_template in names: ++ if is_template: ++ return name ++ ++ # Not a template ++ return None + + if __name__ == '__main__': + tool_exes = ['dumpbin','nm','objdump','llvm-readobj'] +@@ -458,68 +502,32 @@ if __name__ == '__main__': + exit(1) + + # Merge everything into a single dict +- symbols = dict() +- for this_lib_symbols in libs_symbols: +- for k,v in list(this_lib_symbols.items()): +- symbols[k] = v + symbols.setdefault(k,0) +- +- # Count instances of member functions of template classes, and map the +- # symbol name to the function+class. We do this under the assumption that if +- # a member function of a template class is instantiated many times it's +- # probably declared in a public header file. +- template_function_count = dict() +- template_function_mapping = dict() +- template_function_count[""] = 0 +- for k in symbols: +- name = None +- if args.mangling == 'microsoft': +- # Member functions of templates start with +- # ?<fn_name>@?$<class_name>@, so we map to <fn_name>@?$<class_name>. +- # As manglings go from the innermost scope to the outermost scope +- # this means: +- # * When we have a function member of a subclass of a template +- # class then <fn_name> will actually contain the mangling of +- # both the subclass and the function member. This is fine. +- # * When we have a function member of a template subclass of a +- # (possibly template) class then it's the innermost template +- # subclass that becomes <class_name>. This should be OK so long +- # as we don't have multiple classes with a template subclass of +- # the same name. 
+- match = re.search("^\?(\??\w+\@\?\$\w+)\@", k) +- if match: +- name = match.group(1) +- else: +- # Find member functions of templates by demangling the name and +- # checking if the second-to-last name in the list is a template. +- match = re.match('_Z(T[VTIS])?(N.+)', k) +- if match: +- try: +- names, _ = parse_itanium_nested_name(match.group(2)) +- if names and names[-2][1]: +- name = ''.join([x for x,_ in names]) +- except TooComplexName: +- # Manglings that are too complex should already have been +- # filtered out, but if we happen to somehow see one here +- # just leave it as-is. +- pass +- if name: +- old_count = template_function_count.setdefault(name,0) +- template_function_count[name] = old_count + 1 +- template_function_mapping[k] = name +- else: +- template_function_mapping[k] = "" ++ symbol_defs = dict() ++ symbol_refs = set() ++ for (this_lib_defs, this_lib_refs) in libs_symbols: ++ for k,v in list(this_lib_defs.items()): ++ symbol_defs[k] = v + symbol_defs.setdefault(k,0) ++ for sym in list(this_lib_refs): ++ symbol_refs.add(sym) ++ ++ # Find which template instantiations are referenced at least once. ++ template_instantiation_refs = set() ++ for sym in list(symbol_refs): ++ template = get_template_name(sym, args.mangling) ++ if template: ++ template_instantiation_refs.add(template) + + # Print symbols which both: + # * Appear in exactly one input, as symbols defined in multiple + # objects/libraries are assumed to have public definitions. +- # * Aren't instances of member functions of templates which have been +- # instantiated 100 times or more, which are assumed to have public +- # definitions. (100 is an arbitrary guess here.) ++ # * Are not a template instantiation that isn't referenced anywhere. This ++ # is because we need to export any explicitly instantiated templates, ++ # and we expect those to be referenced in some object. + if args.o: + outfile = open(args.o,'w') + else: + outfile = sys.stdout +- for k,v in list(symbols.items()): +- template_count = template_function_count[template_function_mapping[k]] +- if v == 1 and template_count < 100: ++ for k,v in list(symbol_defs.items()): ++ template = get_template_name(k, args.mangling) ++ if v == 1 and (not template or template in template_instantiation_refs): + print(k, file=outfile) +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/llvmorg-17-init-2171-g8198f30f7e75.patch b/build/build-clang/llvmorg-17-init-2171-g8198f30f7e75.patch new file mode 100644 index 0000000000..408e83fbb5 --- /dev/null +++ b/build/build-clang/llvmorg-17-init-2171-g8198f30f7e75.patch @@ -0,0 +1,99 @@ +From 8198f30f7e756e3368c3eda62ecc3d0cc62d1570 Mon Sep 17 00:00:00 2001 +From: Jez Ng <jezng@fb.com> +Date: Tue, 14 Feb 2023 14:34:19 -0500 +Subject: [PATCH] [lld-macho] Account for alignment in thunk insertion + algorithm + +We previously neglected this, leading us to underestimate the maximum +possible branch address offset. + +Fixing this should allow us to reduce `slop` to more reasonable +levels. I've lowered it to 256 for now, though I suspect we could go +lower. + +Fixes https://github.com/llvm/llvm-project/issues/59259. 
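(A quick range calculation, offered as an aside rather than upstream text: the AArch64 b instruction encodes a signed 26-bit word offset, so its forward reach is at most 2^25 * 4 bytes = 0x8000000, i.e. 128 MiB. In the regression test added below, _spacer1 is 0x4000000 bytes minus the slop and a handful of bytes, and _spacer2 is another 0x4000000 bytes placed under .p2align 14; ignoring the alignment padding, the branch from _foo to _bar appears to fit just inside that reach, while the up to 0x3fff bytes of padding push it past 0x8000000, which is exactly the case the revised size estimate now catches by inserting a thunk.)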
+ +Reviewed By: serge-sans-paille + +Differential Revision: https://reviews.llvm.org/D144029 +--- + lld/MachO/ConcatOutputSection.cpp | 10 +++-- + lld/test/MachO/arm64-thunk-for-alignment.s | 44 ++++++++++++++++++++++ + 2 files changed, 51 insertions(+), 3 deletions(-) + create mode 100644 lld/test/MachO/arm64-thunk-for-alignment.s + +diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp +index cbd3a2492d25..b522bd9b289e 100644 +--- a/lld/MachO/ConcatOutputSection.cpp ++++ b/lld/MachO/ConcatOutputSection.cpp +@@ -246,10 +246,14 @@ void TextOutputSection::finalize() { + // contains several branch instructions in succession, then the distance + // from the current position to the position where the thunks are inserted + // grows. So leave room for a bunch of thunks. +- unsigned slop = 1024 * thunkSize; +- while (finalIdx < endIdx && addr + size + inputs[finalIdx]->getSize() < +- isecVA + forwardBranchRange - slop) ++ unsigned slop = 256 * thunkSize; ++ while (finalIdx < endIdx) { ++ size_t expectedNewSize = alignTo(addr + size, inputs[finalIdx]->align) + ++ inputs[finalIdx]->getSize(); ++ if (expectedNewSize >= isecVA + forwardBranchRange - slop) ++ break; + finalizeOne(inputs[finalIdx++]); ++ } + + if (!isec->hasCallSites) + continue; +diff --git a/lld/test/MachO/arm64-thunk-for-alignment.s b/lld/test/MachO/arm64-thunk-for-alignment.s +new file mode 100644 +index 000000000000..f497b81f705b +--- /dev/null ++++ b/lld/test/MachO/arm64-thunk-for-alignment.s +@@ -0,0 +1,44 @@ ++# REQUIRES: aarch64 ++# RUN: rm -rf %t; split-file %s %t ++# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o ++# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o ++# RUN: %lld -dylib -arch arm64 -lSystem -o %t/out %t/foo.o %t/bar.o ++ ++# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s ++# CHECK: _bar.thunk.0 ++ ++## Regression test for PR59259. Previously, we neglected to check section ++## alignments when deciding when to create thunks. ++ ++## If we ignore alignment, the total size of _spacer1 + _spacer2 below is just ++## under the limit at which we attempt to insert thunks between the spacers. ++## However, with alignment accounted for, their total size ends up being ++## 0x8000000, which is just above the max forward branch range, making thunk ++## insertion necessary. Thus, not accounting for alignment led to an error. ++ ++#--- foo.s ++ ++_foo: ++ b _bar ++ ++## Size of a `b` instruction. ++.equ callSize, 4 ++## Refer to `slop` in TextOutputSection::finalize(). ++.equ slopSize, 12 * 256 ++ ++_spacer1: ++ .space 0x4000000 - slopSize - 2 * callSize - 1 ++ ++.subsections_via_symbols ++ ++#--- bar.s ++.globl _bar ++ ++.p2align 14 ++_spacer2: ++ .space 0x4000000 ++ ++_bar: ++ ret ++ ++.subsections_via_symbols +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/llvmorg-17-init-237-g1b9fbc81ff15.patch b/build/build-clang/llvmorg-17-init-237-g1b9fbc81ff15.patch new file mode 100644 index 0000000000..4283a1f17a --- /dev/null +++ b/build/build-clang/llvmorg-17-init-237-g1b9fbc81ff15.patch @@ -0,0 +1,45 @@ +From 1b9fbc81ff15f6ad5a0e7f29c486c6edd0bce94c Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 26 Jan 2023 21:28:09 +0100 +Subject: [PATCH] [extract_symbols.py] Filter out more symbols for MSVC + +This strips out about 5k symbols. 
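(Concrete example, hand-written rather than taken from the patch: MSVC materializes floating-point and vector constants as compiler-generated COMDAT symbols named after their bit patterns, e.g. __real@3ff0000000000000 for the double constant 1.0, or __xmm@... for a 128-bit constant. Such symbols are not part of clang's interface, so filtering them out of the export list loses nothing for plugins.)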
+ +Fixes https://github.com/llvm/llvm-project/issues/60109 + +Reviewed By: john.brawn + +Differential Revision: https://reviews.llvm.org/D142431 +--- + llvm/utils/extract_symbols.py | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py +index 0f8e8ba64c80..298ee6ba4eeb 100755 +--- a/llvm/utils/extract_symbols.py ++++ b/llvm/utils/extract_symbols.py +@@ -141,7 +141,10 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration): + # Remove calling convention decoration from names + match = re.match('[_@]([^@]+)', symbol) + if match: +- return match.group(1) ++ symbol = match.group(1) ++ # Discard floating point/SIMD constants. ++ if symbol.startswith(("__xmm@", "__ymm@", "__real@")): ++ return None + return symbol + # Function template instantiations start with ?$; keep the instantiations of + # clang::Type::getAs, as some of them are explipict specializations that are +@@ -165,6 +168,9 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration): + # namespace doesn't exist outside of that translation unit. + elif re.search('\?A(0x\w+)?@', symbol): + return None ++ # Skip X86GenMnemonicTables functions, they are not exposed from llvm/include/. ++ elif re.match('\?is[A-Z0-9]*@X86@llvm', symbol): ++ return None + # Keep mangled llvm:: and clang:: function symbols. How we detect these is a + # bit of a mess and imprecise, but that avoids having to completely demangle + # the symbol name. The outermost namespace is at the end of the identifier +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/llvmorg-17-init-4170-g5c602c46b1ef.patch b/build/build-clang/llvmorg-17-init-4170-g5c602c46b1ef.patch new file mode 100644 index 0000000000..c1c76064b5 --- /dev/null +++ b/build/build-clang/llvmorg-17-init-4170-g5c602c46b1ef.patch @@ -0,0 +1,98 @@ +From 39e048e60ae2153f7621b7d1a1949dcb69778fa1 Mon Sep 17 00:00:00 2001 +From: Michael Platings <michael.platings@arm.com> +Date: Mon, 6 Mar 2023 22:53:54 +0000 +Subject: [PATCH] Use LLVM_USE_SYMLINKS option in install_symlink + +The change to potentially use symlinks on Windows was added in +https://reviews.llvm.org/D99170. + +LLVM_USE_SYMLINKS was added more recently in +https://reviews.llvm.org/D135578 and allows specifying at configure time +whether or not symlinks should be created. The benefit of using this +option is it allows building the package on a symlink-capable Windows +machine with symlinks disabled so that the resulting package can be used +on a Windows machine that doesn't support symlinks. 
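(Usage sketch, assuming only what the patch itself adds: configuring with -DLLVM_USE_SYMLINKS=OFF makes install_symlink run "cmake -E copy" instead of "cmake -E create_symlink", so an installed alias such as clang++ ends up as a copy of the clang binary rather than a symlink, which is what allows a package produced on a symlink-capable Windows machine to work on one that is not.)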
+ +Differential Revision: https://reviews.llvm.org/D145443 +--- + llvm/cmake/modules/AddLLVM.cmake | 16 ++++++++++++++-- + llvm/cmake/modules/LLVMInstallSymlink.cmake | 14 ++++++-------- + 2 files changed, 20 insertions(+), 10 deletions(-) + +diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake +index 76695e69e740..fa23bf1d883a 100644 +--- a/llvm/cmake/modules/AddLLVM.cmake ++++ b/llvm/cmake/modules/AddLLVM.cmake +@@ -2008,13 +2008,19 @@ function(llvm_install_library_symlink name dest type) + set(full_name ${CMAKE_${type}_LIBRARY_PREFIX}${name}${CMAKE_${type}_LIBRARY_SUFFIX}) + set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX}) + ++ if(LLVM_USE_SYMLINKS) ++ set(LLVM_LINK_OR_COPY create_symlink) ++ else() ++ set(LLVM_LINK_OR_COPY copy) ++ endif() ++ + set(output_dir lib${LLVM_LIBDIR_SUFFIX}) + if(WIN32 AND "${type}" STREQUAL "SHARED") + set(output_dir "${CMAKE_INSTALL_BINDIR}") + endif() + + install(SCRIPT ${INSTALL_SYMLINK} +- CODE "install_symlink(\"${full_name}\" \"${full_dest}\" \"${output_dir}\")" ++ CODE "install_symlink(\"${full_name}\" \"${full_dest}\" \"${output_dir}\" \"${LLVM_LINK_OR_COPY}\")" + COMPONENT ${component}) + + endfunction() +@@ -2049,10 +2055,16 @@ function(llvm_install_symlink project name dest) + set(full_dest llvm${CMAKE_EXECUTABLE_SUFFIX}) + endif() + ++ if(LLVM_USE_SYMLINKS) ++ set(LLVM_LINK_OR_COPY create_symlink) ++ else() ++ set(LLVM_LINK_OR_COPY copy) ++ endif() ++ + set(output_dir "${${project}_TOOLS_INSTALL_DIR}") + + install(SCRIPT ${INSTALL_SYMLINK} +- CODE "install_symlink(\"${full_name}\" \"${full_dest}\" \"${output_dir}\")" ++ CODE "install_symlink(\"${full_name}\" \"${full_dest}\" \"${output_dir}\" \"${LLVM_LINK_OR_COPY}\")" + COMPONENT ${component}) + + if (NOT LLVM_ENABLE_IDE AND NOT ARG_ALWAYS_GENERATE) +diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake +index e9be04aceb3d..fb61265543d1 100644 +--- a/llvm/cmake/modules/LLVMInstallSymlink.cmake ++++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake +@@ -4,7 +4,10 @@ + set(CMAKE_INSTALL_LIBDIR "lib") + include(GNUInstallDirs) + +-function(install_symlink name target outdir) ++function(install_symlink name target outdir link_or_copy) ++ # link_or_copy is the "command" to pass to cmake -E. ++ # It should be either "create_symlink" or "copy". 
++ + set(DESTDIR $ENV{DESTDIR}) + if(NOT IS_ABSOLUTE "${outdir}") + set(outdir "${CMAKE_INSTALL_PREFIX}/${outdir}") +@@ -14,12 +17,7 @@ function(install_symlink name target outdir) + message(STATUS "Creating ${name}") + + execute_process( +- COMMAND "${CMAKE_COMMAND}" -E create_symlink "${target}" "${name}" +- WORKING_DIRECTORY "${outdir}" ERROR_VARIABLE has_err) +- if(CMAKE_HOST_WIN32 AND has_err) +- execute_process( +- COMMAND "${CMAKE_COMMAND}" -E copy "${target}" "${name}" +- WORKING_DIRECTORY "${outdir}") +- endif() ++ COMMAND "${CMAKE_COMMAND}" -E ${link_or_copy} "${target}" "${name}" ++ WORKING_DIRECTORY "${outdir}") + + endfunction() +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/llvmorg-17-init-6897-g415b1cfd57de.patch b/build/build-clang/llvmorg-17-init-6897-g415b1cfd57de.patch new file mode 100644 index 0000000000..596e32e56e --- /dev/null +++ b/build/build-clang/llvmorg-17-init-6897-g415b1cfd57de.patch @@ -0,0 +1,395 @@ +From 415b1cfd57de62da8af9ad8dc567fc9d918dbaa5 Mon Sep 17 00:00:00 2001 +From: Thurston Dang <thurston@google.com> +Date: Mon, 3 Apr 2023 21:14:40 +0000 +Subject: [PATCH] Add __sanitizer_get_allocated_begin API and implementations + +This function will return the start of the allocation, if given a pointer that lies within an allocation. Otherwise, it returns NULL. + +It will be useful for detecting dynamic TLS allocations in glibc >=2.25, which +uses malloc (see https://github.com/google/sanitizers/issues/1409#issuecomment-1214244142). + +Reviewed By: vitalybuka + +Differential Revision: https://reviews.llvm.org/D147005 +--- + .../include/sanitizer/allocator_interface.h | 4 ++ + compiler-rt/lib/asan/asan_allocator.cpp | 15 +++++ + compiler-rt/lib/dfsan/dfsan_allocator.cpp | 18 ++++++ + compiler-rt/lib/hwasan/hwasan_allocator.cpp | 21 +++++++ + compiler-rt/lib/lsan/lsan_allocator.cpp | 21 +++++++ + compiler-rt/lib/memprof/memprof_allocator.cpp | 16 +++++ + compiler-rt/lib/msan/msan_allocator.cpp | 19 ++++++ + .../sanitizer_allocator_interface.h | 2 + + .../sanitizer_allocator_internal.h | 3 +- + .../sanitizer_common_interface.inc | 1 + + compiler-rt/lib/tsan/rtl/tsan_mman.cpp | 18 ++++++ + .../TestCases/get_allocated_begin.cpp | 58 +++++++++++++++++++ + 12 files changed, 195 insertions(+), 1 deletion(-) + create mode 100644 compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp + +diff --git a/compiler-rt/include/sanitizer/allocator_interface.h b/compiler-rt/include/sanitizer/allocator_interface.h +index 6226135ef84b3..d846f3f330741 100644 +--- a/compiler-rt/include/sanitizer/allocator_interface.h ++++ b/compiler-rt/include/sanitizer/allocator_interface.h +@@ -26,6 +26,10 @@ extern "C" { + is not yet freed. */ + int __sanitizer_get_ownership(const volatile void *p); + ++ /* If a pointer lies within an allocation, it will return the start address ++ of the allocation. Otherwise, it returns nullptr. */ ++ void *__sanitizer_get_allocated_begin(const void *p); ++ + /* Returns the number of bytes reserved for the pointer p. + Requires (get_ownership(p) == true) or (p == 0). 
*/ + size_t __sanitizer_get_allocated_size(const volatile void *p); +diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp +index 4c52a45b875c7..4b65b44a88f91 100644 +--- a/compiler-rt/lib/asan/asan_allocator.cpp ++++ b/compiler-rt/lib/asan/asan_allocator.cpp +@@ -1164,6 +1164,17 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { + // ---------------------- Interface ---------------- {{{1 + using namespace __asan; + ++void *AllocationBegin(const void *p) { ++ AsanChunk *m = __asan::instance.GetAsanChunkByAddr((uptr)p); ++ if (!m) ++ return nullptr; ++ if (atomic_load(&m->chunk_state, memory_order_acquire) != CHUNK_ALLOCATED) ++ return nullptr; ++ if (m->UsedSize() == 0) ++ return nullptr; ++ return (void *)(m->Beg()); ++} ++ + // ASan allocator doesn't reserve extra bytes, so normally we would + // just return "size". We don't want to expose our redzone sizes, etc here. + uptr __sanitizer_get_estimated_allocated_size(uptr size) { +@@ -1187,6 +1198,10 @@ uptr __sanitizer_get_allocated_size(const void *p) { + return allocated_size; + } + ++void *__sanitizer_get_allocated_begin(const void *p) { ++ return AllocationBegin(p); ++} ++ + void __sanitizer_purge_allocator() { + GET_STACK_TRACE_MALLOC; + instance.Purge(&stack); +diff --git a/compiler-rt/lib/dfsan/dfsan_allocator.cpp b/compiler-rt/lib/dfsan/dfsan_allocator.cpp +index 5fb8fef213b9a..cebf9983c9490 100644 +--- a/compiler-rt/lib/dfsan/dfsan_allocator.cpp ++++ b/compiler-rt/lib/dfsan/dfsan_allocator.cpp +@@ -174,6 +174,20 @@ void *DFsanCalloc(uptr nmemb, uptr size) { + return DFsanAllocate(nmemb * size, sizeof(u64), true /*zeroise*/); + } + ++void *AllocationBegin(const void *p) { ++ if (!p) ++ return nullptr; ++ const void *beg = allocator.GetBlockBegin(p); ++ if (!beg) ++ return nullptr; ++ Metadata *b = (Metadata *)allocator.GetMetaData(beg); ++ if (!b) ++ return nullptr; ++ if (b->requested_size == 0) ++ return nullptr; ++ return (void *)beg; ++} ++ + static uptr AllocationSize(const void *p) { + if (!p) + return 0; +@@ -294,4 +308,8 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + + int __sanitizer_get_ownership(const void *p) { return AllocationSize(p) != 0; } + ++void *__sanitizer_get_allocated_begin(const void *p) { ++ return AllocationBegin(p); ++} ++ + uptr __sanitizer_get_allocated_size(const void *p) { return AllocationSize(p); } +diff --git a/compiler-rt/lib/hwasan/hwasan_allocator.cpp b/compiler-rt/lib/hwasan/hwasan_allocator.cpp +index d096a8faa2c7e..8ccdeb23fa995 100644 +--- a/compiler-rt/lib/hwasan/hwasan_allocator.cpp ++++ b/compiler-rt/lib/hwasan/hwasan_allocator.cpp +@@ -397,6 +397,23 @@ HwasanChunkView FindHeapChunkByAddress(uptr address) { + return HwasanChunkView(reinterpret_cast<uptr>(block), metadata); + } + ++void *AllocationBegin(const void *p) { ++ const void *untagged_ptr = UntagPtr(p); ++ if (!untagged_ptr) ++ return nullptr; ++ ++ const void *beg = allocator.GetBlockBegin(untagged_ptr); ++ if (!beg) ++ return nullptr; ++ ++ Metadata *b = (Metadata *)allocator.GetMetaData(beg); ++ if (b->GetRequestedSize() == 0) ++ return nullptr; ++ ++ tag_t tag = GetTagFromPointer((uptr)p); ++ return (void *)AddTagToPointer((uptr)beg, tag); ++} ++ + static uptr AllocationSize(const void *tagged_ptr) { + const void *untagged_ptr = UntagPtr(tagged_ptr); + if (!untagged_ptr) return 0; +@@ -641,4 +658,8 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + + int __sanitizer_get_ownership(const void *p) { return 
AllocationSize(p) != 0; } + ++void *__sanitizer_get_allocated_begin(const void *p) { ++ return AllocationBegin(p); ++} ++ + uptr __sanitizer_get_allocated_size(const void *p) { return AllocationSize(p); } +diff --git a/compiler-rt/lib/lsan/lsan_allocator.cpp b/compiler-rt/lib/lsan/lsan_allocator.cpp +index 37ba363d479dd..d50882657dc33 100644 +--- a/compiler-rt/lib/lsan/lsan_allocator.cpp ++++ b/compiler-rt/lib/lsan/lsan_allocator.cpp +@@ -145,6 +145,22 @@ void GetAllocatorCacheRange(uptr *begin, uptr *end) { + *end = *begin + sizeof(AllocatorCache); + } + ++void *GetMallocBegin(const void *p) { ++ if (!p) ++ return nullptr; ++ const void *beg = allocator.GetBlockBegin(p); ++ if (!beg) ++ return nullptr; ++ ChunkMetadata *m = Metadata(beg); ++ if (!m) ++ return nullptr; ++ if (!m->allocated) ++ return nullptr; ++ if (m->requested_size == 0) ++ return nullptr; ++ return (void *)beg; ++} ++ + uptr GetMallocUsableSize(const void *p) { + if (!p) + return 0; +@@ -363,6 +379,11 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + SANITIZER_INTERFACE_ATTRIBUTE + int __sanitizer_get_ownership(const void *p) { return Metadata(p) != nullptr; } + ++SANITIZER_INTERFACE_ATTRIBUTE ++void * __sanitizer_get_allocated_begin(const void *p) { ++ return GetMallocBegin(p); ++} ++ + SANITIZER_INTERFACE_ATTRIBUTE + uptr __sanitizer_get_allocated_size(const void *p) { + return GetMallocUsableSize(p); +diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp +index 51c3a66ebd680..80a87d49dfc6e 100644 +--- a/compiler-rt/lib/memprof/memprof_allocator.cpp ++++ b/compiler-rt/lib/memprof/memprof_allocator.cpp +@@ -681,6 +681,18 @@ int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, + return 0; + } + ++void *memprof_malloc_begin(const void *p) { ++ u64 user_requested_size; ++ MemprofChunk *m = ++ instance.GetMemprofChunkByAddr((uptr)p, user_requested_size); ++ if (!m) ++ return nullptr; ++ if (user_requested_size == 0) ++ return nullptr; ++ ++ return (void *)m->Beg(); ++} ++ + uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) { + if (!ptr) + return 0; +@@ -699,6 +711,10 @@ int __sanitizer_get_ownership(const void *p) { + return memprof_malloc_usable_size(p, 0, 0) != 0; + } + ++void *__sanitizer_get_allocated_begin(const void *p) { ++ return memprof_malloc_begin(p); ++} ++ + uptr __sanitizer_get_allocated_size(const void *p) { + return memprof_malloc_usable_size(p, 0, 0); + } +diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp +index 3308ee7053a61..a760a434158a5 100644 +--- a/compiler-rt/lib/msan/msan_allocator.cpp ++++ b/compiler-rt/lib/msan/msan_allocator.cpp +@@ -260,6 +260,21 @@ static void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) { + return MsanAllocate(stack, nmemb * size, sizeof(u64), true); + } + ++void *AllocationBegin(const void *p) { ++ if (!p) ++ return nullptr; ++ const void *beg = allocator.GetBlockBegin(p); ++ if (!beg) ++ return nullptr; ++ Metadata *b = (Metadata *)allocator.GetMetaData(beg); ++ if (!b) ++ return nullptr; ++ if (b->requested_size == 0) ++ return nullptr; ++ ++ return (void *)beg; ++} ++ + static uptr AllocationSize(const void *p) { + if (!p) return 0; + const void *beg = allocator.GetBlockBegin(p); +@@ -373,4 +388,8 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + + int __sanitizer_get_ownership(const void *p) { return AllocationSize(p) != 0; } + ++void *__sanitizer_get_allocated_begin(const 
void *p) { ++ return AllocationBegin(p); ++} ++ + uptr __sanitizer_get_allocated_size(const void *p) { return AllocationSize(p); } +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h +index c1b27563e2fc7..35c7c97df3299 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h +@@ -21,6 +21,8 @@ extern "C" { + SANITIZER_INTERFACE_ATTRIBUTE + uptr __sanitizer_get_estimated_allocated_size(uptr size); + SANITIZER_INTERFACE_ATTRIBUTE int __sanitizer_get_ownership(const void *p); ++SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void * ++__sanitizer_get_allocated_begin(const void *p); + SANITIZER_INTERFACE_ATTRIBUTE uptr + __sanitizer_get_allocated_size(const void *p); + SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_current_allocated_bytes(); +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h +index 38994736877ac..adbdad5a1ee0c 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h +@@ -51,7 +51,8 @@ void InternalFree(void *p, InternalAllocatorCache *cache = nullptr); + void InternalAllocatorLock(); + void InternalAllocatorUnlock(); + InternalAllocator *internal_allocator(); +- ++int __sanitizer_get_allocation_bounds(const void *p, void **start, ++ unsigned long long *size); + } // namespace __sanitizer + + #endif // SANITIZER_ALLOCATOR_INTERNAL_H +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc +index 958f071e7b5f7..01be600e33ba3 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc +@@ -32,6 +32,7 @@ INTERFACE_FUNCTION(__sanitizer_get_module_and_offset_for_pc) + INTERFACE_FUNCTION(__sanitizer_symbolize_global) + INTERFACE_FUNCTION(__sanitizer_symbolize_pc) + // Allocator interface. ++INTERFACE_FUNCTION(__sanitizer_get_allocated_begin) + INTERFACE_FUNCTION(__sanitizer_get_allocated_size) + INTERFACE_FUNCTION(__sanitizer_get_current_allocated_bytes) + INTERFACE_FUNCTION(__sanitizer_get_estimated_allocated_size) +diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +index 99fa492265615..9c548dfff91f3 100644 +--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +@@ -352,6 +352,20 @@ void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) { + return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, PageSize)); + } + ++void *user_alloc_begin(const void *p) { ++ if (p == nullptr || !IsAppMem((uptr)p)) ++ return nullptr; ++ const void *beg = allocator()->GetBlockBegin(p); ++ if (!beg) ++ return nullptr; ++ ++ MBlock *b = ctx->metamap.GetBlock((uptr)beg); ++ if (!b) ++ return nullptr; // Not a valid pointer. 
++ ++ return (void *)beg; ++} ++ + uptr user_alloc_usable_size(const void *p) { + if (p == 0 || !IsAppMem((uptr)p)) + return 0; +@@ -430,6 +444,10 @@ int __sanitizer_get_ownership(const void *p) { + return allocator()->GetBlockBegin(p) != 0; + } + ++void *__sanitizer_get_allocated_begin(const void *p) { ++ return user_alloc_begin(p); ++} ++ + uptr __sanitizer_get_allocated_size(const void *p) { + return user_alloc_usable_size(p); + } +diff --git a/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp b/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp +new file mode 100644 +index 0000000000000..6892a4a7fb282 +--- /dev/null ++++ b/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp +@@ -0,0 +1,58 @@ ++// RUN: %clangxx -O0 -g %s -o %t && %run %t ++ ++// UBSan does not have its own allocator ++// UNSUPPORTED: ubsan ++ ++#include <assert.h> ++#include <sanitizer/allocator_interface.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++ ++// Based on lib/msan/tests/msan_test.cpp::get_allocated_size_and_ownership ++int main(void) { ++ int sizes[] = {10, 100, 1000, 10000, 100000, 1000000}; ++ ++ for (int i = 0; i < sizeof(sizes) / sizeof(int); i++) { ++ printf("Testing size %d\n", sizes[i]); ++ ++ char *array = reinterpret_cast<char *>(malloc(sizes[i])); ++ int *int_ptr = new int; ++ printf("array: %p\n", array); ++ printf("int_ptr: %p\n", int_ptr); ++ ++ // Bogus value to unpoison start. Calling __sanitizer_get_allocated_begin ++ // does not unpoison it. ++ void *start = NULL; ++ for (int j = 0; j < sizes[i]; j++) { ++ printf("j: %d\n", j); ++ ++ start = __sanitizer_get_allocated_begin(array + j); ++ printf("Start: %p (expected: %p)\n", start, array); ++ fflush(stdout); ++ assert(array == start); ++ } ++ ++ start = __sanitizer_get_allocated_begin(int_ptr); ++ assert(int_ptr == start); ++ ++ void *wild_addr = reinterpret_cast<void *>(4096 * 160); ++ assert(__sanitizer_get_allocated_begin(wild_addr) == NULL); ++ ++ wild_addr = reinterpret_cast<void *>(0x1); ++ assert(__sanitizer_get_allocated_begin(wild_addr) == NULL); ++ ++ // NULL is a valid argument for GetAllocatedSize but is not owned. ++ assert(__sanitizer_get_allocated_begin(NULL) == NULL); ++ ++ free(array); ++ for (int j = 0; j < sizes[i]; j++) { ++ assert(__sanitizer_get_allocated_begin(array + j) == NULL); ++ } ++ ++ delete int_ptr; ++ assert(__sanitizer_get_allocated_begin(int_ptr) == NULL); ++ } ++ ++ return 0; ++} diff --git a/build/build-clang/llvmorg-17-init-6905-gc81a322476a1.patch b/build/build-clang/llvmorg-17-init-6905-gc81a322476a1.patch new file mode 100644 index 0000000000..2e54236e05 --- /dev/null +++ b/build/build-clang/llvmorg-17-init-6905-gc81a322476a1.patch @@ -0,0 +1,68 @@ +From c81a322476a1b1c57ca72832e10c43663557e097 Mon Sep 17 00:00:00 2001 +From: Jie Fu <jiefu@tencent.com> +Date: Tue, 4 Apr 2023 07:40:34 +0800 +Subject: [PATCH] [compiler-rt] Fix -Wcast-qual after D147005 (NFC) + +/home/jiefu/llvm-project/compiler-rt/lib/lsan/lsan_allocator.cpp:161:18: error: cast from 'const void *' to 'void *' drops const qualifier [-Werror,-Wcast-qual] + return (void *)beg; + ^ +1 error generated. 
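[Editorial aside, not part of the patch files above or below: a minimal C++ sketch of how the __sanitizer_get_allocated_begin interface introduced by the preceding patch (D147005) can be exercised, mirroring its get_allocated_begin.cpp test. The -fsanitize=asan build flag is only an illustrative choice; any sanitizer runtime that implements the allocator interface is assumed to behave the same way.

  // Build with e.g.: clang++ -fsanitize=address -g example.cpp
  #include <assert.h>
  #include <sanitizer/allocator_interface.h>
  #include <stdlib.h>

  int main(void) {
    char *array = (char *)malloc(100);
    // Any interior pointer resolves to the start of its allocation.
    assert(__sanitizer_get_allocated_begin(array + 50) == array);
    // Addresses not belonging to a live allocation (including NULL) resolve to NULL.
    assert(__sanitizer_get_allocated_begin(NULL) == NULL);
    free(array);
    return 0;
  }

The D147459 patch later in this series relies on exactly this lookup to recognize dynamic TLS blocks that glibc >= 2.25 allocates with malloc.]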
+--- + compiler-rt/lib/dfsan/dfsan_allocator.cpp | 2 +- + compiler-rt/lib/lsan/lsan_allocator.cpp | 2 +- + compiler-rt/lib/msan/msan_allocator.cpp | 2 +- + compiler-rt/lib/tsan/rtl/tsan_mman.cpp | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/compiler-rt/lib/dfsan/dfsan_allocator.cpp b/compiler-rt/lib/dfsan/dfsan_allocator.cpp +index cebf9983c9490..7ae6024fb2c9d 100644 +--- a/compiler-rt/lib/dfsan/dfsan_allocator.cpp ++++ b/compiler-rt/lib/dfsan/dfsan_allocator.cpp +@@ -177,7 +177,7 @@ void *DFsanCalloc(uptr nmemb, uptr size) { + void *AllocationBegin(const void *p) { + if (!p) + return nullptr; +- const void *beg = allocator.GetBlockBegin(p); ++ void *beg = allocator.GetBlockBegin(p); + if (!beg) + return nullptr; + Metadata *b = (Metadata *)allocator.GetMetaData(beg); +diff --git a/compiler-rt/lib/lsan/lsan_allocator.cpp b/compiler-rt/lib/lsan/lsan_allocator.cpp +index d50882657dc33..b0a54d7cd9bc5 100644 +--- a/compiler-rt/lib/lsan/lsan_allocator.cpp ++++ b/compiler-rt/lib/lsan/lsan_allocator.cpp +@@ -148,7 +148,7 @@ void GetAllocatorCacheRange(uptr *begin, uptr *end) { + void *GetMallocBegin(const void *p) { + if (!p) + return nullptr; +- const void *beg = allocator.GetBlockBegin(p); ++ void *beg = allocator.GetBlockBegin(p); + if (!beg) + return nullptr; + ChunkMetadata *m = Metadata(beg); +diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp +index a760a434158a5..08ec3314b26e6 100644 +--- a/compiler-rt/lib/msan/msan_allocator.cpp ++++ b/compiler-rt/lib/msan/msan_allocator.cpp +@@ -263,7 +263,7 @@ static void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) { + void *AllocationBegin(const void *p) { + if (!p) + return nullptr; +- const void *beg = allocator.GetBlockBegin(p); ++ void *beg = allocator.GetBlockBegin(p); + if (!beg) + return nullptr; + Metadata *b = (Metadata *)allocator.GetMetaData(beg); +diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +index 9c548dfff91f3..3cc4d16955ede 100644 +--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +@@ -355,7 +355,7 @@ void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) { + void *user_alloc_begin(const void *p) { + if (p == nullptr || !IsAppMem((uptr)p)) + return nullptr; +- const void *beg = allocator()->GetBlockBegin(p); ++ void *beg = allocator()->GetBlockBegin(p); + if (!beg) + return nullptr; + diff --git a/build/build-clang/llvmorg-17-init-6909-gd644ab022a7b.patch b/build/build-clang/llvmorg-17-init-6909-gd644ab022a7b.patch new file mode 100644 index 0000000000..ed7dcc8b2e --- /dev/null +++ b/build/build-clang/llvmorg-17-init-6909-gd644ab022a7b.patch @@ -0,0 +1,279 @@ +From d644ab022a7be985255db29fd466798e9b138bee Mon Sep 17 00:00:00 2001 +From: Thurston Dang <thurston@google.com> +Date: Tue, 4 Apr 2023 00:42:37 +0000 +Subject: [PATCH] Update __sanitizer_get_allocated_begin to return const void* + +D147005 introduced __sanitizer_get_allocated_begin, with a return +value of void*. This involved a few naughty casts that dropped the +const. This patch adds back the const qualifier. 
+ +Differential Revision: https://reviews.llvm.org/D147489 +--- + compiler-rt/include/sanitizer/allocator_interface.h | 2 +- + compiler-rt/lib/asan/asan_allocator.cpp | 6 +++--- + compiler-rt/lib/dfsan/dfsan_allocator.cpp | 6 +++--- + compiler-rt/lib/hwasan/hwasan_allocator.cpp | 6 +++--- + compiler-rt/lib/lsan/lsan_allocator.cpp | 6 +++--- + compiler-rt/lib/memprof/memprof_allocator.cpp | 6 +++--- + compiler-rt/lib/msan/msan_allocator.cpp | 6 +++--- + .../lib/sanitizer_common/sanitizer_allocator_interface.h | 2 +- + compiler-rt/lib/tsan/rtl/tsan_mman.cpp | 6 +++--- + .../test/sanitizer_common/TestCases/get_allocated_begin.cpp | 2 +- + 10 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/compiler-rt/include/sanitizer/allocator_interface.h b/compiler-rt/include/sanitizer/allocator_interface.h +index d846f3f330741..d0cfce79c1aef 100644 +--- a/compiler-rt/include/sanitizer/allocator_interface.h ++++ b/compiler-rt/include/sanitizer/allocator_interface.h +@@ -28,7 +28,7 @@ extern "C" { + + /* If a pointer lies within an allocation, it will return the start address + of the allocation. Otherwise, it returns nullptr. */ +- void *__sanitizer_get_allocated_begin(const void *p); ++ const void *__sanitizer_get_allocated_begin(const void *p); + + /* Returns the number of bytes reserved for the pointer p. + Requires (get_ownership(p) == true) or (p == 0). */ +diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp +index 4b65b44a88f91..708d975a93dcf 100644 +--- a/compiler-rt/lib/asan/asan_allocator.cpp ++++ b/compiler-rt/lib/asan/asan_allocator.cpp +@@ -1164,7 +1164,7 @@ IgnoreObjectResult IgnoreObjectLocked(const void *p) { + // ---------------------- Interface ---------------- {{{1 + using namespace __asan; + +-void *AllocationBegin(const void *p) { ++const void *AllocationBegin(const void *p) { + AsanChunk *m = __asan::instance.GetAsanChunkByAddr((uptr)p); + if (!m) + return nullptr; +@@ -1172,7 +1172,7 @@ void *AllocationBegin(const void *p) { + return nullptr; + if (m->UsedSize() == 0) + return nullptr; +- return (void *)(m->Beg()); ++ return (const void *)(m->Beg()); + } + + // ASan allocator doesn't reserve extra bytes, so normally we would +@@ -1198,7 +1198,7 @@ uptr __sanitizer_get_allocated_size(const void *p) { + return allocated_size; + } + +-void *__sanitizer_get_allocated_begin(const void *p) { ++const void *__sanitizer_get_allocated_begin(const void *p) { + return AllocationBegin(p); + } + +diff --git a/compiler-rt/lib/dfsan/dfsan_allocator.cpp b/compiler-rt/lib/dfsan/dfsan_allocator.cpp +index 7ae6024fb2c9d..36346d163d982 100644 +--- a/compiler-rt/lib/dfsan/dfsan_allocator.cpp ++++ b/compiler-rt/lib/dfsan/dfsan_allocator.cpp +@@ -174,7 +174,7 @@ void *DFsanCalloc(uptr nmemb, uptr size) { + return DFsanAllocate(nmemb * size, sizeof(u64), true /*zeroise*/); + } + +-void *AllocationBegin(const void *p) { ++const void *AllocationBegin(const void *p) { + if (!p) + return nullptr; + void *beg = allocator.GetBlockBegin(p); +@@ -185,7 +185,7 @@ void *AllocationBegin(const void *p) { + return nullptr; + if (b->requested_size == 0) + return nullptr; +- return (void *)beg; ++ return (const void *)beg; + } + + static uptr AllocationSize(const void *p) { +@@ -308,7 +308,7 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + + int __sanitizer_get_ownership(const void *p) { return AllocationSize(p) != 0; } + +-void *__sanitizer_get_allocated_begin(const void *p) { ++const void *__sanitizer_get_allocated_begin(const void 
*p) { + return AllocationBegin(p); + } + +diff --git a/compiler-rt/lib/hwasan/hwasan_allocator.cpp b/compiler-rt/lib/hwasan/hwasan_allocator.cpp +index 8ccdeb23fa995..994a580dc95e0 100644 +--- a/compiler-rt/lib/hwasan/hwasan_allocator.cpp ++++ b/compiler-rt/lib/hwasan/hwasan_allocator.cpp +@@ -397,7 +397,7 @@ HwasanChunkView FindHeapChunkByAddress(uptr address) { + return HwasanChunkView(reinterpret_cast<uptr>(block), metadata); + } + +-void *AllocationBegin(const void *p) { ++const void *AllocationBegin(const void *p) { + const void *untagged_ptr = UntagPtr(p); + if (!untagged_ptr) + return nullptr; +@@ -411,7 +411,7 @@ void *AllocationBegin(const void *p) { + return nullptr; + + tag_t tag = GetTagFromPointer((uptr)p); +- return (void *)AddTagToPointer((uptr)beg, tag); ++ return (const void *)AddTagToPointer((uptr)beg, tag); + } + + static uptr AllocationSize(const void *tagged_ptr) { +@@ -658,7 +658,7 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + + int __sanitizer_get_ownership(const void *p) { return AllocationSize(p) != 0; } + +-void *__sanitizer_get_allocated_begin(const void *p) { ++const void *__sanitizer_get_allocated_begin(const void *p) { + return AllocationBegin(p); + } + +diff --git a/compiler-rt/lib/lsan/lsan_allocator.cpp b/compiler-rt/lib/lsan/lsan_allocator.cpp +index b0a54d7cd9bc5..471b134a26471 100644 +--- a/compiler-rt/lib/lsan/lsan_allocator.cpp ++++ b/compiler-rt/lib/lsan/lsan_allocator.cpp +@@ -145,7 +145,7 @@ void GetAllocatorCacheRange(uptr *begin, uptr *end) { + *end = *begin + sizeof(AllocatorCache); + } + +-void *GetMallocBegin(const void *p) { ++const void *GetMallocBegin(const void *p) { + if (!p) + return nullptr; + void *beg = allocator.GetBlockBegin(p); +@@ -158,7 +158,7 @@ void *GetMallocBegin(const void *p) { + return nullptr; + if (m->requested_size == 0) + return nullptr; +- return (void *)beg; ++ return (const void *)beg; + } + + uptr GetMallocUsableSize(const void *p) { +@@ -380,7 +380,7 @@ SANITIZER_INTERFACE_ATTRIBUTE + int __sanitizer_get_ownership(const void *p) { return Metadata(p) != nullptr; } + + SANITIZER_INTERFACE_ATTRIBUTE +-void * __sanitizer_get_allocated_begin(const void *p) { ++const void * __sanitizer_get_allocated_begin(const void *p) { + return GetMallocBegin(p); + } + +diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp +index 80a87d49dfc6e..49c0aad39cfbd 100644 +--- a/compiler-rt/lib/memprof/memprof_allocator.cpp ++++ b/compiler-rt/lib/memprof/memprof_allocator.cpp +@@ -681,7 +681,7 @@ int memprof_posix_memalign(void **memptr, uptr alignment, uptr size, + return 0; + } + +-void *memprof_malloc_begin(const void *p) { ++const void *memprof_malloc_begin(const void *p) { + u64 user_requested_size; + MemprofChunk *m = + instance.GetMemprofChunkByAddr((uptr)p, user_requested_size); +@@ -690,7 +690,7 @@ void *memprof_malloc_begin(const void *p) { + if (user_requested_size == 0) + return nullptr; + +- return (void *)m->Beg(); ++ return (const void *)m->Beg(); + } + + uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) { +@@ -711,7 +711,7 @@ int __sanitizer_get_ownership(const void *p) { + return memprof_malloc_usable_size(p, 0, 0) != 0; + } + +-void *__sanitizer_get_allocated_begin(const void *p) { ++const void *__sanitizer_get_allocated_begin(const void *p) { + return memprof_malloc_begin(p); + } + +diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp +index 08ec3314b26e6..1013303af6795 100644 
+--- a/compiler-rt/lib/msan/msan_allocator.cpp ++++ b/compiler-rt/lib/msan/msan_allocator.cpp +@@ -260,7 +260,7 @@ static void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) { + return MsanAllocate(stack, nmemb * size, sizeof(u64), true); + } + +-void *AllocationBegin(const void *p) { ++const void *AllocationBegin(const void *p) { + if (!p) + return nullptr; + void *beg = allocator.GetBlockBegin(p); +@@ -272,7 +272,7 @@ void *AllocationBegin(const void *p) { + if (b->requested_size == 0) + return nullptr; + +- return (void *)beg; ++ return (const void *)beg; + } + + static uptr AllocationSize(const void *p) { +@@ -388,7 +388,7 @@ uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; } + + int __sanitizer_get_ownership(const void *p) { return AllocationSize(p) != 0; } + +-void *__sanitizer_get_allocated_begin(const void *p) { ++const void *__sanitizer_get_allocated_begin(const void *p) { + return AllocationBegin(p); + } + +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h +index 35c7c97df3299..504109e9d3f6f 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h +@@ -21,7 +21,7 @@ extern "C" { + SANITIZER_INTERFACE_ATTRIBUTE + uptr __sanitizer_get_estimated_allocated_size(uptr size); + SANITIZER_INTERFACE_ATTRIBUTE int __sanitizer_get_ownership(const void *p); +-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void * ++SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const void * + __sanitizer_get_allocated_begin(const void *p); + SANITIZER_INTERFACE_ATTRIBUTE uptr + __sanitizer_get_allocated_size(const void *p); +diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +index 3cc4d16955ede..b548265fe6833 100644 +--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp ++++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +@@ -352,7 +352,7 @@ void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) { + return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, PageSize)); + } + +-void *user_alloc_begin(const void *p) { ++const void *user_alloc_begin(const void *p) { + if (p == nullptr || !IsAppMem((uptr)p)) + return nullptr; + void *beg = allocator()->GetBlockBegin(p); +@@ -363,7 +363,7 @@ void *user_alloc_begin(const void *p) { + if (!b) + return nullptr; // Not a valid pointer. + +- return (void *)beg; ++ return (const void *)beg; + } + + uptr user_alloc_usable_size(const void *p) { +@@ -444,7 +444,7 @@ int __sanitizer_get_ownership(const void *p) { + return allocator()->GetBlockBegin(p) != 0; + } + +-void *__sanitizer_get_allocated_begin(const void *p) { ++const void *__sanitizer_get_allocated_begin(const void *p) { + return user_alloc_begin(p); + } + +diff --git a/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp b/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp +index 6892a4a7fb282..1683063baea26 100644 +--- a/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp ++++ b/compiler-rt/test/sanitizer_common/TestCases/get_allocated_begin.cpp +@@ -23,7 +23,7 @@ int main(void) { + + // Bogus value to unpoison start. Calling __sanitizer_get_allocated_begin + // does not unpoison it. 
+- void *start = NULL; ++ const void *start = NULL; + for (int j = 0; j < sizes[i]; j++) { + printf("j: %d\n", j); + diff --git a/build/build-clang/llvmorg-17-init-8140-gb1bd52cd0d86.patch b/build/build-clang/llvmorg-17-init-8140-gb1bd52cd0d86.patch new file mode 100644 index 0000000000..35446958fc --- /dev/null +++ b/build/build-clang/llvmorg-17-init-8140-gb1bd52cd0d86.patch @@ -0,0 +1,162 @@ +From b1bd52cd0d8627df1187448b8247a9c7a4675019 Mon Sep 17 00:00:00 2001 +From: Thurston Dang <thurston@google.com> +Date: Wed, 12 Apr 2023 20:53:49 +0000 +Subject: [PATCH] Fix tls_get_addr handling for glibc >=2.25 + +This changes the sanitizers' tls_get_addr handling from +a heuristic check of __signal_safe_memalign allocations +(which has only been used in a since deprecated version +of Google's runtime), to using the sanitizers' interface +function to check if it is a malloc allocation (used +since glibc >= 2.25). + +This is one of the approaches proposed by Keno in +https://github.com/google/sanitizers/issues/1409#issuecomment-1214244142 + +This moves the weak annotation of __sanitizer_get_allocated_size/begin from the header to sanitizer_tls_get_addr.cpp, as suggested by Vitaly in D148060. + +Reviewed By: vitalybuka + +Differential Revision: https://reviews.llvm.org/D147459 +--- + .../sanitizer_allocator_interface.h | 4 +-- + .../sanitizer_tls_get_addr.cpp | 29 ++++++++++--------- + .../sanitizer_common/sanitizer_tls_get_addr.h | 26 +++++++++++------ + compiler-rt/test/msan/dtls_test.c | 4 --- + 4 files changed, 34 insertions(+), 29 deletions(-) + +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h +index 504109e9d3f6f..8f3b71eb6ce74 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_interface.h +@@ -21,8 +21,8 @@ extern "C" { + SANITIZER_INTERFACE_ATTRIBUTE + uptr __sanitizer_get_estimated_allocated_size(uptr size); + SANITIZER_INTERFACE_ATTRIBUTE int __sanitizer_get_ownership(const void *p); +-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const void * +-__sanitizer_get_allocated_begin(const void *p); ++SANITIZER_INTERFACE_ATTRIBUTE const void *__sanitizer_get_allocated_begin( ++ const void *p); + SANITIZER_INTERFACE_ATTRIBUTE uptr + __sanitizer_get_allocated_size(const void *p); + SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_current_allocated_bytes(); +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp +index b13e2dc9e3327..252979f1c2baa 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp +@@ -12,6 +12,7 @@ + + #include "sanitizer_tls_get_addr.h" + ++#include "sanitizer_allocator_interface.h" + #include "sanitizer_atomic.h" + #include "sanitizer_flags.h" + #include "sanitizer_platform_interceptors.h" +@@ -26,13 +27,6 @@ struct TlsGetAddrParam { + uptr offset; + }; + +-// Glibc starting from 2.19 allocates tls using __signal_safe_memalign, +-// which has such header. 
+-struct Glibc_2_19_tls_header { +- uptr size; +- uptr start; +-}; +- + // This must be static TLS + __attribute__((tls_model("initial-exec"))) + static __thread DTLS dtls; +@@ -108,6 +102,14 @@ static const uptr kDtvOffset = 0x800; + static const uptr kDtvOffset = 0; + #endif + ++extern "C" { ++SANITIZER_WEAK_ATTRIBUTE ++uptr __sanitizer_get_allocated_size(const void *p); ++ ++SANITIZER_WEAK_ATTRIBUTE ++const void *__sanitizer_get_allocated_begin(const void *p); ++} ++ + DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, + uptr static_tls_begin, uptr static_tls_end) { + if (!common_flags()->intercept_tls_get_addr) return 0; +@@ -125,19 +127,18 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, + atomic_load(&number_of_live_dtls, memory_order_relaxed)); + if (dtls.last_memalign_ptr == tls_beg) { + tls_size = dtls.last_memalign_size; +- VReport(2, "__tls_get_addr: glibc <=2.18 suspected; tls={0x%zx,0x%zx}\n", ++ VReport(2, "__tls_get_addr: glibc <=2.24 suspected; tls={0x%zx,0x%zx}\n", + tls_beg, tls_size); + } else if (tls_beg >= static_tls_begin && tls_beg < static_tls_end) { + // This is the static TLS block which was initialized / unpoisoned at thread + // creation. + VReport(2, "__tls_get_addr: static tls: 0x%zx\n", tls_beg); + tls_size = 0; +- } else if ((tls_beg % 4096) == sizeof(Glibc_2_19_tls_header)) { +- // We may want to check gnu_get_libc_version(). +- Glibc_2_19_tls_header *header = (Glibc_2_19_tls_header *)tls_beg - 1; +- tls_size = header->size; +- tls_beg = header->start; +- VReport(2, "__tls_get_addr: glibc >=2.19 suspected; tls={0x%zx 0x%zx}\n", ++ } else if (const void *start = ++ __sanitizer_get_allocated_begin((void *)tls_beg)) { ++ tls_beg = (uptr)start; ++ tls_size = __sanitizer_get_allocated_size(start); ++ VReport(2, "__tls_get_addr: glibc >=2.25 suspected; tls={0x%zx,0x%zx}\n", + tls_beg, tls_size); + } else { + VReport(2, "__tls_get_addr: Can't guess glibc version\n"); +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.h b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.h +index a599c0bbc75cc..0ddab61deb102 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.h ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.h +@@ -12,16 +12,24 @@ + // the lack of interface that would tell us about the Dynamic TLS (DTLS). + // https://sourceware.org/bugzilla/show_bug.cgi?id=16291 + // +-// The matters get worse because the glibc implementation changed between +-// 2.18 and 2.19: +-// https://groups.google.com/forum/#!topic/address-sanitizer/BfwYD8HMxTM +-// +-// Before 2.19, every DTLS chunk is allocated with __libc_memalign, ++// Before 2.25: every DTLS chunk is allocated with __libc_memalign, + // which we intercept and thus know where is the DTLS. +-// Since 2.19, DTLS chunks are allocated with __signal_safe_memalign, +-// which is an internal function that wraps a mmap call, neither of which +-// we can intercept. Luckily, __signal_safe_memalign has a simple parseable +-// header which we can use. ++// ++// Since 2.25: DTLS chunks are allocated with malloc. We could co-opt ++// the malloc interceptor to keep track of the last allocation, similar ++// to how we handle __libc_memalign; however, this adds some overhead ++// (since malloc, unlike __libc_memalign, is commonly called), and ++// requires care to avoid false negatives for LeakSanitizer. 
++// Instead, we rely on our internal allocators - which keep track of all ++// its allocations - to determine if an address points to a malloc ++// allocation. ++// ++// There exists a since-deprecated version of Google's internal glibc fork ++// that used __signal_safe_memalign. DTLS_on_tls_get_addr relied on a ++// heuristic check (is the allocation 16 bytes from the start of a page ++// boundary?), which was sometimes erroneous: ++// https://bugs.chromium.org/p/chromium/issues/detail?id=1275223#c15 ++// Since that check has no practical use anymore, we have removed it. + // + //===----------------------------------------------------------------------===// + +diff --git a/compiler-rt/test/msan/dtls_test.c b/compiler-rt/test/msan/dtls_test.c +index 45c8fd38bf5f6..3c384256147a0 100644 +--- a/compiler-rt/test/msan/dtls_test.c ++++ b/compiler-rt/test/msan/dtls_test.c +@@ -12,10 +12,6 @@ + // Reports use-of-uninitialized-value, not analyzed + XFAIL: target={{.*netbsd.*}} + +- // This is known to be broken with glibc-2.27+ +- // https://bugs.llvm.org/show_bug.cgi?id=37804 +- XFAIL: glibc-2.27 +- + */ + + #ifndef BUILD_SO diff --git a/build/build-clang/llvmorg-17-init-994-g1e72920c8859.patch b/build/build-clang/llvmorg-17-init-994-g1e72920c8859.patch new file mode 100644 index 0000000000..12d817fbe0 --- /dev/null +++ b/build/build-clang/llvmorg-17-init-994-g1e72920c8859.patch @@ -0,0 +1,842 @@ +From 4cb60673a0a25a25d171716c5b90e7a3368d434f Mon Sep 17 00:00:00 2001 +From: Alexey Lapshin <a.v.lapshin@mail.ru> +Date: Mon, 30 Jan 2023 15:05:53 +0100 +Subject: [PATCH] [dsymutil] dsymutil produces broken lines info (probably) + with LTO on mac + +This patch fixes #60307 issue. The 8bb4451 introduces the possibility +to unite overlapped or adjacent address ranges to keep address ranges +in an unambiguous state. The AddressRangesMap is used to normalize +address ranges. The AddressRangesMap keeps address ranges and the value +of the relocated address. For intersected range, it creates a united +range that keeps the last inserted mapping value. The same for adjusted ranges. +While it is OK to use the last inserted mapping value for intersected ranges +(as there is no way how to resolve ambiguity) It is not OK to use the +last inserted value for adjacent address ranges. Currently, two following +address ranges are united into a single one: + +{0,24,17e685c} {24,d8,55afe20} -> {0,d8,55afe20} + +To avoid the problem, the AddressRangesMap should not unite adjacent address ranges +with different relocated addresses. Instead, it should leave adjacent address ranges +as separate ranges. 
So, the ranges should look like this: + +{0,24,17e685c} {24,d8,55afe20} + +Differential Revision: https://reviews.llvm.org/D142936 +--- + llvm/include/llvm/ADT/AddressRanges.h | 206 ++++++++----- + .../llvm/DWARFLinker/DWARFLinkerCompileUnit.h | 2 +- + llvm/lib/DWARFLinker/DWARFLinker.cpp | 36 +-- + llvm/lib/DWARFLinker/DWARFStreamer.cpp | 5 +- + llvm/lib/Support/AddressRanges.cpp | 70 ----- + llvm/lib/Support/CMakeLists.txt | 1 - + llvm/unittests/Support/AddressRangeTest.cpp | 285 +++++++++++++++--- + 7 files changed, 398 insertions(+), 207 deletions(-) + delete mode 100644 llvm/lib/Support/AddressRanges.cpp + +diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h +index f2052d82e7c1..415d30bbb5cf 100644 +--- a/llvm/include/llvm/ADT/AddressRanges.h ++++ b/llvm/include/llvm/ADT/AddressRanges.h +@@ -28,7 +28,11 @@ public: + uint64_t start() const { return Start; } + uint64_t end() const { return End; } + uint64_t size() const { return End - Start; } ++ uint64_t empty() const { return size() == 0; } + bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; } ++ bool contains(const AddressRange &R) const { ++ return Start <= R.Start && R.End <= End; ++ } + bool intersects(const AddressRange &R) const { + return Start < R.End && R.Start < End; + } +@@ -45,101 +49,163 @@ private: + uint64_t End = 0; + }; + +-/// The AddressRanges class helps normalize address range collections. +-/// This class keeps a sorted vector of AddressRange objects and can perform +-/// insertions and searches efficiently. The address ranges are always sorted +-/// and never contain any invalid or empty address ranges. +-/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300)) +-/// address ranges are combined during insertion. +-class AddressRanges { ++/// The AddressRangesBase class presents the base functionality for the ++/// normalized address ranges collection. This class keeps a sorted vector ++/// of AddressRange-like objects and can perform searches efficiently. ++/// The address ranges are always sorted and never contain any invalid, ++/// empty or intersected address ranges. 
++ ++template <typename T> class AddressRangesBase { + protected: +- using Collection = SmallVector<AddressRange>; ++ using Collection = SmallVector<T>; + Collection Ranges; + + public: + void clear() { Ranges.clear(); } + bool empty() const { return Ranges.empty(); } +- bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); } ++ bool contains(uint64_t Addr) const { ++ return find(Addr, Addr + 1) != Ranges.end(); ++ } + bool contains(AddressRange Range) const { +- return find(Range) != Ranges.end(); ++ return find(Range.start(), Range.end()) != Ranges.end(); + } +- std::optional<AddressRange> getRangeThatContains(uint64_t Addr) const { +- Collection::const_iterator It = find(Addr); ++ void reserve(size_t Capacity) { Ranges.reserve(Capacity); } ++ size_t size() const { return Ranges.size(); } ++ ++ std::optional<T> getRangeThatContains(uint64_t Addr) const { ++ typename Collection::const_iterator It = find(Addr, Addr + 1); + if (It == Ranges.end()) + return std::nullopt; + + return *It; + } +- Collection::const_iterator insert(AddressRange Range); +- void reserve(size_t Capacity) { Ranges.reserve(Capacity); } +- size_t size() const { return Ranges.size(); } +- bool operator==(const AddressRanges &RHS) const { +- return Ranges == RHS.Ranges; +- } +- const AddressRange &operator[](size_t i) const { ++ ++ typename Collection::const_iterator begin() const { return Ranges.begin(); } ++ typename Collection::const_iterator end() const { return Ranges.end(); } ++ ++ const T &operator[](size_t i) const { + assert(i < Ranges.size()); + return Ranges[i]; + } +- Collection::const_iterator begin() const { return Ranges.begin(); } +- Collection::const_iterator end() const { return Ranges.end(); } ++ ++ bool operator==(const AddressRangesBase<T> &RHS) const { ++ return Ranges == RHS.Ranges; ++ } + + protected: +- Collection::const_iterator find(uint64_t Addr) const; +- Collection::const_iterator find(AddressRange Range) const; ++ typename Collection::const_iterator find(uint64_t Start, uint64_t End) const { ++ if (Start >= End) ++ return Ranges.end(); ++ ++ auto It = ++ std::partition_point(Ranges.begin(), Ranges.end(), [=](const T &R) { ++ return AddressRange(R).start() <= Start; ++ }); ++ ++ if (It == Ranges.begin()) ++ return Ranges.end(); ++ ++ --It; ++ if (End > AddressRange(*It).end()) ++ return Ranges.end(); ++ ++ return It; ++ } + }; + +-/// AddressRangesMap class maps values to the address ranges. +-/// It keeps address ranges and corresponding values. If ranges +-/// are combined during insertion, then combined range keeps +-/// newly inserted value. +-template <typename T> class AddressRangesMap : protected AddressRanges { ++/// The AddressRanges class helps normalize address range collections. ++/// This class keeps a sorted vector of AddressRange objects and can perform ++/// insertions and searches efficiently. Intersecting([100,200), [150,300)) ++/// and adjacent([100,200), [200,300)) address ranges are combined during ++/// insertion. 
++class AddressRanges : public AddressRangesBase<AddressRange> { + public: +- void clear() { +- Ranges.clear(); +- Values.clear(); ++ Collection::const_iterator insert(AddressRange Range) { ++ if (Range.empty()) ++ return Ranges.end(); ++ ++ auto It = llvm::upper_bound(Ranges, Range); ++ auto It2 = It; ++ while (It2 != Ranges.end() && It2->start() <= Range.end()) ++ ++It2; ++ if (It != It2) { ++ Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; ++ It = Ranges.erase(It, It2); ++ } ++ if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { ++ --It; ++ *It = {It->start(), std::max(It->end(), Range.end())}; ++ return It; ++ } ++ ++ return Ranges.insert(It, Range); + } +- bool empty() const { return AddressRanges::empty(); } +- bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); } +- bool contains(AddressRange Range) const { +- return AddressRanges::contains(Range); +- } +- void insert(AddressRange Range, T Value) { +- size_t InputSize = Ranges.size(); +- Collection::const_iterator RangesIt = AddressRanges::insert(Range); +- if (RangesIt == Ranges.end()) +- return; ++}; + +- // make Values match to Ranges. +- size_t Idx = RangesIt - Ranges.begin(); +- typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx; +- if (InputSize < Ranges.size()) +- Values.insert(ValuesIt, T()); +- else if (InputSize > Ranges.size()) +- Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size()); +- assert(Ranges.size() == Values.size()); +- +- // set value to the inserted or combined range. +- Values[Idx] = Value; +- } +- size_t size() const { +- assert(Ranges.size() == Values.size()); +- return AddressRanges::size(); +- } +- std::optional<std::pair<AddressRange, T>> +- getRangeValueThatContains(uint64_t Addr) const { +- Collection::const_iterator It = find(Addr); +- if (It == Ranges.end()) +- return std::nullopt; ++class AddressRangeValuePair { ++public: ++ operator AddressRange() const { return Range; } + +- return std::make_pair(*It, Values[It - Ranges.begin()]); +- } +- std::pair<AddressRange, T> operator[](size_t Idx) const { +- return std::make_pair(Ranges[Idx], Values[Idx]); +- } ++ AddressRange Range; ++ int64_t Value = 0; ++}; + +-protected: +- using ValuesCollection = SmallVector<T>; +- ValuesCollection Values; ++inline bool operator==(const AddressRangeValuePair &LHS, ++ const AddressRangeValuePair &RHS) { ++ return LHS.Range == RHS.Range && LHS.Value == RHS.Value; ++} ++ ++/// AddressRangesMap class maps values to the address ranges. ++/// It keeps normalized address ranges and corresponding values. ++/// This class keeps a sorted vector of AddressRangeValuePair objects ++/// and can perform insertions and searches efficiently. ++/// Intersecting([100,200), [150,300)) ranges splitted into non-conflicting ++/// parts([100,200), [200,300)). Adjacent([100,200), [200,300)) address ++/// ranges are not combined during insertion. ++class AddressRangesMap : public AddressRangesBase<AddressRangeValuePair> { ++public: ++ void insert(AddressRange Range, int64_t Value) { ++ if (Range.empty()) ++ return; ++ ++ // Search for range which is less than or equal incoming Range. ++ auto It = std::partition_point(Ranges.begin(), Ranges.end(), ++ [=](const AddressRangeValuePair &R) { ++ return R.Range.start() <= Range.start(); ++ }); ++ ++ if (It != Ranges.begin()) ++ It--; ++ ++ while (!Range.empty()) { ++ // Inserted range does not overlap with any range. ++ // Store it into the Ranges collection. 
++ if (It == Ranges.end() || Range.end() <= It->Range.start()) { ++ Ranges.insert(It, {Range, Value}); ++ return; ++ } ++ ++ // Inserted range partially overlaps with current range. ++ // Store not overlapped part of inserted range. ++ if (Range.start() < It->Range.start()) { ++ It = Ranges.insert(It, {{Range.start(), It->Range.start()}, Value}); ++ It++; ++ Range = {It->Range.start(), Range.end()}; ++ continue; ++ } ++ ++ // Inserted range fully overlaps with current range. ++ if (Range.end() <= It->Range.end()) ++ return; ++ ++ // Inserted range partially overlaps with current range. ++ // Remove overlapped part from the inserted range. ++ if (Range.start() < It->Range.end()) ++ Range = {It->Range.end(), Range.end()}; ++ ++ It++; ++ } ++ } + }; + + } // namespace llvm +diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +index 5b0ea339c4d6..9c7f24e69d48 100644 +--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h ++++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +@@ -21,7 +21,7 @@ class DeclContext; + + /// Mapped value in the address map is the offset to apply to the + /// linked address. +-using RangesTy = AddressRangesMap<int64_t>; ++using RangesTy = AddressRangesMap; + + // FIXME: Delete this structure. + struct PatchLocation { +diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp +index 9f6e54377ede..d302d61894fa 100644 +--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp ++++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp +@@ -1659,7 +1659,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, + DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(), + OrigDwarf.getDWARFObj().getRangesSection(), + OrigDwarf.isLittleEndian(), AddressSize); +- std::optional<std::pair<AddressRange, int64_t>> CachedRange; ++ std::optional<AddressRangeValuePair> CachedRange; + DWARFUnit &OrigUnit = Unit.getOrigUnit(); + auto OrigUnitDie = OrigUnit.getUnitDIE(false); + uint64_t UnitBaseAddress = +@@ -1687,9 +1687,9 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, + } + + if (!CachedRange || +- !CachedRange->first.contains(Range.StartAddress + BaseAddress)) +- CachedRange = FunctionRanges.getRangeValueThatContains( +- Range.StartAddress + BaseAddress); ++ !CachedRange->Range.contains(Range.StartAddress + BaseAddress)) ++ CachedRange = FunctionRanges.getRangeThatContains(Range.StartAddress + ++ BaseAddress); + + // All range entries should lie in the function range. + if (!CachedRange) { +@@ -1698,8 +1698,8 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, + } + + LinkedRanges.insert( +- {Range.StartAddress + BaseAddress + CachedRange->second, +- Range.EndAddress + BaseAddress + CachedRange->second}); ++ {Range.StartAddress + BaseAddress + CachedRange->Value, ++ Range.EndAddress + BaseAddress + CachedRange->Value}); + } + } + +@@ -1802,7 +1802,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + // in NewRows. + std::vector<DWARFDebugLine::Row> Seq; + const auto &FunctionRanges = Unit.getFunctionRanges(); +- std::optional<std::pair<AddressRange, int64_t>> CurrRange; ++ std::optional<AddressRangeValuePair> CurrRange; + + // FIXME: This logic is meant to generate exactly the same output as + // Darwin's classic dsymutil. 
There is a nicer way to implement this +@@ -1821,13 +1821,13 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + // it is marked as end_sequence in the input (because in that + // case, the relocation offset is accurate and that entry won't + // serve as the start of another function). +- if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) || +- (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) { ++ if (!CurrRange || !CurrRange->Range.contains(Row.Address.Address) || ++ (Row.Address.Address == CurrRange->Range.end() && !Row.EndSequence)) { + // We just stepped out of a known range. Insert a end_sequence + // corresponding to the end of the range. + uint64_t StopAddress = +- CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL; +- CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address); ++ CurrRange ? CurrRange->Range.end() + CurrRange->Value : -1ULL; ++ CurrRange = FunctionRanges.getRangeThatContains(Row.Address.Address); + if (!CurrRange) { + if (StopAddress != -1ULL) { + // Try harder by looking in the Address ranges map. +@@ -1836,9 +1836,9 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + // for now do as dsymutil. + // FIXME: Understand exactly what cases this addresses and + // potentially remove it along with the Ranges map. +- if (std::optional<std::pair<AddressRange, int64_t>> Range = +- Ranges.getRangeValueThatContains(Row.Address.Address)) +- StopAddress = Row.Address.Address + (*Range).second; ++ if (std::optional<AddressRangeValuePair> Range = ++ Ranges.getRangeThatContains(Row.Address.Address)) ++ StopAddress = Row.Address.Address + (*Range).Value; + } + } + if (StopAddress != -1ULL && !Seq.empty()) { +@@ -1863,7 +1863,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, + continue; + + // Relocate row address and add it to the current sequence. +- Row.Address.Address += CurrRange->second; ++ Row.Address.Address += CurrRange->Value; + Seq.emplace_back(Row); + + if (Row.EndSequence) +@@ -2002,8 +2002,8 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, + // the function entry point, thus we can't just lookup the address + // in the debug map. Use the AddressInfo's range map to see if the FDE + // describes something that we can relocate. +- std::optional<std::pair<AddressRange, int64_t>> Range = +- Ranges.getRangeValueThatContains(Loc); ++ std::optional<AddressRangeValuePair> Range = ++ Ranges.getRangeThatContains(Loc); + if (!Range) { + // The +4 is to account for the size of the InitialLength field itself. + InputOffset = EntryOffset + InitialLength + 4; +@@ -2032,7 +2032,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, + // fields that will get reconstructed by emitFDE(). + unsigned FDERemainingBytes = InitialLength - (4 + AddrSize); + TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize, +- Loc + Range->second, ++ Loc + Range->Value, + FrameData.substr(InputOffset, FDERemainingBytes)); + InputOffset += FDERemainingBytes; + } +diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp +index 5cad267fd845..ae79e8cb9066 100644 +--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp ++++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp +@@ -402,10 +402,9 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit, + // Linked addresses might end up in a different order. + // Build linked address ranges. 
+ AddressRanges LinkedRanges; +- for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++) ++ for (const AddressRangeValuePair &Range : FunctionRanges) + LinkedRanges.insert( +- {FunctionRanges[Idx].first.start() + FunctionRanges[Idx].second, +- FunctionRanges[Idx].first.end() + FunctionRanges[Idx].second}); ++ {Range.Range.start() + Range.Value, Range.Range.end() + Range.Value}); + + if (!FunctionRanges.empty()) + emitDwarfDebugArangesTable(Unit, LinkedRanges); +diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp +deleted file mode 100644 +index 187d5be00dae..000000000000 +--- a/llvm/lib/Support/AddressRanges.cpp ++++ /dev/null +@@ -1,70 +0,0 @@ +-//===- AddressRanges.cpp ----------------------------------------*- C++ -*-===// +-// +-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +-// See https://llvm.org/LICENSE.txt for license information. +-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +-// +-//===----------------------------------------------------------------------===// +- +-#include "llvm/ADT/AddressRanges.h" +-#include "llvm/ADT/STLExtras.h" +-#include <inttypes.h> +- +-using namespace llvm; +- +-AddressRanges::Collection::const_iterator +-AddressRanges::insert(AddressRange Range) { +- if (Range.size() == 0) +- return Ranges.end(); +- +- auto It = llvm::upper_bound(Ranges, Range); +- auto It2 = It; +- while (It2 != Ranges.end() && It2->start() <= Range.end()) +- ++It2; +- if (It != It2) { +- Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; +- It = Ranges.erase(It, It2); +- } +- if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { +- --It; +- *It = {It->start(), std::max(It->end(), Range.end())}; +- return It; +- } +- +- return Ranges.insert(It, Range); +-} +- +-AddressRanges::Collection::const_iterator +-AddressRanges::find(uint64_t Addr) const { +- auto It = std::partition_point( +- Ranges.begin(), Ranges.end(), +- [=](const AddressRange &R) { return R.start() <= Addr; }); +- +- if (It == Ranges.begin()) +- return Ranges.end(); +- +- --It; +- if (Addr >= It->end()) +- return Ranges.end(); +- +- return It; +-} +- +-AddressRanges::Collection::const_iterator +-AddressRanges::find(AddressRange Range) const { +- if (Range.size() == 0) +- return Ranges.end(); +- +- auto It = std::partition_point( +- Ranges.begin(), Ranges.end(), +- [=](const AddressRange &R) { return R.start() <= Range.start(); }); +- +- if (It == Ranges.begin()) +- return Ranges.end(); +- +- --It; +- if (Range.end() > It->end()) +- return Ranges.end(); +- +- return It; +-} +diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt +index 4cbc3b79f3bb..8fbb2ca4c164 100644 +--- a/llvm/lib/Support/CMakeLists.txt ++++ b/llvm/lib/Support/CMakeLists.txt +@@ -117,7 +117,6 @@ endif() + add_subdirectory(BLAKE3) + + add_llvm_component_library(LLVMSupport +- AddressRanges.cpp + ABIBreak.cpp + AMDGPUMetadata.cpp + APFixedPoint.cpp +diff --git a/llvm/unittests/Support/AddressRangeTest.cpp b/llvm/unittests/Support/AddressRangeTest.cpp +index 468f1e22ffa8..06b326678402 100644 +--- a/llvm/unittests/Support/AddressRangeTest.cpp ++++ b/llvm/unittests/Support/AddressRangeTest.cpp +@@ -149,8 +149,31 @@ TEST(AddressRangeTest, TestRanges) { + EXPECT_EQ(Ranges[0], AddressRange(0x1000, 0x5000)); + } + ++TEST(AddressRangeTest, TestRangesRandom) { ++ AddressRanges Ranges; ++ size_t NumElements = 100; ++ ++ std::srand(std::time(nullptr)); ++ ++ // Fill ranges. 
++ for (size_t Idx = 0; Idx < NumElements; Idx++) { ++ uint64_t Start = static_cast<uint64_t>(std::rand() % 1000); ++ uint64_t End = Start + static_cast<uint64_t>(std::rand() % 1000); ++ Ranges.insert({Start, End}); ++ } ++ ++ // Check ranges. ++ for (size_t Idx = 0; Idx + 1 < Ranges.size(); Idx++) { ++ // Check that ranges are not intersected. ++ EXPECT_FALSE(Ranges[Idx].intersects(Ranges[Idx + 1])); ++ ++ // Check that ranges are sorted and not adjusted. ++ EXPECT_TRUE(Ranges[Idx].end() < Ranges[Idx + 1].start()); ++ } ++} ++ + TEST(AddressRangeTest, TestRangesMap) { +- AddressRangesMap<int> Ranges; ++ AddressRangesMap Ranges; + + EXPECT_EQ(Ranges.size(), 0u); + EXPECT_TRUE(Ranges.empty()); +@@ -162,73 +185,247 @@ TEST(AddressRangeTest, TestRangesMap) { + EXPECT_TRUE(Ranges.contains(0x1500)); + EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x2000))); + ++ /////////////////////////////////////// ++ /// Check ranges with the same mapped value. ++ ++ // Clear ranges. ++ Ranges.clear(); ++ EXPECT_EQ(Ranges.size(), 0u); ++ EXPECT_TRUE(Ranges.empty()); ++ ++ // Add range and check mapped value. ++ Ranges.insert(AddressRange(0x1000, 0x2000), 0x11); ++ EXPECT_EQ(Ranges.size(), 1u); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0x11); ++ ++ // Add adjacent range and check mapped value. ++ Ranges.insert(AddressRange(0x2000, 0x3000), 0x11); ++ EXPECT_EQ(Ranges.size(), 2u); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0x11); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x2000)->Value, 0x11); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x2900)->Value, 0x11); ++ EXPECT_FALSE(Ranges.getRangeThatContains(0x3000)); ++ ++ // Add intersecting range and check mapped value. ++ Ranges.insert(AddressRange(0x1000, 0x3000), 0x11); ++ EXPECT_EQ(Ranges.size(), 2u); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0x11); ++ ++ // Add second range and check mapped values. ++ Ranges.insert(AddressRange(0x4000, 0x5000), 0x11); ++ EXPECT_EQ(Ranges.size(), 3u); ++ EXPECT_EQ(Ranges[0].Range, AddressRange(0x1000, 0x2000)); ++ EXPECT_EQ(Ranges[0].Value, 0x11); ++ EXPECT_EQ(Ranges[1].Range, AddressRange(0x2000, 0x3000)); ++ EXPECT_EQ(Ranges[1].Value, 0x11); ++ EXPECT_EQ(Ranges[2].Range, AddressRange(0x4000, 0x5000)); ++ EXPECT_EQ(Ranges[2].Value, 0x11); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0x11); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x4000)->Value, 0x11); ++ ++ // Add intersecting range and check mapped value. ++ Ranges.insert(AddressRange(0x0, 0x6000), 0x11); ++ EXPECT_EQ(Ranges.size(), 6u); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0x11); ++ ++ // Check that mapped values are correctly preserved for combined ranges. 
++ Ranges.clear(); ++ Ranges.insert(AddressRange(0x0, 0xff), 0x11); ++ Ranges.insert(AddressRange(0x100, 0x1ff), 0x11); ++ Ranges.insert(AddressRange(0x200, 0x2ff), 0x11); ++ Ranges.insert(AddressRange(0x500, 0x5ff), 0x11); ++ Ranges.insert(AddressRange(0x300, 0x3ff), 0x11); ++ Ranges.insert(AddressRange(0x400, 0x4ff), 0x11); ++ Ranges.insert(AddressRange(0x600, 0x6ff), 0x11); ++ EXPECT_EQ(Ranges.size(), 7u); ++ ++ Ranges.insert(AddressRange(0x150, 0x350), 0x11); ++ EXPECT_EQ(Ranges.size(), 9u); ++ EXPECT_EQ(Ranges[0].Range, AddressRange(0x0, 0xff)); ++ EXPECT_EQ(Ranges[0].Value, 0x11); ++ EXPECT_EQ(Ranges[1].Range, AddressRange(0x100, 0x1ff)); ++ EXPECT_EQ(Ranges[1].Value, 0x11); ++ EXPECT_EQ(Ranges[2].Range, AddressRange(0x1ff, 0x200)); ++ EXPECT_EQ(Ranges[2].Value, 0x11); ++ EXPECT_EQ(Ranges[3].Range, AddressRange(0x200, 0x2ff)); ++ EXPECT_EQ(Ranges[3].Value, 0x11); ++ EXPECT_EQ(Ranges[4].Range, AddressRange(0x2ff, 0x300)); ++ EXPECT_EQ(Ranges[4].Value, 0x11); ++ EXPECT_EQ(Ranges[5].Range, AddressRange(0x300, 0x3ff)); ++ EXPECT_EQ(Ranges[5].Value, 0x11); ++ EXPECT_EQ(Ranges[6].Range, AddressRange(0x400, 0x4ff)); ++ EXPECT_EQ(Ranges[6].Value, 0x11); ++ EXPECT_EQ(Ranges[7].Range, AddressRange(0x500, 0x5ff)); ++ EXPECT_EQ(Ranges[7].Value, 0x11); ++ EXPECT_EQ(Ranges[8].Range, AddressRange(0x600, 0x6ff)); ++ EXPECT_EQ(Ranges[8].Value, 0x11); ++ ++ Ranges.insert(AddressRange(0x3ff, 0x400), 0x11); ++ EXPECT_EQ(Ranges.size(), 10u); ++ EXPECT_EQ(Ranges[0].Range, AddressRange(0x0, 0xff)); ++ EXPECT_EQ(Ranges[0].Value, 0x11); ++ EXPECT_EQ(Ranges[1].Range, AddressRange(0x100, 0x1ff)); ++ EXPECT_EQ(Ranges[1].Value, 0x11); ++ EXPECT_EQ(Ranges[2].Range, AddressRange(0x1ff, 0x200)); ++ EXPECT_EQ(Ranges[2].Value, 0x11); ++ EXPECT_EQ(Ranges[3].Range, AddressRange(0x200, 0x2ff)); ++ EXPECT_EQ(Ranges[3].Value, 0x11); ++ EXPECT_EQ(Ranges[4].Range, AddressRange(0x2ff, 0x300)); ++ EXPECT_EQ(Ranges[4].Value, 0x11); ++ EXPECT_EQ(Ranges[5].Range, AddressRange(0x300, 0x3ff)); ++ EXPECT_EQ(Ranges[5].Value, 0x11); ++ EXPECT_EQ(Ranges[6].Range, AddressRange(0x3ff, 0x400)); ++ EXPECT_EQ(Ranges[6].Value, 0x11); ++ EXPECT_EQ(Ranges[7].Range, AddressRange(0x400, 0x4ff)); ++ EXPECT_EQ(Ranges[7].Value, 0x11); ++ EXPECT_EQ(Ranges[8].Range, AddressRange(0x500, 0x5ff)); ++ EXPECT_EQ(Ranges[8].Value, 0x11); ++ EXPECT_EQ(Ranges[9].Range, AddressRange(0x600, 0x6ff)); ++ EXPECT_EQ(Ranges[9].Value, 0x11); ++ ++ ///////////////////////////////////////////// ++ /// Check ranges with various mapped values. ++ + // Clear ranges. + Ranges.clear(); + EXPECT_EQ(Ranges.size(), 0u); + EXPECT_TRUE(Ranges.empty()); + +- // Add range and check value. ++ // Add range and check mapped value. + Ranges.insert(AddressRange(0x1000, 0x2000), 0xfe); + EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xfe); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0xfe); + +- // Add adjacent range and check value. ++ // Add adjacent range and check mapped value. 
+ Ranges.insert(AddressRange(0x2000, 0x3000), 0xfc); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xfc); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x2000)->second, 0xfc); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x2900)->second, 0xfc); +- EXPECT_FALSE(Ranges.getRangeValueThatContains(0x3000)); ++ EXPECT_EQ(Ranges.size(), 2u); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0xfe); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x2000)->Value, 0xfc); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x2900)->Value, 0xfc); ++ EXPECT_FALSE(Ranges.getRangeThatContains(0x3000)); + +- // Add intersecting range and check value. +- Ranges.insert(AddressRange(0x2000, 0x3000), 0xff); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xff); ++ // Add intersecting range and check mapped value. ++ Ranges.insert(AddressRange(0x1000, 0x3000), 0xff); ++ EXPECT_EQ(Ranges.size(), 2u); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0xfe); + +- // Add second range and check values. ++ // Add one more range and check mapped values. + Ranges.insert(AddressRange(0x4000, 0x5000), 0x0); +- EXPECT_EQ(Ranges.size(), 2u); +- EXPECT_EQ(Ranges[0].second, 0xff); +- EXPECT_EQ(Ranges[1].second, 0x0); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0xff); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x4000)->second, 0x0); ++ EXPECT_EQ(Ranges.size(), 3u); ++ EXPECT_EQ(Ranges[0].Value, 0xfe); ++ EXPECT_EQ(Ranges[1].Value, 0xfc); ++ EXPECT_EQ(Ranges[2].Value, 0x0); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0xfe); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x4000)->Value, 0x0); + +- // Add intersecting range and check value. ++ // Add intersecting range and check mapped value. + Ranges.insert(AddressRange(0x0, 0x6000), 0x1); +- EXPECT_EQ(Ranges.size(), 1u); +- EXPECT_EQ(Ranges.getRangeValueThatContains(0x1000)->second, 0x1); ++ EXPECT_EQ(Ranges.size(), 6u); ++ EXPECT_EQ(Ranges[0].Value, 0x1); ++ EXPECT_EQ(Ranges[1].Value, 0xfe); ++ EXPECT_EQ(Ranges[2].Value, 0xfc); ++ EXPECT_EQ(Ranges[3].Value, 0x1); ++ EXPECT_EQ(Ranges[4].Value, 0x0); ++ EXPECT_EQ(Ranges[5].Value, 0x1); ++ EXPECT_EQ(Ranges.getRangeThatContains(0x1000)->Value, 0xfe); + +- // Check that values are correctly preserved for combined ranges. ++ // Check that mapped values are correctly preserved for combined ranges. 
+ Ranges.clear(); + Ranges.insert(AddressRange(0x0, 0xff), 0x1); + Ranges.insert(AddressRange(0x100, 0x1ff), 0x2); + Ranges.insert(AddressRange(0x200, 0x2ff), 0x3); + Ranges.insert(AddressRange(0x300, 0x3ff), 0x4); +- Ranges.insert(AddressRange(0x400, 0x4ff), 0x5); + Ranges.insert(AddressRange(0x500, 0x5ff), 0x6); ++ Ranges.insert(AddressRange(0x400, 0x4ff), 0x5); + Ranges.insert(AddressRange(0x600, 0x6ff), 0x7); ++ EXPECT_EQ(Ranges.size(), 7u); + + Ranges.insert(AddressRange(0x150, 0x350), 0xff); +- EXPECT_EQ(Ranges.size(), 5u); +- EXPECT_EQ(Ranges[0].first, AddressRange(0x0, 0xff)); +- EXPECT_EQ(Ranges[0].second, 0x1); +- EXPECT_EQ(Ranges[1].first, AddressRange(0x100, 0x3ff)); +- EXPECT_EQ(Ranges[1].second, 0xff); +- EXPECT_EQ(Ranges[2].first, AddressRange(0x400, 0x4ff)); +- EXPECT_EQ(Ranges[2].second, 0x5); +- EXPECT_EQ(Ranges[3].first, AddressRange(0x500, 0x5ff)); +- EXPECT_EQ(Ranges[3].second, 0x6); +- EXPECT_EQ(Ranges[4].first, AddressRange(0x600, 0x6ff)); +- EXPECT_EQ(Ranges[4].second, 0x7); ++ EXPECT_EQ(Ranges.size(), 9u); ++ EXPECT_EQ(Ranges[0].Range, AddressRange(0x0, 0xff)); ++ EXPECT_EQ(Ranges[0].Value, 0x1); ++ EXPECT_EQ(Ranges[1].Range, AddressRange(0x100, 0x1ff)); ++ EXPECT_EQ(Ranges[1].Value, 0x2); ++ EXPECT_EQ(Ranges[2].Range, AddressRange(0x1ff, 0x200)); ++ EXPECT_EQ(Ranges[2].Value, 0xff); ++ EXPECT_EQ(Ranges[3].Range, AddressRange(0x200, 0x2ff)); ++ EXPECT_EQ(Ranges[3].Value, 0x3); ++ EXPECT_EQ(Ranges[4].Range, AddressRange(0x2ff, 0x300)); ++ EXPECT_EQ(Ranges[4].Value, 0xff); ++ EXPECT_EQ(Ranges[5].Range, AddressRange(0x300, 0x3ff)); ++ EXPECT_EQ(Ranges[5].Value, 0x4); ++ EXPECT_EQ(Ranges[6].Range, AddressRange(0x400, 0x4ff)); ++ EXPECT_EQ(Ranges[6].Value, 0x5); ++ EXPECT_EQ(Ranges[7].Range, AddressRange(0x500, 0x5ff)); ++ EXPECT_EQ(Ranges[7].Value, 0x6); ++ EXPECT_EQ(Ranges[8].Range, AddressRange(0x600, 0x6ff)); ++ EXPECT_EQ(Ranges[8].Value, 0x7); + ++ Ranges.insert(AddressRange(0x650, 0x700), 0x8); + Ranges.insert(AddressRange(0x3ff, 0x400), 0x5); +- EXPECT_EQ(Ranges.size(), 4u); +- EXPECT_EQ(Ranges[0].first, AddressRange(0x0, 0xff)); +- EXPECT_EQ(Ranges[0].second, 0x1); +- EXPECT_EQ(Ranges[1].first, AddressRange(0x100, 0x4ff)); +- EXPECT_EQ(Ranges[1].second, 0x5); +- EXPECT_EQ(Ranges[2].first, AddressRange(0x500, 0x5ff)); +- EXPECT_EQ(Ranges[2].second, 0x6); +- EXPECT_EQ(Ranges[3].first, AddressRange(0x600, 0x6ff)); +- EXPECT_EQ(Ranges[3].second, 0x7); ++ Ranges.insert(AddressRange(0x0, 0x40), 0xee); ++ EXPECT_EQ(Ranges.size(), 11u); ++ EXPECT_EQ(Ranges[0].Range, AddressRange(0x0, 0xff)); ++ EXPECT_EQ(Ranges[0].Value, 0x1); ++ EXPECT_EQ(Ranges[1].Range, AddressRange(0x100, 0x1ff)); ++ EXPECT_EQ(Ranges[1].Value, 0x2); ++ EXPECT_EQ(Ranges[2].Range, AddressRange(0x1ff, 0x200)); ++ EXPECT_EQ(Ranges[2].Value, 0xff); ++ EXPECT_EQ(Ranges[3].Range, AddressRange(0x200, 0x2ff)); ++ EXPECT_EQ(Ranges[3].Value, 0x3); ++ EXPECT_EQ(Ranges[4].Range, AddressRange(0x2ff, 0x300)); ++ EXPECT_EQ(Ranges[4].Value, 0xff); ++ EXPECT_EQ(Ranges[5].Range, AddressRange(0x300, 0x3ff)); ++ EXPECT_EQ(Ranges[5].Value, 0x4); ++ EXPECT_EQ(Ranges[6].Range, AddressRange(0x3ff, 0x400)); ++ EXPECT_EQ(Ranges[6].Value, 0x5); ++ EXPECT_EQ(Ranges[7].Range, AddressRange(0x400, 0x4ff)); ++ EXPECT_EQ(Ranges[7].Value, 0x5); ++ EXPECT_EQ(Ranges[8].Range, AddressRange(0x500, 0x5ff)); ++ EXPECT_EQ(Ranges[8].Value, 0x6); ++ EXPECT_EQ(Ranges[9].Range, AddressRange(0x600, 0x6ff)); ++ EXPECT_EQ(Ranges[9].Value, 0x7); ++ EXPECT_EQ(Ranges[10].Range, AddressRange(0x6ff, 0x700)); ++ EXPECT_EQ(Ranges[10].Value, 0x8); ++} ++ 
++TEST(AddressRangeTest, TestRangesMapRandom) { ++ AddressRangesMap Ranges; ++ size_t NumElements = 100; ++ ++ std::srand(std::time(nullptr)); ++ ++ // Fill ranges. Use the same mapped value. ++ for (size_t Idx = 0; Idx < NumElements; Idx++) { ++ uint64_t Start = static_cast<uint64_t>(std::rand() % 1000); ++ uint64_t End = Start + static_cast<uint64_t>(std::rand() % 1000); ++ Ranges.insert({Start, End}, 0xffLL); ++ } ++ ++ // Check ranges. ++ for (size_t Idx = 0; Idx + 1 < Ranges.size(); Idx++) { ++ // Check that ranges are not intersected. ++ EXPECT_FALSE(Ranges[Idx].Range.intersects(Ranges[Idx + 1].Range)); ++ ++ // Check that ranges are sorted and not adjusted. ++ EXPECT_TRUE(Ranges[Idx].Range.end() <= Ranges[Idx + 1].Range.start()); ++ } ++ ++ Ranges.clear(); ++ // Fill ranges. Use the various mapped value. ++ for (size_t Idx = 0; Idx < NumElements; Idx++) { ++ uint64_t Start = static_cast<uint64_t>(std::rand() % 1000); ++ uint64_t End = Start + static_cast<uint64_t>(std::rand() % 1000); ++ int64_t Value = static_cast<int64_t>(std::rand() % 10); ++ Ranges.insert({Start, End}, Value); ++ } ++ ++ // Check ranges. ++ for (size_t Idx = 0; Idx + 1 < Ranges.size(); Idx++) { ++ // Check that ranges are not intersected. ++ EXPECT_FALSE(Ranges[Idx].Range.intersects(Ranges[Idx + 1].Range)); ++ ++ // Check that ranges are sorted and not adjusted. ++ EXPECT_TRUE(Ranges[Idx].Range.end() <= Ranges[Idx + 1].Range.start()); ++ } + } +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/macosx64-aarch64.json b/build/build-clang/macosx64-aarch64.json new file mode 100644 index 0000000000..ecbb483e77 --- /dev/null +++ b/build/build-clang/macosx64-aarch64.json @@ -0,0 +1,3 @@ +{ + "target": "aarch64-apple-darwin" +} diff --git a/build/build-clang/macosx64.json b/build/build-clang/macosx64.json new file mode 100644 index 0000000000..2576c0c05f --- /dev/null +++ b/build/build-clang/macosx64.json @@ -0,0 +1,9 @@ +{ + "target": "x86_64-apple-darwin", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "ar": "{MOZ_FETCHES_DIR}/clang/bin/llvm-ar", + "ranlib": "{MOZ_FETCHES_DIR}/clang/bin/llvm-ranlib", + "ld": "{MOZ_FETCHES_DIR}/clang/bin/clang" +} diff --git a/build/build-clang/partial-revert-llvmorg-16-init-15775-g1ae7d83803e4.patch b/build/build-clang/partial-revert-llvmorg-16-init-15775-g1ae7d83803e4.patch new file mode 100644 index 0000000000..8f68583b51 --- /dev/null +++ b/build/build-clang/partial-revert-llvmorg-16-init-15775-g1ae7d83803e4.patch @@ -0,0 +1,106 @@ +The change in https://github.com/llvm/llvm-project/commit/1ae7d83803e45f6053ec6a606f259653846926b8 +makes rustc unable to read the profiles that `llvm-profdata merge` outputs, +further causing some problems (e.g. bug 1811960). 
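For context, the profile flow being protected looks roughly like the following; the file names, output paths, and optimization flags are illustrative rather than the exact Firefox build invocations, and only the `llvm-profdata merge` step is taken from the description above:

    # Build and run an instrumented binary, then merge the raw profiles.
    clang -fprofile-generate=./profraw -O2 hot.c -o hot
    ./hot                               # writes ./profraw/*.profraw
    llvm-profdata merge -o merged.profdata ./profraw/*.profraw

    # Both compilers consume the same merged profile during the PGO build.
    clang -fprofile-use=merged.profdata -O2 hot.c -o hot
    rustc -Copt-level=2 -Cprofile-use=merged.profdata lib.rs

rustc ships its own copy of LLVM, so once `llvm-profdata merge` emits the newer indexed-profile format, the rustc step above can no longer parse merged.profdata; the hunks below pin the writer back to Version8 so both consumers keep working.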
+ +diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp +index af3c27ebac76..a6da1e0f3aec 100644 +--- a/llvm/lib/ProfileData/InstrProfWriter.cpp ++++ b/llvm/lib/ProfileData/InstrProfWriter.cpp +@@ -291,10 +291,6 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, + for (auto &Func : I.getValue()) + addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + +- BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size()); +- for (auto &I : IPW.BinaryIds) +- addBinaryIds(I); +- + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); + for (auto &I : IPW.MemProfFrameData) { + // If we weren't able to add the frame mappings then it doesn't make sense +@@ -339,7 +335,6 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary, + + Error InstrProfWriter::writeImpl(ProfOStream &OS) { + using namespace IndexedInstrProf; +- using namespace support; + + OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator; + +@@ -356,7 +351,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + // Write the header. + IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; +- Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; ++ Header.Version = IndexedInstrProf::ProfVersion::Version8; + if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) + Header.Version |= VARIANT_MASK_IR_PROF; + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) +@@ -396,12 +389,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + // profile contains memory profile information. + OS.write(0); + +- // Save the location of binary ids section. +- uint64_t BinaryIdSectionOffset = OS.tell(); +- // Reserve space for the BinaryIdOffset field to be patched later if this +- // profile contains binary ids. +- OS.write(0); +- + // Reserve space to write profile summary data. + uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); + uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); +@@ -478,43 +465,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + OS.patch(PatchItems, 3); + } + +- // BinaryIdSection has two parts: +- // 1. uint64_t BinaryIdsSectionSize +- // 2. list of binary ids that consist of: +- // a. uint64_t BinaryIdLength +- // b. uint8_t BinaryIdData +- // c. uint8_t Padding (if necessary) +- uint64_t BinaryIdSectionStart = OS.tell(); +- // Calculate size of binary section. +- uint64_t BinaryIdsSectionSize = 0; +- +- // Remove duplicate binary ids. +- llvm::sort(BinaryIds); +- BinaryIds.erase(std::unique(BinaryIds.begin(), BinaryIds.end()), +- BinaryIds.end()); +- +- for (auto BI : BinaryIds) { +- // Increment by binary id length data type size. +- BinaryIdsSectionSize += sizeof(uint64_t); +- // Increment by binary id data length, aligned to 8 bytes. +- BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t)); +- } +- // Write binary ids section size. +- OS.write(BinaryIdsSectionSize); +- +- for (auto BI : BinaryIds) { +- uint64_t BILen = BI.size(); +- // Write binary id length. +- OS.write(BILen); +- // Write binary id data. +- for (unsigned K = 0; K < BILen; K++) +- OS.writeByte(BI[K]); +- // Write padding if necessary. +- uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen; +- for (unsigned K = 0; K < PaddingSize; K++) +- OS.writeByte(0); +- } +- + // Allocate space for data to be serialized out. 
+ std::unique_ptr<IndexedInstrProf::Summary> TheSummary = + IndexedInstrProf::allocSummary(SummarySize); +@@ -537,11 +487,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + PatchItem PatchItems[] = { + // Patch the Header.HashOffset field. + {HashTableStartFieldOffset, &HashTableStart, 1}, +- // Patch the Header.MemProfOffset (=0 for profiles without MemProf +- // data). ++ // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). + {MemProfSectionOffset, &MemProfSectionStart, 1}, +- // Patch the Header.BinaryIdSectionOffset. +- {BinaryIdSectionOffset, &BinaryIdSectionStart, 1}, + // Patch the summary data. + {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()), + (int)(SummarySize / sizeof(uint64_t))}, diff --git a/build/build-clang/partial-revert-llvmorg-16-init-15775-g1ae7d83803e4_clang_17.patch b/build/build-clang/partial-revert-llvmorg-16-init-15775-g1ae7d83803e4_clang_17.patch new file mode 100644 index 0000000000..eb123e5725 --- /dev/null +++ b/build/build-clang/partial-revert-llvmorg-16-init-15775-g1ae7d83803e4_clang_17.patch @@ -0,0 +1,106 @@ +The change in https://github.com/llvm/llvm-project/commit/1ae7d83803e45f6053ec6a606f259653846926b8 +makes rustc unable to read the profiles that `llvm-profdata merge` outputs, +further causing some problems (e.g. bug 1811960). + +diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp +index af3c27ebac76..a6da1e0f3aec 100644 +--- a/llvm/lib/ProfileData/InstrProfWriter.cpp ++++ b/llvm/lib/ProfileData/InstrProfWriter.cpp +@@ -291,10 +291,6 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, + for (auto &Func : I.getValue()) + addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + +- BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size()); +- for (auto &I : IPW.BinaryIds) +- addBinaryIds(I); +- + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); + for (auto &I : IPW.MemProfFrameData) { + // If we weren't able to add the frame mappings then it doesn't make sense +@@ -339,7 +335,6 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary, + + Error InstrProfWriter::writeImpl(ProfOStream &OS) { + using namespace IndexedInstrProf; +- using namespace support; + + OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator; + +@@ -356,7 +351,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + // Write the header. + IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; +- Header.Version = IndexedInstrProf::ProfVersion::Version9; ++ Header.Version = IndexedInstrProf::ProfVersion::Version8; + if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) + Header.Version |= VARIANT_MASK_IR_PROF; + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) +@@ -396,12 +389,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + // profile contains memory profile information. + OS.write(0); + +- // Save the location of binary ids section. +- uint64_t BinaryIdSectionOffset = OS.tell(); +- // Reserve space for the BinaryIdOffset field to be patched later if this +- // profile contains binary ids. +- OS.write(0); +- + // Reserve space to write profile summary data. + uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); + uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); +@@ -478,43 +465,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + OS.patch(PatchItems, 3); + } + +- // BinaryIdSection has two parts: +- // 1. 
uint64_t BinaryIdsSectionSize +- // 2. list of binary ids that consist of: +- // a. uint64_t BinaryIdLength +- // b. uint8_t BinaryIdData +- // c. uint8_t Padding (if necessary) +- uint64_t BinaryIdSectionStart = OS.tell(); +- // Calculate size of binary section. +- uint64_t BinaryIdsSectionSize = 0; +- +- // Remove duplicate binary ids. +- llvm::sort(BinaryIds); +- BinaryIds.erase(std::unique(BinaryIds.begin(), BinaryIds.end()), +- BinaryIds.end()); +- +- for (auto BI : BinaryIds) { +- // Increment by binary id length data type size. +- BinaryIdsSectionSize += sizeof(uint64_t); +- // Increment by binary id data length, aligned to 8 bytes. +- BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t)); +- } +- // Write binary ids section size. +- OS.write(BinaryIdsSectionSize); +- +- for (auto BI : BinaryIds) { +- uint64_t BILen = BI.size(); +- // Write binary id length. +- OS.write(BILen); +- // Write binary id data. +- for (unsigned K = 0; K < BILen; K++) +- OS.writeByte(BI[K]); +- // Write padding if necessary. +- uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen; +- for (unsigned K = 0; K < PaddingSize; K++) +- OS.writeByte(0); +- } +- + // Allocate space for data to be serialized out. + std::unique_ptr<IndexedInstrProf::Summary> TheSummary = + IndexedInstrProf::allocSummary(SummarySize); +@@ -537,11 +487,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + PatchItem PatchItems[] = { + // Patch the Header.HashOffset field. + {HashTableStartFieldOffset, &HashTableStart, 1}, +- // Patch the Header.MemProfOffset (=0 for profiles without MemProf +- // data). ++ // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). + {MemProfSectionOffset, &MemProfSectionStart, 1}, +- // Patch the Header.BinaryIdSectionOffset. +- {BinaryIdSectionOffset, &BinaryIdSectionStart, 1}, + // Patch the summary data. + {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()), + (int)(SummarySize / sizeof(uint64_t))}, diff --git a/build/build-clang/partial-revert-llvmorg-17-init-7686-g244be0b0de19.patch b/build/build-clang/partial-revert-llvmorg-17-init-7686-g244be0b0de19.patch new file mode 100644 index 0000000000..9f3c9a994a --- /dev/null +++ b/build/build-clang/partial-revert-llvmorg-17-init-7686-g244be0b0de19.patch @@ -0,0 +1,76 @@ +Revert profdata changes from https://github.com/llvm/llvm-project/commit/244be0b0de198fbe8a0861bb8f75509f610b57a4 +that make rustc unable to read the profiles that `llvm-profdata merge` +outputs, further causing some problems (e.g. bug 1811960). + +diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp +index 473fa35bfeec..3d324d91f10f 100644 +--- a/llvm/lib/ProfileData/InstrProfWriter.cpp ++++ b/llvm/lib/ProfileData/InstrProfWriter.cpp +@@ -356,9 +356,6 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, + for (auto &I : IPW.BinaryIds) + addBinaryIds(I); + +- addTemporalProfileTraces(IPW.TemporalProfTraces, +- IPW.TemporalProfTraceStreamSize); +- + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); + for (auto &I : IPW.MemProfFrameData) { + // If we weren't able to add the frame mappings then it doesn't make sense +@@ -420,7 +417,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + // Write the header. 
+ IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; +- Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; ++ Header.Version = IndexedInstrProf::ProfVersion::Version9; + if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) + Header.Version |= VARIANT_MASK_IR_PROF; + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) +@@ -435,7 +432,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) + Header.Version |= VARIANT_MASK_MEMPROF; + if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) +- Header.Version |= VARIANT_MASK_TEMPORAL_PROF; ++ return make_error<InstrProfError>(instrprof_error::invalid_prof); + + Header.Unused = 0; + Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType); +@@ -469,9 +466,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + // profile contains binary ids. + OS.write(0); + +- uint64_t TemporalProfTracesOffset = OS.tell(); +- OS.write(0); +- + // Reserve space to write profile summary data. + uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); + uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); +@@ -585,19 +579,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + OS.writeByte(0); + } + +- uint64_t TemporalProfTracesSectionStart = 0; +- if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) { +- TemporalProfTracesSectionStart = OS.tell(); +- OS.write(TemporalProfTraces.size()); +- OS.write(TemporalProfTraceStreamSize); +- for (auto &Trace : TemporalProfTraces) { +- OS.write(Trace.Weight); +- OS.write(Trace.FunctionNameRefs.size()); +- for (auto &NameRef : Trace.FunctionNameRefs) +- OS.write(NameRef); +- } +- } +- + // Allocate space for data to be serialized out. + std::unique_ptr<IndexedInstrProf::Summary> TheSummary = + IndexedInstrProf::allocSummary(SummarySize); +@@ -625,9 +606,6 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { + {MemProfSectionOffset, &MemProfSectionStart, 1}, + // Patch the Header.BinaryIdSectionOffset. + {BinaryIdSectionOffset, &BinaryIdSectionStart, 1}, +- // Patch the Header.TemporalProfTracesOffset (=0 for profiles without +- // traces). +- {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1}, + // Patch the summary data. + {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()), + (int)(SummarySize / sizeof(uint64_t))}, diff --git a/build/build-clang/profile.json b/build/build-clang/profile.json new file mode 100644 index 0000000000..746aa92452 --- /dev/null +++ b/build/build-clang/profile.json @@ -0,0 +1,6 @@ +{ + "stages": "3", + "pgo": true, + "ranlib": "{MOZ_FETCHES_DIR}/clang/bin/llvm-ranlib", + "ar": "{MOZ_FETCHES_DIR}/clang/bin/llvm-ar" +} diff --git a/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6.patch b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6.patch new file mode 100644 index 0000000000..9d61910705 --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6.patch @@ -0,0 +1,173 @@ +From c8a5013045b5aff8e45418925688ca670545980f Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 18 Mar 2022 17:58:28 +0900 +Subject: [PATCH] Revert "[lsan] Move out suppression of invalid PCs from + StopTheWorld" + +This reverts commit f86deb18cab6479a0961ade3807e4729f3a27bdf +because of permafail for a sizable amount of ASan test jobs, where the +worker would die without even leaving any logs. 
+ +--- + compiler-rt/lib/lsan/lsan_common.cpp | 108 +++++++++++++++++---------- + 1 file changed, 67 insertions(+), 41 deletions(-) + +diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp +index fd7aa38d99db..658415bce507 100644 +--- a/compiler-rt/lib/lsan/lsan_common.cpp ++++ b/compiler-rt/lib/lsan/lsan_common.cpp +@@ -71,11 +71,9 @@ class LeakSuppressionContext { + SuppressionContext context; + bool suppressed_stacks_sorted = true; + InternalMmapVector<u32> suppressed_stacks; +- const LoadedModule *suppress_module = nullptr; + +- void LazyInit(); + Suppression *GetSuppressionForAddr(uptr addr); +- bool SuppressInvalid(const StackTrace &stack); ++ void LazyInit(); + bool SuppressByRule(const StackTrace &stack, uptr hit_count, uptr total_size); + + public: +@@ -126,8 +124,6 @@ void LeakSuppressionContext::LazyInit() { + if (&__lsan_default_suppressions) + context.Parse(__lsan_default_suppressions()); + context.Parse(kStdSuppressions); +- if (flags()->use_tls && flags()->use_ld_allocations) +- suppress_module = GetLinker(); + } + } + +@@ -152,41 +148,6 @@ Suppression *LeakSuppressionContext::GetSuppressionForAddr(uptr addr) { + return s; + } + +-static uptr GetCallerPC(const StackTrace &stack) { +- // The top frame is our malloc/calloc/etc. The next frame is the caller. +- if (stack.size >= 2) +- return stack.trace[1]; +- return 0; +-} +- +-// On Linux, treats all chunks allocated from ld-linux.so as reachable, which +-// covers dynamically allocated TLS blocks, internal dynamic loader's loaded +-// modules accounting etc. +-// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. +-// They are allocated with a __libc_memalign() call in allocate_and_init() +-// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those +-// blocks, but we can make sure they come from our own allocator by intercepting +-// __libc_memalign(). On top of that, there is no easy way to reach them. Their +-// addresses are stored in a dynamically allocated array (the DTV) which is +-// referenced from the static TLS. Unfortunately, we can't just rely on the DTV +-// being reachable from the static TLS, and the dynamic TLS being reachable from +-// the DTV. This is because the initial DTV is allocated before our interception +-// mechanism kicks in, and thus we don't recognize it as allocated memory. We +-// can't special-case it either, since we don't know its size. +-// Our solution is to include in the root set all allocations made from +-// ld-linux.so (which is where allocate_and_init() is implemented). This is +-// guaranteed to include all dynamic TLS blocks (and possibly other allocations +-// which we don't care about). +-// On all other platforms, this simply checks to ensure that the caller pc is +-// valid before reporting chunks as leaked. +-bool LeakSuppressionContext::SuppressInvalid(const StackTrace &stack) { +- uptr caller_pc = GetCallerPC(stack); +- // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark +- // it as reachable, as we can't properly report its allocation stack anyway. 
+- return !caller_pc || +- (suppress_module && suppress_module->containsAddress(caller_pc)); +-} +- + bool LeakSuppressionContext::SuppressByRule(const StackTrace &stack, + uptr hit_count, uptr total_size) { + for (uptr i = 0; i < stack.size; i++) { +@@ -205,7 +166,7 @@ bool LeakSuppressionContext::Suppress(u32 stack_trace_id, uptr hit_count, + uptr total_size) { + LazyInit(); + StackTrace stack = StackDepotGet(stack_trace_id); +- if (!SuppressInvalid(stack) && !SuppressByRule(stack, hit_count, total_size)) ++ if (!SuppressByRule(stack, hit_count, total_size)) + return false; + suppressed_stacks_sorted = false; + suppressed_stacks.push_back(stack_trace_id); +@@ -569,6 +530,68 @@ static void CollectIgnoredCb(uptr chunk, void *arg) { + } + } + ++static uptr GetCallerPC(const StackTrace &stack) { ++ // The top frame is our malloc/calloc/etc. The next frame is the caller. ++ if (stack.size >= 2) ++ return stack.trace[1]; ++ return 0; ++} ++ ++struct InvalidPCParam { ++ Frontier *frontier; ++ bool skip_linker_allocations; ++}; ++ ++// ForEachChunk callback. If the caller pc is invalid or is within the linker, ++// mark as reachable. Called by ProcessPlatformSpecificAllocations. ++static void MarkInvalidPCCb(uptr chunk, void *arg) { ++ CHECK(arg); ++ InvalidPCParam *param = reinterpret_cast<InvalidPCParam *>(arg); ++ chunk = GetUserBegin(chunk); ++ LsanMetadata m(chunk); ++ if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { ++ u32 stack_id = m.stack_trace_id(); ++ uptr caller_pc = 0; ++ if (stack_id > 0) ++ caller_pc = GetCallerPC(StackDepotGet(stack_id)); ++ // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark ++ // it as reachable, as we can't properly report its allocation stack anyway. ++ if (caller_pc == 0 || (param->skip_linker_allocations && ++ GetLinker()->containsAddress(caller_pc))) { ++ m.set_tag(kIgnored); ++ param->frontier->push_back(chunk); ++ } ++ } ++} ++ ++// On Linux, treats all chunks allocated from ld-linux.so as reachable, which ++// covers dynamically allocated TLS blocks, internal dynamic loader's loaded ++// modules accounting etc. ++// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. ++// They are allocated with a __libc_memalign() call in allocate_and_init() ++// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those ++// blocks, but we can make sure they come from our own allocator by intercepting ++// __libc_memalign(). On top of that, there is no easy way to reach them. Their ++// addresses are stored in a dynamically allocated array (the DTV) which is ++// referenced from the static TLS. Unfortunately, we can't just rely on the DTV ++// being reachable from the static TLS, and the dynamic TLS being reachable from ++// the DTV. This is because the initial DTV is allocated before our interception ++// mechanism kicks in, and thus we don't recognize it as allocated memory. We ++// can't special-case it either, since we don't know its size. ++// Our solution is to include in the root set all allocations made from ++// ld-linux.so (which is where allocate_and_init() is implemented). This is ++// guaranteed to include all dynamic TLS blocks (and possibly other allocations ++// which we don't care about). ++// On all other platforms, this simply checks to ensure that the caller pc is ++// valid before reporting chunks as leaked. 
++static void ProcessPC(Frontier *frontier) { ++ InvalidPCParam arg; ++ arg.frontier = frontier; ++ arg.skip_linker_allocations = ++ flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr; ++ ForEachChunk(MarkInvalidPCCb, &arg); ++} ++ + // Sets the appropriate tag on each chunk. + static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + Frontier *frontier) { +@@ -584,6 +607,9 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + ProcessRootRegions(frontier); + FloodFillTag(frontier, kReachable); + ++ CHECK_EQ(0, frontier->size()); ++ ProcessPC(frontier); ++ + // The check here is relatively expensive, so we do this in a separate flood + // fill. That way we can skip the check for chunks that are reachable + // otherwise. +-- +2.35.0.1.g829a698654 + diff --git a/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch new file mode 100644 index 0000000000..f5493527ab --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-11890-gf86deb18cab6_clang_16.patch @@ -0,0 +1,180 @@ +From c8a5013045b5aff8e45418925688ca670545980f Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 18 Mar 2022 17:58:28 +0900 +Subject: [PATCH] Revert "[lsan] Move out suppression of invalid PCs from + StopTheWorld" + +This reverts commit f86deb18cab6479a0961ade3807e4729f3a27bdf +because of permafail for a sizable amount of ASan test jobs, where the +worker would die without even leaving any logs. + +--- + compiler-rt/lib/lsan/lsan_common.cpp | 108 +++++++++++++++++---------- + 1 file changed, 67 insertions(+), 41 deletions(-) + +diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp +index 51218770d6dc..0a69b010879b 100644 +--- a/compiler-rt/lib/lsan/lsan_common.cpp ++++ b/compiler-rt/lib/lsan/lsan_common.cpp +@@ -83,11 +83,9 @@ class LeakSuppressionContext { + SuppressionContext context; + bool suppressed_stacks_sorted = true; + InternalMmapVector<u32> suppressed_stacks; +- const LoadedModule *suppress_module = nullptr; + +- void LazyInit(); + Suppression *GetSuppressionForAddr(uptr addr); +- bool SuppressInvalid(const StackTrace &stack); ++ void LazyInit(); + bool SuppressByRule(const StackTrace &stack, uptr hit_count, uptr total_size); + + public: +@@ -138,8 +136,6 @@ void LeakSuppressionContext::LazyInit() { + if (&__lsan_default_suppressions) + context.Parse(__lsan_default_suppressions()); + context.Parse(kStdSuppressions); +- if (flags()->use_tls && flags()->use_ld_allocations) +- suppress_module = GetLinker(); + } + } + +@@ -165,13 +161,6 @@ Suppression *LeakSuppressionContext::GetSuppressionForAddr(uptr addr) { + return s; + } + +-static uptr GetCallerPC(const StackTrace &stack) { +- // The top frame is our malloc/calloc/etc. The next frame is the caller. +- if (stack.size >= 2) +- return stack.trace[1]; +- return 0; +-} +- + # if SANITIZER_APPLE + // Objective-C class data pointers are stored with flags in the low bits, so + // they need to be transformed back into something that looks like a pointer. +@@ -183,34 +172,6 @@ static inline void *MaybeTransformPointer(void *p) { + } + # endif + +-// On Linux, treats all chunks allocated from ld-linux.so as reachable, which +-// covers dynamically allocated TLS blocks, internal dynamic loader's loaded +-// modules accounting etc. +-// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. 
+-// They are allocated with a __libc_memalign() call in allocate_and_init() +-// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those +-// blocks, but we can make sure they come from our own allocator by intercepting +-// __libc_memalign(). On top of that, there is no easy way to reach them. Their +-// addresses are stored in a dynamically allocated array (the DTV) which is +-// referenced from the static TLS. Unfortunately, we can't just rely on the DTV +-// being reachable from the static TLS, and the dynamic TLS being reachable from +-// the DTV. This is because the initial DTV is allocated before our interception +-// mechanism kicks in, and thus we don't recognize it as allocated memory. We +-// can't special-case it either, since we don't know its size. +-// Our solution is to include in the root set all allocations made from +-// ld-linux.so (which is where allocate_and_init() is implemented). This is +-// guaranteed to include all dynamic TLS blocks (and possibly other allocations +-// which we don't care about). +-// On all other platforms, this simply checks to ensure that the caller pc is +-// valid before reporting chunks as leaked. +-bool LeakSuppressionContext::SuppressInvalid(const StackTrace &stack) { +- uptr caller_pc = GetCallerPC(stack); +- // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark +- // it as reachable, as we can't properly report its allocation stack anyway. +- return !caller_pc || +- (suppress_module && suppress_module->containsAddress(caller_pc)); +-} +- + bool LeakSuppressionContext::SuppressByRule(const StackTrace &stack, + uptr hit_count, uptr total_size) { + for (uptr i = 0; i < stack.size; i++) { +@@ -229,7 +190,7 @@ bool LeakSuppressionContext::Suppress(u32 stack_trace_id, uptr hit_count, + uptr total_size) { + LazyInit(); + StackTrace stack = StackDepotGet(stack_trace_id); +- if (!SuppressInvalid(stack) && !SuppressByRule(stack, hit_count, total_size)) ++ if (!SuppressByRule(stack, hit_count, total_size)) + return false; + suppressed_stacks_sorted = false; + suppressed_stacks.push_back(stack_trace_id); +@@ -600,6 +561,68 @@ static void CollectIgnoredCb(uptr chunk, void *arg) { + } + } + ++static uptr GetCallerPC(const StackTrace &stack) { ++ // The top frame is our malloc/calloc/etc. The next frame is the caller. ++ if (stack.size >= 2) ++ return stack.trace[1]; ++ return 0; ++} ++ ++struct InvalidPCParam { ++ Frontier *frontier; ++ bool skip_linker_allocations; ++}; ++ ++// ForEachChunk callback. If the caller pc is invalid or is within the linker, ++// mark as reachable. Called by ProcessPlatformSpecificAllocations. ++static void MarkInvalidPCCb(uptr chunk, void *arg) { ++ CHECK(arg); ++ InvalidPCParam *param = reinterpret_cast<InvalidPCParam *>(arg); ++ chunk = GetUserBegin(chunk); ++ LsanMetadata m(chunk); ++ if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { ++ u32 stack_id = m.stack_trace_id(); ++ uptr caller_pc = 0; ++ if (stack_id > 0) ++ caller_pc = GetCallerPC(StackDepotGet(stack_id)); ++ // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark ++ // it as reachable, as we can't properly report its allocation stack anyway. 
++ if (caller_pc == 0 || (param->skip_linker_allocations && ++ GetLinker()->containsAddress(caller_pc))) { ++ m.set_tag(kIgnored); ++ param->frontier->push_back(chunk); ++ } ++ } ++} ++ ++// On Linux, treats all chunks allocated from ld-linux.so as reachable, which ++// covers dynamically allocated TLS blocks, internal dynamic loader's loaded ++// modules accounting etc. ++// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. ++// They are allocated with a __libc_memalign() call in allocate_and_init() ++// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those ++// blocks, but we can make sure they come from our own allocator by intercepting ++// __libc_memalign(). On top of that, there is no easy way to reach them. Their ++// addresses are stored in a dynamically allocated array (the DTV) which is ++// referenced from the static TLS. Unfortunately, we can't just rely on the DTV ++// being reachable from the static TLS, and the dynamic TLS being reachable from ++// the DTV. This is because the initial DTV is allocated before our interception ++// mechanism kicks in, and thus we don't recognize it as allocated memory. We ++// can't special-case it either, since we don't know its size. ++// Our solution is to include in the root set all allocations made from ++// ld-linux.so (which is where allocate_and_init() is implemented). This is ++// guaranteed to include all dynamic TLS blocks (and possibly other allocations ++// which we don't care about). ++// On all other platforms, this simply checks to ensure that the caller pc is ++// valid before reporting chunks as leaked. ++static void ProcessPC(Frontier *frontier) { ++ InvalidPCParam arg; ++ arg.frontier = frontier; ++ arg.skip_linker_allocations = ++ flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr; ++ ForEachChunk(MarkInvalidPCCb, &arg); ++} ++ + // Sets the appropriate tag on each chunk. + static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + Frontier *frontier, tid_t caller_tid, +@@ -616,6 +639,9 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, + ProcessRootRegions(frontier); + FloodFillTag(frontier, kReachable); + ++ CHECK_EQ(0, frontier->size()); ++ ProcessPC(frontier); ++ + // The check here is relatively expensive, so we do this in a separate flood + // fill. That way we can skip the check for chunks that are reachable + // otherwise. +-- +2.35.0.1.g829a698654 + diff --git a/build/build-clang/revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch b/build/build-clang/revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch new file mode 100644 index 0000000000..5a8fb5e701 --- /dev/null +++ b/build/build-clang/revert-llvmorg-14-init-14141-gd6d3000a2f6d.patch @@ -0,0 +1,78 @@ +From e602ffe1785cef7f5502223e81345e6b9395fae1 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 11 Mar 2022 10:38:51 +0900 +Subject: [PATCH] Revert "[CMake][WinMsvc] Fix user passed compiler/linker + flags" + +This reverts commit d6d3000a2f6d88ac73e5d4da4005ceadec788a9a +because of bustage building 32-bits compiler-rt for Windows. 
+See https://reviews.llvm.org/D116709#3374131 +--- + llvm/cmake/platforms/WinMsvc.cmake | 30 ++++++++++++++++++++++-------- + 1 file changed, 22 insertions(+), 8 deletions(-) + +diff --git a/llvm/cmake/platforms/WinMsvc.cmake b/llvm/cmake/platforms/WinMsvc.cmake +index d30701a31858..9a5078894182 100644 +--- a/llvm/cmake/platforms/WinMsvc.cmake ++++ b/llvm/cmake/platforms/WinMsvc.cmake +@@ -84,7 +84,6 @@ + # up a VFS overlay for the SDK headers and case-correcting symlinks for the + # libraries when running on a case-sensitive filesystem. + +-include_guard(GLOBAL) + + # When configuring CMake with a toolchain file against a top-level CMakeLists.txt, + # it will actually run CMake many times, once for each small test program used to +@@ -252,8 +251,6 @@ list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_ASM_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/b + list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_C_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang") + list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_CXX_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang++") + +-# These flags are used during build time. So if CFLAGS/CXXFLAGS/LDFLAGS is set +-# for the target, makes sure these are unset during build time. + set(CROSS_TOOLCHAIN_FLAGS_NATIVE "${_CTF_NATIVE_DEFAULT}" CACHE STRING "") + + set(COMPILE_FLAGS +@@ -280,8 +277,18 @@ if(case_sensitive_filesystem) + endif() + + string(REPLACE ";" " " COMPILE_FLAGS "${COMPILE_FLAGS}") +-string(APPEND CMAKE_C_FLAGS_INIT " ${COMPILE_FLAGS}") +-string(APPEND CMAKE_CXX_FLAGS_INIT " ${COMPILE_FLAGS}") ++ ++# We need to preserve any flags that were passed in by the user. However, we ++# can't append to CMAKE_C_FLAGS and friends directly, because toolchain files ++# will be re-invoked on each reconfigure and therefore need to be idempotent. ++# The assignments to the _INITIAL cache variables don't use FORCE, so they'll ++# only be populated on the initial configure, and their values won't change ++# afterward. ++set(_CMAKE_C_FLAGS_INITIAL "${CMAKE_C_FLAGS}" CACHE STRING "") ++set(CMAKE_C_FLAGS "${_CMAKE_C_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE) ++ ++set(_CMAKE_CXX_FLAGS_INITIAL "${CMAKE_CXX_FLAGS}" CACHE STRING "") ++set(CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE) + + set(LINK_FLAGS + # Prevent CMake from attempting to invoke mt.exe. It only recognizes the slashed form and not the dashed form. +@@ -305,9 +312,16 @@ if(case_sensitive_filesystem) + endif() + + string(REPLACE ";" " " LINK_FLAGS "${LINK_FLAGS}") +-string(APPEND CMAKE_EXE_LINKER_FLAGS_INIT " ${LINK_FLAGS}") +-string(APPEND CMAKE_MODULE_LINKER_FLAGS_INIT " ${LINK_FLAGS}") +-string(APPEND CMAKE_SHARED_LINKER_FLAGS_INIT " ${LINK_FLAGS}") ++ ++# See explanation for compiler flags above for the _INITIAL variables. ++set(_CMAKE_EXE_LINKER_FLAGS_INITIAL "${CMAKE_EXE_LINKER_FLAGS}" CACHE STRING "") ++set(CMAKE_EXE_LINKER_FLAGS "${_CMAKE_EXE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) ++ ++set(_CMAKE_MODULE_LINKER_FLAGS_INITIAL "${CMAKE_MODULE_LINKER_FLAGS}" CACHE STRING "") ++set(CMAKE_MODULE_LINKER_FLAGS "${_CMAKE_MODULE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) ++ ++set(_CMAKE_SHARED_LINKER_FLAGS_INITIAL "${CMAKE_SHARED_LINKER_FLAGS}" CACHE STRING "") ++set(CMAKE_SHARED_LINKER_FLAGS "${_CMAKE_SHARED_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE) + + # CMake populates these with a bunch of unnecessary libraries, which requires + # extra case-correcting symlinks and what not. 
Instead, let projects explicitly +-- +2.35.0.1.g829a698654 + diff --git a/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch b/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch new file mode 100644 index 0000000000..93c7e7d767 --- /dev/null +++ b/build/build-clang/revert-llvmorg-15-init-11205-gcead4eceb01b_clang_16.patch @@ -0,0 +1,1027 @@ +From cb411520cb7cd5e6e25966911ca55feb5de779e0 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Fri, 4 Nov 2022 14:51:38 +0900 +Subject: [PATCH] Revert "[symbolizer] Parse DW_TAG_variable DIs to show line + info for globals" + +This reverts commit cead4eceb01b935fae07bf4a7e91911b344d2fec for causing +yet unidentified problems on some webrtc tests under TSan (bug 1798613). +--- + llvm/include/llvm/DebugInfo/DIContext.h | 4 - + .../llvm/DebugInfo/DWARF/DWARFContext.h | 2 - + llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 7 - + llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 14 - + llvm/include/llvm/DebugInfo/PDB/PDBContext.h | 2 - + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 97 ++-- + llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 60 --- + llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 97 ---- + llvm/lib/DebugInfo/PDB/PDBContext.cpp | 7 - + llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 4 - + .../Symbolize/SymbolizableObjectFile.cpp | 8 - + .../Symbolize/ELF/data-command-symtab.yaml | 3 - + .../tools/llvm-symbolizer/data-location.yaml | 450 ------------------ + llvm/test/tools/llvm-symbolizer/data.s | 3 - + 14 files changed, 61 insertions(+), 697 deletions(-) + delete mode 100644 llvm/test/tools/llvm-symbolizer/data-location.yaml + +diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h +index 9b278b696073..a9f98588cf2d 100644 +--- a/llvm/include/llvm/DebugInfo/DIContext.h ++++ b/llvm/include/llvm/DebugInfo/DIContext.h +@@ -114,8 +114,6 @@ struct DIGlobal { + std::string Name; + uint64_t Start = 0; + uint64_t Size = 0; +- std::string DeclFile; +- uint64_t DeclLine = 0; + + DIGlobal() : Name(DILineInfo::BadString) {} + }; +@@ -241,8 +239,6 @@ public: + virtual DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; +- virtual DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) = 0; + virtual DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +index bf591ed554c6..3365ef8d8ee3 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +@@ -364,8 +364,6 @@ public: + DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +- DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) override; + DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +index 149c5ef4e493..4a4d105a2b23 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +@@ -280,13 +280,6 @@ public: + /// \returns an iterator range for the attributes of the current DIE. 
+ iterator_range<attribute_iterator> attributes() const; + +- /// Gets the type size (in bytes) for this DIE. +- /// +- /// \param PointerSize the pointer size of the containing CU. +- /// \returns if this is a type DIE, or this DIE contains a DW_AT_type, returns +- /// the size of the type. +- std::optional<uint64_t> getTypeSize(uint64_t PointerSize); +- + class iterator; + + iterator begin() const; +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +index 9188865b4d77..0341344bc7b8 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +@@ -9,7 +9,6 @@ + #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H + #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H + +-#include "llvm/ADT/DenseSet.h" + #include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/StringRef.h" +@@ -28,7 +27,6 @@ + #include <cstdint> + #include <map> + #include <memory> +-#include <set> + #include <utility> + #include <vector> + +@@ -242,11 +240,6 @@ class DWARFUnit { + /// std::map::upper_bound for address range lookup. + std::map<uint64_t, std::pair<uint64_t, DWARFDie>> AddrDieMap; + +- /// Map from the location (interpreted DW_AT_location) of a DW_TAG_variable, +- /// to the end address and the corresponding DIE. +- std::map<uint64_t, std::pair<uint64_t, DWARFDie>> VariableDieMap; +- DenseSet<uint64_t> RootsParsedForVariables; +- + using die_iterator_range = + iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>; + +@@ -329,9 +322,6 @@ public: + /// Recursively update address to Die map. + void updateAddressDieMap(DWARFDie Die); + +- /// Recursively update address to variable Die map. +- void updateVariableDieMap(DWARFDie Die); +- + void setRangesSection(const DWARFSection *RS, uint64_t Base) { + RangeSection = RS; + RangeSectionBase = Base; +@@ -446,10 +436,6 @@ public: + /// cleared. + DWARFDie getSubroutineForAddress(uint64_t Address); + +- /// Returns variable DIE for the address provided. The pointer is alive as +- /// long as parsed compile unit DIEs are not cleared. +- DWARFDie getVariableForAddress(uint64_t Address); +- + /// getInlinedChainForAddress - fetches inlined chain for a given address. + /// Returns empty chain if there is no subprogram containing address. The + /// chain is valid as long as parsed compile unit DIEs are not cleared. +diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +index 3163c0a1dae0..7b6793f0a639 100644 +--- a/llvm/include/llvm/DebugInfo/PDB/PDBContext.h ++++ b/llvm/include/llvm/DebugInfo/PDB/PDBContext.h +@@ -45,8 +45,6 @@ namespace pdb { + DILineInfo getLineInfoForAddress( + object::SectionedAddress Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +- DILineInfo +- getLineInfoForDataAddress(object::SectionedAddress Address) override; + DILineInfoTable getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +index 19d7d659a86a..1bcfdecfd588 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +@@ -1053,25 +1053,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { + // First, get the offset of the compile unit. + uint64_t CUOffset = getDebugAranges()->findAddress(Address); + // Retrieve the compile unit. 
+- if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) +- return OffsetCU; +- +- // Global variables are often not found by the above search, for one of two +- // reasons: +- // 1. .debug_aranges may not include global variables. On clang, it seems we +- // put the globals in the aranges, but this isn't true for gcc. +- // 2. Even if the global variable is in a .debug_arange, global variables +- // may not be captured in the [start, end) addresses described by the +- // parent compile unit. +- // +- // So, we walk the CU's and their child DI's manually, looking for the +- // specific global variable. +- for (std::unique_ptr<DWARFUnit> &CU : compile_units()) { +- if (DWARFDie Die = CU->getVariableForAddress(Address)) { +- return static_cast<DWARFCompileUnit *>(CU.get()); +- } +- } +- return nullptr; ++ return getCompileUnitForOffset(CUOffset); + } + + DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { +@@ -1141,6 +1123,64 @@ static bool getFunctionNameAndStartLineForAddress( + return FoundResult; + } + ++static std::optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) { ++ if (auto SizeAttr = Type.find(DW_AT_byte_size)) ++ if (std::optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant()) ++ return Size; ++ ++ switch (Type.getTag()) { ++ case DW_TAG_pointer_type: ++ case DW_TAG_reference_type: ++ case DW_TAG_rvalue_reference_type: ++ return PointerSize; ++ case DW_TAG_ptr_to_member_type: { ++ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) ++ if (BaseType.getTag() == DW_TAG_subroutine_type) ++ return 2 * PointerSize; ++ return PointerSize; ++ } ++ case DW_TAG_const_type: ++ case DW_TAG_immutable_type: ++ case DW_TAG_volatile_type: ++ case DW_TAG_restrict_type: ++ case DW_TAG_typedef: { ++ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type)) ++ return getTypeSize(BaseType, PointerSize); ++ break; ++ } ++ case DW_TAG_array_type: { ++ DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type); ++ if (!BaseType) ++ return std::optional<uint64_t>(); ++ std::optional<uint64_t> BaseSize = getTypeSize(BaseType, PointerSize); ++ if (!BaseSize) ++ return std::optional<uint64_t>(); ++ uint64_t Size = *BaseSize; ++ for (DWARFDie Child : Type) { ++ if (Child.getTag() != DW_TAG_subrange_type) ++ continue; ++ ++ if (auto ElemCountAttr = Child.find(DW_AT_count)) ++ if (std::optional<uint64_t> ElemCount = ++ ElemCountAttr->getAsUnsignedConstant()) ++ Size *= *ElemCount; ++ if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) ++ if (std::optional<int64_t> UpperBound = ++ UpperBoundAttr->getAsSignedConstant()) { ++ int64_t LowerBound = 0; ++ if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) ++ LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); ++ Size *= *UpperBound - LowerBound + 1; ++ } ++ } ++ return Size; ++ } ++ default: ++ break; ++ } ++ return std::optional<uint64_t>(); ++} ++ + static std::optional<int64_t> + getExpressionFrameOffset(ArrayRef<uint8_t> Expr, + std::optional<unsigned> FrameBaseReg) { +@@ -1201,7 +1241,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, + if (std::optional<const char *> Name = dwarf::toString(*NameAttr)) + Local.Name = *Name; + if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type)) +- Local.Size = Type.getTypeSize(getCUAddrSize()); ++ Local.Size = getTypeSize(Type, getCUAddrSize()); + if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) { + if (const auto *LT = 
CU->getContext().getLineTableForUnit(CU)) + LT->getFileNameByIndex( +@@ -1242,6 +1282,7 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { + DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + DILineInfoSpecifier Spec) { + DILineInfo Result; ++ + DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; +@@ -1256,22 +1297,6 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + Spec.FLIKind, Result); + } + } +- +- return Result; +-} +- +-DILineInfo +-DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { +- DILineInfo Result; +- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); +- if (!CU) +- return Result; +- +- if (DWARFDie Die = CU->getVariableForAddress(Address.Address)) { +- Result.FileName = Die.getDeclFile(FileLineInfoKind::AbsoluteFilePath); +- Result.Line = Die.getDeclLine(); +- } +- + return Result; + } + +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +index 15a2d23c4fd2..9bf15c30f714 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +@@ -492,66 +492,6 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, + CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); + } + +-std::optional<uint64_t> DWARFDie::getTypeSize(uint64_t PointerSize) { +- if (auto SizeAttr = find(DW_AT_byte_size)) +- if (std::optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant()) +- return Size; +- +- switch (getTag()) { +- case DW_TAG_pointer_type: +- case DW_TAG_reference_type: +- case DW_TAG_rvalue_reference_type: +- return PointerSize; +- case DW_TAG_ptr_to_member_type: { +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- if (BaseType.getTag() == DW_TAG_subroutine_type) +- return 2 * PointerSize; +- return PointerSize; +- } +- case DW_TAG_const_type: +- case DW_TAG_immutable_type: +- case DW_TAG_volatile_type: +- case DW_TAG_restrict_type: +- case DW_TAG_typedef: { +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- return BaseType.getTypeSize(PointerSize); +- break; +- } +- case DW_TAG_array_type: { +- DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type); +- if (!BaseType) +- return std::nullopt; +- std::optional<uint64_t> BaseSize = BaseType.getTypeSize(PointerSize); +- if (!BaseSize) +- return std::nullopt; +- uint64_t Size = *BaseSize; +- for (DWARFDie Child : *this) { +- if (Child.getTag() != DW_TAG_subrange_type) +- continue; +- +- if (auto ElemCountAttr = Child.find(DW_AT_count)) +- if (std::optional<uint64_t> ElemCount = +- ElemCountAttr->getAsUnsignedConstant()) +- Size *= *ElemCount; +- if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound)) +- if (std::optional<int64_t> UpperBound = +- UpperBoundAttr->getAsSignedConstant()) { +- int64_t LowerBound = 0; +- if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound)) +- LowerBound = LowerBoundAttr->getAsSignedConstant().value_or(0); +- Size *= *UpperBound - LowerBound + 1; +- } +- } +- return Size; +- } +- default: +- if (DWARFDie BaseType = getAttributeValueAsReferencedDie(DW_AT_type)) +- return BaseType.getTypeSize(PointerSize); +- break; +- } +- return std::nullopt; +-} +- + /// Helper to dump a DIE with all of its parents, but no siblings. 
+ static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, + DIDumpOptions DumpOpts, unsigned Depth = 0) { +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +index 74667fcb92bc..148711f0246f 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +@@ -9,7 +9,6 @@ + #include "llvm/DebugInfo/DWARF/DWARFUnit.h" + #include "llvm/ADT/SmallString.h" + #include "llvm/ADT/StringRef.h" +-#include "llvm/BinaryFormat/Dwarf.h" + #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" + #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" + #include "llvm/DebugInfo/DWARF/DWARFContext.h" +@@ -19,13 +18,11 @@ + #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" + #include "llvm/DebugInfo/DWARF/DWARFDebugRnglists.h" + #include "llvm/DebugInfo/DWARF/DWARFDie.h" +-#include "llvm/DebugInfo/DWARF/DWARFExpression.h" + #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" + #include "llvm/DebugInfo/DWARF/DWARFListTable.h" + #include "llvm/DebugInfo/DWARF/DWARFObject.h" + #include "llvm/DebugInfo/DWARF/DWARFSection.h" + #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" +-#include "llvm/Object/ObjectFile.h" + #include "llvm/Support/DataExtractor.h" + #include "llvm/Support/Errc.h" + #include "llvm/Support/Path.h" +@@ -752,100 +749,6 @@ DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { + return R->second.second; + } + +-void DWARFUnit::updateVariableDieMap(DWARFDie Die) { +- for (DWARFDie Child : Die) { +- if (isType(Child.getTag())) +- continue; +- updateVariableDieMap(Child); +- } +- +- if (Die.getTag() != DW_TAG_variable) +- return; +- +- Expected<DWARFLocationExpressionsVector> Locations = +- Die.getLocations(DW_AT_location); +- if (!Locations) { +- // Missing DW_AT_location is fine here. +- consumeError(Locations.takeError()); +- return; +- } +- +- uint64_t Address = UINT64_MAX; +- +- for (const DWARFLocationExpression &Location : *Locations) { +- uint8_t AddressSize = getAddressByteSize(); +- DataExtractor Data(Location.Expr, /*IsLittleEndian=*/true, AddressSize); +- DWARFExpression Expr(Data, AddressSize); +- auto It = Expr.begin(); +- if (It == Expr.end()) +- continue; +- +- // Match exactly the main sequence used to describe global variables: +- // `DW_OP_addr[x] [+ DW_OP_plus_uconst]`. Currently, this is the sequence +- // that LLVM produces for DILocalVariables and DIGlobalVariables. If, in +- // future, the DWARF producer (`DwarfCompileUnit::addLocationAttribute()` is +- // a good starting point) is extended to use further expressions, this code +- // needs to be updated. +- uint64_t LocationAddr; +- if (It->getCode() == dwarf::DW_OP_addr) { +- LocationAddr = It->getRawOperand(0); +- } else if (It->getCode() == dwarf::DW_OP_addrx) { +- uint64_t DebugAddrOffset = It->getRawOperand(0); +- if (auto Pointer = getAddrOffsetSectionItem(DebugAddrOffset)) { +- LocationAddr = Pointer->Address; +- } +- } else { +- continue; +- } +- +- // Read the optional 2nd operand, a DW_OP_plus_uconst. +- if (++It != Expr.end()) { +- if (It->getCode() != dwarf::DW_OP_plus_uconst) +- continue; +- +- LocationAddr += It->getRawOperand(0); +- +- // Probe for a 3rd operand, if it exists, bail. +- if (++It != Expr.end()) +- continue; +- } +- +- Address = LocationAddr; +- break; +- } +- +- // Get the size of the global variable. If all else fails (i.e. the global has +- // no type), then we use a size of one to still allow symbolization of the +- // exact address. 
+- uint64_t GVSize = 1; +- if (DWARFDie BaseType = Die.getAttributeValueAsReferencedDie(DW_AT_type)) +- if (std::optional<uint64_t> Size = Die.getTypeSize(getAddressByteSize())) +- GVSize = *Size; +- +- if (Address != UINT64_MAX) +- VariableDieMap[Address] = {Address + GVSize, Die}; +-} +- +-DWARFDie DWARFUnit::getVariableForAddress(uint64_t Address) { +- extractDIEsIfNeeded(false); +- +- auto RootDie = getUnitDIE(); +- +- auto RootLookup = RootsParsedForVariables.insert(RootDie.getOffset()); +- if (RootLookup.second) +- updateVariableDieMap(RootDie); +- +- auto R = VariableDieMap.upper_bound(Address); +- if (R == VariableDieMap.begin()) +- return DWARFDie(); +- +- // upper_bound's previous item contains Address. +- --R; +- if (Address >= R->second.first) +- return DWARFDie(); +- return R->second.second; +-} +- + void + DWARFUnit::getInlinedChainForAddress(uint64_t Address, + SmallVectorImpl<DWARFDie> &InlinedChain) { +diff --git a/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/llvm/lib/DebugInfo/PDB/PDBContext.cpp +index e600fb7385f1..0444093d7622 100644 +--- a/llvm/lib/DebugInfo/PDB/PDBContext.cpp ++++ b/llvm/lib/DebugInfo/PDB/PDBContext.cpp +@@ -64,13 +64,6 @@ DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address, + return Result; + } + +-DILineInfo +-PDBContext::getLineInfoForDataAddress(object::SectionedAddress Address) { +- // Unimplemented. S_GDATA and S_LDATA in CodeView (used to describe global +- // variables) aren't capable of carrying line information. +- return DILineInfo(); +-} +- + DILineInfoTable + PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address, + uint64_t Size, +diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +index 877380213f21..496c8149782e 100644 +--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp ++++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +@@ -206,10 +206,6 @@ void PlainPrinterBase::print(const Request &Request, const DIGlobal &Global) { + Name = DILineInfo::Addr2LineBadString; + OS << Name << "\n"; + OS << Global.Start << " " << Global.Size << "\n"; +- if (Global.DeclFile.empty()) +- OS << "??:?\n"; +- else +- OS << Global.DeclFile << ":" << Global.DeclLine << "\n"; + printFooter(); + } + +diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +index d8ee9264b64f..fcff531895a2 100644 +--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp ++++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +@@ -327,14 +327,6 @@ DIGlobal SymbolizableObjectFile::symbolizeData( + std::string FileName; + getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, + FileName); +- Res.DeclFile = FileName; +- +- // Try and get a better filename:lineno pair from the debuginfo, if present. +- DILineInfo DL = DebugInfoContext->getLineInfoForDataAddress(ModuleOffset); +- if (DL.Line != 0) { +- Res.DeclFile = DL.FileName; +- Res.DeclLine = DL.Line; +- } + return Res; + } + +diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +index 83af3111c5dd..984e444b2fda 100644 +--- a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml ++++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml +@@ -7,15 +7,12 @@ + + # CHECK: func + # CHECK-NEXT: 4096 1 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: data + # CHECK-NEXT: 8192 2 +-# CHECK-NEXT: ??:? 
+ # CHECK-EMPTY: + # CHECK-NEXT: notype + # CHECK-NEXT: 8194 3 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + + --- !ELF +diff --git a/llvm/test/tools/llvm-symbolizer/data-location.yaml b/llvm/test/tools/llvm-symbolizer/data-location.yaml +deleted file mode 100644 +index 54f7d9be44a1..000000000000 +--- a/llvm/test/tools/llvm-symbolizer/data-location.yaml ++++ /dev/null +@@ -1,450 +0,0 @@ +-## Show that when "DATA" is used with an address, it forces the found location +-## to be symbolized as data, including the source information. +- +-# RUN: yaml2obj %s -o %t.so +- +-# RUN: llvm-symbolizer 'DATA 0x304d0' 'DATA 0x304d1' 'DATA 0x304d3' \ +-# RUN: 'DATA 0x304c0' 'DATA 0x304c8' 'DATA 0x304d4' 'DATA 0x304dc' \ +-# RUN: 'DATA 0x304d8' --obj=%t.so | FileCheck %s +- +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +- +-## Check that lookups in the middle of the symbol are also resolved correctly. +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +-# CHECK: bss_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:1 +-# CHECK-EMPTY: +- +-## Now, the remainder of the symbols. +-# CHECK-NEXT: data_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:2 +-# CHECK-EMPTY: +-# CHECK-NEXT: str +-# CHECK-NEXT: {{[0-9]+}} 8 +-# CHECK-NEXT: /tmp/file.cpp:4 +-# CHECK-EMPTY: +-# CHECK-NEXT: f()::function_global +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:8 +-# CHECK-EMPTY: +- +-## Including the one that includes an addend. +-# CHECK-NEXT: alpha +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:12 +-# CHECK-EMPTY: +-# CHECK-NEXT: beta +-# CHECK-NEXT: {{[0-9]+}} 4 +-# CHECK-NEXT: /tmp/file.cpp:13 +-# CHECK-EMPTY: +- +-## Ensure there's still a global that's offset-based. +-# RUN: llvm-dwarfdump --debug-info %t.so | FileCheck %s --check-prefix=OFFSET +- +-# OFFSET: DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4) +- +-################################################################################ +-## File below was generated using: +-## +-## $ clang++ -g -O3 /tmp/file.cpp -shared -fuse-ld=lld -nostdlib \ +-## -target aarch64-linux-gnuabi -mllvm -global-merge-ignore-single-use \ +-## -o /tmp/file.so +-## +-## With /tmp/file.cpp as: +-## 1: int bss_global; +-## 2: int data_global = 2; +-## 3: +-## 4: const char* str = +-## 5: "12345678"; +-## 6: +-## 7: int* f() { +-## 8: static int function_global; +-## 9: return &function_global; +-## 10: } +-## 11: +-## 12: static int alpha; +-## 13: static int beta; +-## 14: int *f(bool b) { return beta ? &alpha : β } +-## 15: +-## +-## ... then, one can get the offsets using `nm`, like: +-## $ nm out.so | grep bss_global +-## 00000000000038fc B bss_global +-## +-## Note the use of the aarch64 target (with -nostdlib in order to allow linkage +-## without libraries for cross-compilation) as well as -O3 and +-## -global-merge-ignore-single-use. This is a specific combination that makes +-## the compiler emit the `alpha` global variable with a more complex +-## DW_AT_location than just a DW_OP_addr/DW_OP_addrx. In this instance, it +-## outputs a `DW_AT_location (DW_OP_addrx 0x4, DW_OP_plus_uconst 0x4)`. +-## +-## Ideally, this would be tested by invoking clang directly on a C source file, +-## but unfortunately there's no way to do that for LLVM tests. The other option +-## is to compile IR to an objfile, but llvm-symbolizer doesn't understand that +-## two symbols can have the same address in different sections. 
In the code +-## above, for example, we'd have bss_global at .bss+0x0, and data_global at +-## .data+0x0, and so the symbolizer would only print one of them. Hence, we have +-## the ugly dso-to-yaml blob below. +-## +-## For now, constant strings don't have a debuginfo entry, and so can't be +-## symbolized correctly. In future (if D123534 gets merged), this can be updated +-## to include a check that llvm-symbolizer can also symbolize constant strings, +-## like `str` above (basically that &"12345678" should be symbolizable) +-## to the specific line. Then, you can find the address of the constant string +-## from the relocation: +-## +-## $ nm out.so | grep str +-## 00000000000038c0 D str +-## $ llvm-objdump -R out.so | grep 38c0 +-## 00000000000038c0 R_X86_64_RELATIVE *ABS*+0x4f8 # <-- 0x4f8 +-################################################################################ +- +---- !ELF +-FileHeader: +- Class: ELFCLASS64 +- Data: ELFDATA2LSB +- Type: ET_DYN +- Machine: EM_AARCH64 +-ProgramHeaders: +- - Type: PT_PHDR +- Flags: [ PF_R ] +- VAddr: 0x40 +- Align: 0x8 +- - Type: PT_LOAD +- Flags: [ PF_R ] +- FirstSec: .dynsym +- LastSec: .eh_frame +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_X, PF_R ] +- FirstSec: .text +- LastSec: .text +- VAddr: 0x103E4 +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_W, PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- Align: 0x10000 +- - Type: PT_LOAD +- Flags: [ PF_W, PF_R ] +- FirstSec: .data +- LastSec: .bss +- VAddr: 0x304C0 +- Align: 0x10000 +- - Type: PT_DYNAMIC +- Flags: [ PF_W, PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- Align: 0x8 +- - Type: PT_GNU_RELRO +- Flags: [ PF_R ] +- FirstSec: .dynamic +- LastSec: .dynamic +- VAddr: 0x20410 +- - Type: PT_GNU_EH_FRAME +- Flags: [ PF_R ] +- FirstSec: .eh_frame_hdr +- LastSec: .eh_frame_hdr +- VAddr: 0x37C +- Align: 0x4 +- - Type: PT_GNU_STACK +- Flags: [ PF_W, PF_R ] +- Align: 0x0 +-Sections: +- - Name: .dynsym +- Type: SHT_DYNSYM +- Flags: [ SHF_ALLOC ] +- Address: 0x238 +- Link: .dynstr +- AddressAlign: 0x8 +- - Name: .gnu.hash +- Type: SHT_GNU_HASH +- Flags: [ SHF_ALLOC ] +- Address: 0x2C8 +- Link: .dynsym +- AddressAlign: 0x8 +- Header: +- SymNdx: 0x1 +- Shift2: 0x1A +- BloomFilter: [ 0x400188002180000C ] +- HashBuckets: [ 0x1 ] +- HashValues: [ 0xEE8502A, 0xEE85016, 0xC033991C, 0x61F7372E, 0xB88AB7F ] +- - Name: .hash +- Type: SHT_HASH +- Flags: [ SHF_ALLOC ] +- Address: 0x2F8 +- Link: .dynsym +- AddressAlign: 0x4 +- Bucket: [ 5, 0, 4, 0, 3, 0 ] +- Chain: [ 0, 0, 0, 1, 2, 0 ] +- - Name: .dynstr +- Type: SHT_STRTAB +- Flags: [ SHF_ALLOC ] +- Address: 0x330 +- AddressAlign: 0x1 +- - Name: .rela.dyn +- Type: SHT_RELA +- Flags: [ SHF_ALLOC ] +- Address: 0x358 +- Link: .dynsym +- AddressAlign: 0x8 +- Relocations: +- - Offset: 0x304C8 +- Type: R_AARCH64_RELATIVE +- Addend: 880 +- - Name: .rodata +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC, SHF_MERGE, SHF_STRINGS ] +- Address: 0x370 +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: '313233343536373800' +- - Name: .eh_frame_hdr +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC ] +- Address: 0x37C +- AddressAlign: 0x4 +- Content: 011B033B18000000020000006800010034000000740001004C000000 +- - Name: .eh_frame +- Type: SHT_PROGBITS +- Flags: [ SHF_ALLOC ] +- Address: 0x398 +- AddressAlign: 0x8 +- Content: 1400000000000000017A5200017C1E011B0C1F0000000000140000001C0000002C0001000C00000000000000000000001400000034000000200001001C000000000000000000000000000000 +- - Name: .text +- Type: SHT_PROGBITS +- Flags: [ 
SHF_ALLOC, SHF_EXECINSTR ] +- Address: 0x103E4 +- AddressAlign: 0x4 +- Content: 0001009000501391C0035FD60801009008611391E90308AA2A4540B85F0100710001899AC0035FD6 +- - Name: .dynamic +- Type: SHT_DYNAMIC +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x20410 +- Link: .dynstr +- AddressAlign: 0x8 +- Entries: +- - Tag: DT_RELA +- Value: 0x358 +- - Tag: DT_RELASZ +- Value: 0x18 +- - Tag: DT_RELAENT +- Value: 0x18 +- - Tag: DT_RELACOUNT +- Value: 0x1 +- - Tag: DT_SYMTAB +- Value: 0x238 +- - Tag: DT_SYMENT +- Value: 0x18 +- - Tag: DT_STRTAB +- Value: 0x330 +- - Tag: DT_STRSZ +- Value: 0x28 +- - Tag: DT_GNU_HASH +- Value: 0x2C8 +- - Tag: DT_HASH +- Value: 0x2F8 +- - Tag: DT_NULL +- Value: 0x0 +- - Name: .data +- Type: SHT_PROGBITS +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x304C0 +- AddressAlign: 0x8 +- Content: '02000000000000000000000000000000' +- - Name: .bss +- Type: SHT_NOBITS +- Flags: [ SHF_WRITE, SHF_ALLOC ] +- Address: 0x304D0 +- AddressAlign: 0x4 +- Size: 0x10 +- - Name: .debug_abbrev +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 011101252513050325721710171B25111B120673170000023400032549133F193A0B3B0B0218000003240003253E0B0B0B0000040F004913000005260049130000062E01111B120640187A196E2503253A0B3B0B49133F190000073400032549133A0B3B0B02180000083400032549133A0B3B0B02186E25000009050003253A0B3B0B4913000000 +- - Name: .debug_info +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: AB0000000500010800000000010021000108000000000000000205280000000800000002032E000000000102A1000304050402052E000000000202A101020648000000000402A102044D00000005520000000307080106050C000000016F0D0E0007A500000007082E000000000802A1030008092E000000000D02A1040A080B2E000000000C04A10423040C06061C000000016F0F0E000EA50000000910000EAA00000000042E0000000311020100 +- - Name: .debug_str_offsets +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 4C00000005000000A2000000000000002C00000059000000280000001C00000072000000640000008C0000008700000069000000140000007B0000009C0000001A0000000E0000008500000076000000 +- - Name: .comment +- Type: SHT_PROGBITS +- Flags: [ SHF_MERGE, SHF_STRINGS ] +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: 4C696E6B65723A204C4C442031352E302E300000636C616E672076657273696F6E2031352E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420306462616566363162353666306566306162306366333865613932666663316633356265653366662900 +- - Name: .debug_line +- Type: SHT_PROGBITS +- AddressAlign: 0x1 +- Content: 620000000500080037000000010101FB0E0D00010101010000000100000101011F010E00000003011F020F051E0100000000006C97BBE59F7DC6A9EA956633431DA63E0400000902E4030100000000001805030A140500BF05190A0105120608740204000101 +- - Name: .debug_line_str +- Type: SHT_PROGBITS +- Flags: [ SHF_MERGE, SHF_STRINGS ] +- AddressAlign: 0x1 +- EntSize: 0x1 +- Content: 2F746D702F66696C652E637070002F7573722F6C6F63616C2F676F6F676C652F686F6D652F6D69746368702F6C6C766D2D6275696C642F6F707400 +-Symbols: +- - Name: file.cpp +- Type: STT_FILE +- Index: SHN_ABS +- - Name: '$x.0' +- Section: .text +- Value: 0x103E4 +- - Name: _ZZ1fvE15function_global +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304D4 +- Size: 0x4 +- - Name: '$d.1' +- Section: .bss +- Value: 0x304D0 +- - Name: '$d.2' +- Section: .data +- Value: 0x304C0 +- - Name: '$d.3' +- Section: .rodata +- Value: 0x370 +- - Name: '$d.4' +- Section: .debug_abbrev +- - Name: '$d.5' +- Section: .debug_info +- - Name: '$d.6' +- Section: .debug_str_offsets +- - Name: '$d.7' +- Section: .debug_str +- Value: 0xA2 +- - Name: '$d.8' +- Section: .debug_addr +- - 
Name: _ZL4beta +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304D8 +- Size: 0x4 +- - Name: _ZL5alpha +- Type: STT_OBJECT +- Section: .bss +- Value: 0x304DC +- Size: 0x4 +- - Name: '$d.9' +- Section: .comment +- Value: 0x13 +- - Name: '$d.10' +- Section: .eh_frame +- Value: 0x398 +- - Name: '$d.11' +- Section: .debug_line +- - Name: '$d.12' +- Section: .debug_line_str +- Value: 0xE +- - Name: _DYNAMIC +- Section: .dynamic +- Value: 0x20410 +- Other: [ STV_HIDDEN ] +- - Name: _Z1fv +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103E4 +- Size: 0xC +- - Name: _Z1fb +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103F0 +- Size: 0x1C +- - Name: bss_global +- Type: STT_OBJECT +- Section: .bss +- Binding: STB_GLOBAL +- Value: 0x304D0 +- Size: 0x4 +- - Name: data_global +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C0 +- Size: 0x4 +- - Name: str +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C8 +- Size: 0x8 +-DynamicSymbols: +- - Name: _Z1fv +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103E4 +- Size: 0xC +- - Name: _Z1fb +- Type: STT_FUNC +- Section: .text +- Binding: STB_GLOBAL +- Value: 0x103F0 +- Size: 0x1C +- - Name: bss_global +- Type: STT_OBJECT +- Section: .bss +- Binding: STB_GLOBAL +- Value: 0x304D0 +- Size: 0x4 +- - Name: data_global +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C0 +- Size: 0x4 +- - Name: str +- Type: STT_OBJECT +- Section: .data +- Binding: STB_GLOBAL +- Value: 0x304C8 +- Size: 0x8 +-DWARF: +- debug_str: +- - '/tmp/file.cpp' +- - _Z1fb +- - alpha +- - f +- - data_global +- - int +- - '/usr/local/google/home/mitchp/llvm-build/opt' +- - bss_global +- - char +- - _ZL4beta +- - str +- - bool +- - _ZL5alpha +- - b +- - beta +- - function_global +- - _Z1fv +- - 'clang version 15.0.0 (https://github.com/llvm/llvm-project.git 0dbaef61b56f0ef0ab0cf38ea92ffc1f35bee3ff)' +- debug_addr: +- - Length: 0x3C +- Version: 0x5 +- AddressSize: 0x8 +- Entries: +- - Address: 0x304D0 +- - Address: 0x304C0 +- - Address: 0x304C8 +- - Address: 0x304D4 +- - Address: 0x304D8 +- - Address: 0x103E4 +- - Address: 0x103F0 +-... +diff --git a/llvm/test/tools/llvm-symbolizer/data.s b/llvm/test/tools/llvm-symbolizer/data.s +index cc9503c59141..e8039f146dbd 100644 +--- a/llvm/test/tools/llvm-symbolizer/data.s ++++ b/llvm/test/tools/llvm-symbolizer/data.s +@@ -7,12 +7,9 @@ + + # CHECK: d1 + # CHECK-NEXT: 0 8 +-# CHECK-NEXT: ??:? + # CHECK-EMPTY: + # CHECK-NEXT: d2 + # CHECK-NEXT: 8 4 +-# CHECK-NEXT: ??:? +-# CHECK-EMPTY: + + d1: + .quad 0x1122334455667788 +-- +2.38.1.1.g6d9df9d320 + diff --git a/build/build-clang/revert-llvmorg-15-init-13446-g7524fe962e47.patch b/build/build-clang/revert-llvmorg-15-init-13446-g7524fe962e47.patch new file mode 100644 index 0000000000..5bd4601827 --- /dev/null +++ b/build/build-clang/revert-llvmorg-15-init-13446-g7524fe962e47.patch @@ -0,0 +1,39 @@ +From 12f64ca10837bd68ec30804ebfa21653925ad5cf Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Thu, 16 Jun 2022 12:51:29 +0900 +Subject: [PATCH] Revert "[libFuzzer] Use the compiler to link the relocatable + object" + +This reverts commit 7524fe962e479416fd6318407eff4eed5b96a40b. 
+--- + compiler-rt/lib/fuzzer/CMakeLists.txt | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt +index 856cd732d517..d51de53f5acc 100644 +--- a/compiler-rt/lib/fuzzer/CMakeLists.txt ++++ b/compiler-rt/lib/fuzzer/CMakeLists.txt +@@ -138,15 +138,15 @@ if(OS_NAME MATCHES "Linux|Fuchsia" AND + COMPILER_RT_LIBCXX_PATH AND + COMPILER_RT_LIBCXXABI_PATH) + macro(partially_link_libcxx name dir arch) +- get_target_flags_for_arch(${arch} target_cflags) +- if(CMAKE_CXX_COMPILER_ID MATCHES Clang) +- get_compiler_rt_target(${arch} target) +- set(target_cflags --target=${target} ${target_cflags}) ++ if(${arch} MATCHES "i386") ++ set(EMULATION_ARGUMENT "-m" "elf_i386") ++ else() ++ set(EMULATION_ARGUMENT "") + endif() + set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir") + file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir}) + add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD +- COMMAND ${CMAKE_CXX_COMPILER} ${target_cflags} -Wl,--whole-archive "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" -Wl,--no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o ++ COMMAND ${CMAKE_LINKER} ${EMULATION_ARGUMENT} --whole-archive "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" --no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o + COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o + COMMAND ${CMAKE_COMMAND} -E remove "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" + COMMAND ${CMAKE_AR} qcs "$<TARGET_LINKER_FILE:clang_rt.${name}-${arch}>" ${name}.o +-- +2.36.0.1.g2bbe56bd8d + diff --git a/build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f.patch b/build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f.patch new file mode 100644 index 0000000000..716d74d4b5 --- /dev/null +++ b/build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f.patch @@ -0,0 +1,172 @@ +From cf00b30288c4c81b2c6a5af01c38f236148777a0 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Tue, 28 Mar 2023 06:13:36 +0900 +Subject: [PATCH] Revert "[Passes][VectorCombine] enable early run generally + and try load folds" + +This reverts commit 163bb6d64e5f1220777c3ec2a8b58c0666a74d91. +It causes various reftest regressions. +--- + llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++--- + llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 ++------ + llvm/test/Other/new-pm-defaults.ll | 2 +- + llvm/test/Other/new-pm-thinlto-defaults.ll | 1 - + .../Other/new-pm-thinlto-postlink-pgo-defaults.ll | 1 - + .../new-pm-thinlto-postlink-samplepgo-defaults.ll | 1 - + .../Other/new-pm-thinlto-prelink-pgo-defaults.ll | 1 - + .../new-pm-thinlto-prelink-samplepgo-defaults.ll | 1 - + .../PhaseOrdering/X86/vec-load-combine.ll | 15 +++++++++++---- + 9 files changed, 18 insertions(+), 19 deletions(-) + +diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp +index eed29c25714b..b925448cd6c0 100644 +--- a/llvm/lib/Passes/PassBuilderPipelines.cpp ++++ b/llvm/lib/Passes/PassBuilderPipelines.cpp +@@ -611,9 +611,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, + // Delete small array after loop unroll. + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); + +- // Try vectorization/scalarization transforms that are both improvements +- // themselves and can allow further folds with GVN and InstCombine. 
+- FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); ++ // The matrix extension can introduce large vector operations early, which can ++ // benefit from running vector-combine early on. ++ if (EnableMatrix) ++ FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); + + // Eliminate redundancies. + FPM.addPass(MergedLoadStoreMotionPass()); +diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +index 2e489757ebc1..810a9f92bb7a 100644 +--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp ++++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +@@ -1720,12 +1720,6 @@ bool VectorCombine::run() { + // dispatching to folding functions if there's no chance of matching. + if (IsFixedVectorType) { + switch (Opcode) { +- case Instruction::InsertElement: +- MadeChange |= vectorizeLoadInsert(I); +- break; +- case Instruction::ShuffleVector: +- MadeChange |= widenSubvectorLoad(I); +- break; + case Instruction::Load: + MadeChange |= scalarizeLoadExtract(I); + break; +@@ -1754,9 +1748,11 @@ bool VectorCombine::run() { + if (IsFixedVectorType) { + switch (Opcode) { + case Instruction::InsertElement: ++ MadeChange |= vectorizeLoadInsert(I); + MadeChange |= foldInsExtFNeg(I); + break; + case Instruction::ShuffleVector: ++ MadeChange |= widenSubvectorLoad(I); + MadeChange |= foldShuffleOfBinops(I); + MadeChange |= foldSelectShuffle(I); + break; +diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll +index 13612c3bb459..5f84d28af4a6 100644 +--- a/llvm/test/Other/new-pm-defaults.ll ++++ b/llvm/test/Other/new-pm-defaults.ll +@@ -186,7 +186,7 @@ + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ++; CHECK-MATRIX: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll +index 3f5d2d5b153d..ea07128c9f6a 100644 +--- a/llvm/test/Other/new-pm-thinlto-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-defaults.ll +@@ -159,7 +159,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +index 29021ceace54..43e943cb6011 100644 +--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +@@ -121,7 +121,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +index 
daf3141a1f2c..78914d1c23b2 100644 +--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +@@ -130,7 +130,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +index bfe80902f806..5b62ba39add3 100644 +--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +@@ -160,7 +160,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +index c7daf7aa46b1..17475423d696 100644 +--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +@@ -124,7 +124,6 @@ + ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +index 77cbc70ff369..dd7164febea4 100644 +--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll ++++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +@@ -12,13 +12,20 @@ $getAt = comdat any + define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 { + ; SSE-LABEL: @ConvertVectors_ByRef( + ; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 +-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2> +-; SSE-NEXT: ret <4 x float> [[TMP3]] ++; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1 ++; SSE-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ++; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ++; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef> ++; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 5> ++; SSE-NEXT: ret <4 x float> [[TMP7]] + ; + ; AVX-LABEL: @ConvertVectors_ByRef( + ; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 +-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2> +-; AVX-NEXT: ret <4 x 
float> [[TMP3]] ++; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2 ++; AVX-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 8 ++; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i64 2 ++; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP4]], i64 3 ++; AVX-NEXT: ret <4 x float> [[TMP6]] + ; + %2 = alloca ptr, align 8 + %3 = alloca <4 x float>, align 16 +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_17.patch b/build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_17.patch new file mode 100644 index 0000000000..4e217dcf3d --- /dev/null +++ b/build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_17.patch @@ -0,0 +1,172 @@ +From cf00b30288c4c81b2c6a5af01c38f236148777a0 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Tue, 28 Mar 2023 06:13:36 +0900 +Subject: [PATCH] Revert "[Passes][VectorCombine] enable early run generally + and try load folds" + +This reverts commit 163bb6d64e5f1220777c3ec2a8b58c0666a74d91. +It causes various reftest regressions. +--- + llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++--- + llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 ++------ + llvm/test/Other/new-pm-defaults.ll | 2 +- + .../Other/new-pm-thinlto-postlink-defaults.ll | 1 - + .../Other/new-pm-thinlto-postlink-pgo-defaults.ll | 1 - + .../new-pm-thinlto-postlink-samplepgo-defaults.ll | 1 - + .../Other/new-pm-thinlto-prelink-pgo-defaults.ll | 1 - + .../new-pm-thinlto-prelink-samplepgo-defaults.ll | 1 - + .../PhaseOrdering/X86/vec-load-combine.ll | 15 +++++++++++---- + 9 files changed, 18 insertions(+), 19 deletions(-) + +diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp +index eed29c25714b..b925448cd6c0 100644 +--- a/llvm/lib/Passes/PassBuilderPipelines.cpp ++++ b/llvm/lib/Passes/PassBuilderPipelines.cpp +@@ -611,9 +611,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, + // Delete small array after loop unroll. + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); + +- // Try vectorization/scalarization transforms that are both improvements +- // themselves and can allow further folds with GVN and InstCombine. +- FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); ++ // The matrix extension can introduce large vector operations early, which can ++ // benefit from running vector-combine early on. ++ if (EnableMatrix) ++ FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); + + // Eliminate redundancies. + FPM.addPass(MergedLoadStoreMotionPass()); +diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +index 2e489757ebc1..810a9f92bb7a 100644 +--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp ++++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +@@ -1720,12 +1720,6 @@ bool VectorCombine::run() { + // dispatching to folding functions if there's no chance of matching. 
+ if (IsFixedVectorType) { + switch (Opcode) { +- case Instruction::InsertElement: +- MadeChange |= vectorizeLoadInsert(I); +- break; +- case Instruction::ShuffleVector: +- MadeChange |= widenSubvectorLoad(I); +- break; + case Instruction::Load: + MadeChange |= scalarizeLoadExtract(I); + break; +@@ -1754,9 +1748,11 @@ bool VectorCombine::run() { + if (IsFixedVectorType) { + switch (Opcode) { + case Instruction::InsertElement: ++ MadeChange |= vectorizeLoadInsert(I); + MadeChange |= foldInsExtFNeg(I); + break; + case Instruction::ShuffleVector: ++ MadeChange |= widenSubvectorLoad(I); + MadeChange |= foldShuffleOfBinops(I); + MadeChange |= foldSelectShuffle(I); + break; +diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll +index 13612c3bb459..5f84d28af4a6 100644 +--- a/llvm/test/Other/new-pm-defaults.ll ++++ b/llvm/test/Other/new-pm-defaults.ll +@@ -186,7 +186,7 @@ + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass ++; CHECK-MATRIX: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +index 3f5d2d5b153d..ea07128c9f6a 100644 +--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +@@ -159,7 +159,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +index 29021ceace54..43e943cb6011 100644 +--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +@@ -121,7 +121,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +index daf3141a1f2c..78914d1c23b2 100644 +--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +@@ -130,7 +130,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +index 
bfe80902f806..5b62ba39add3 100644 +--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +@@ -160,7 +160,6 @@ + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +index c7daf7aa46b1..17475423d696 100644 +--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll ++++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +@@ -124,7 +124,6 @@ + ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass + ; CHECK-O-NEXT: Running pass: LoopDeletionPass + ; CHECK-O-NEXT: Running pass: SROAPass on foo +-; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass + ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass + ; CHECK-O23SZ-NEXT: Running pass: GVNPass + ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +index 77cbc70ff369..dd7164febea4 100644 +--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll ++++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll +@@ -12,13 +12,20 @@ $getAt = comdat any + define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 { + ; SSE-LABEL: @ConvertVectors_ByRef( + ; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 +-; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2> +-; SSE-NEXT: ret <4 x float> [[TMP3]] ++; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1 ++; SSE-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ++; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ++; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef> ++; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 5> ++; SSE-NEXT: ret <4 x float> [[TMP7]] + ; + ; AVX-LABEL: @ConvertVectors_ByRef( + ; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16 +-; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2> +-; AVX-NEXT: ret <4 x float> [[TMP3]] ++; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2 ++; AVX-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 8 ++; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i64 2 ++; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP4]], i64 3 ++; AVX-NEXT: ret <4 x float> [[TMP6]] + ; + %2 = alloca ptr, align 8 + %3 = alloca <4 x float>, align 16 +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/revert-llvmorg-16-init-7598-g54bfd0484615.patch b/build/build-clang/revert-llvmorg-16-init-7598-g54bfd0484615.patch new file mode 100644 index 0000000000..683d87b7a2 --- /dev/null +++ 
b/build/build-clang/revert-llvmorg-16-init-7598-g54bfd0484615.patch @@ -0,0 +1,866 @@ +The patch changed the way the no_thread_safety_analysis works in a way +that I think actually makes sense, but we have exceptions in the code +that aren't enough to accomodate that change. + +Until our code is adjusted, revert this change. + +--- + clang/docs/ThreadSafetyAnalysis.rst | 10 +- + .../Analysis/Analyses/ThreadSafetyCommon.h | 13 +- + .../clang/Analysis/Analyses/ThreadSafetyTIL.h | 7 +- + .../Analysis/Analyses/ThreadSafetyTraverse.h | 5 +- + clang/lib/Analysis/ThreadSafety.cpp | 202 +++++++++--------- + clang/lib/Analysis/ThreadSafetyCommon.cpp | 46 ++-- + .../SemaCXX/warn-thread-safety-analysis.cpp | 155 +++----------- + 8 files changed, 155 insertions(+), 290 deletions(-) + +diff --git a/clang/docs/ThreadSafetyAnalysis.rst b/clang/docs/ThreadSafetyAnalysis.rst +index dcde0c706c70..23f460b248e1 100644 +--- a/clang/docs/ThreadSafetyAnalysis.rst ++++ b/clang/docs/ThreadSafetyAnalysis.rst +@@ -408,8 +408,7 @@ and destructor refer to the capability via different names; see the + Scoped capabilities are treated as capabilities that are implicitly acquired + on construction and released on destruction. They are associated with + the set of (regular) capabilities named in thread safety attributes on the +-constructor or function returning them by value (using C++17 guaranteed copy +-elision). Acquire-type attributes on other member functions are treated as ++constructor. Acquire-type attributes on other member functions are treated as + applying to that set of associated capabilities, while ``RELEASE`` implies that + a function releases all associated capabilities in whatever mode they're held. + +@@ -931,13 +930,6 @@ implementation. + // Assume mu is not held, implicitly acquire *this and associate it with mu. + MutexLocker(Mutex *mu, defer_lock_t) EXCLUDES(mu) : mut(mu), locked(false) {} + +- // Same as constructors, but without tag types. (Requires C++17 copy elision.) +- static MutexLocker Lock(Mutex *mu) ACQUIRE(mu); +- static MutexLocker Adopt(Mutex *mu) REQUIRES(mu); +- static MutexLocker ReaderLock(Mutex *mu) ACQUIRE_SHARED(mu); +- static MutexLocker AdoptReaderLock(Mutex *mu) REQUIRES_SHARED(mu); +- static MutexLocker DeferLock(Mutex *mu) EXCLUDES(mu); +- + // Release *this and all associated mutexes, if they are still held. + // There is no warning if the scope was already unlocked before. + ~MutexLocker() RELEASE() { +diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +index 9c73d65db266..da69348ea938 100644 +--- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h ++++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +@@ -31,7 +31,6 @@ + #include "clang/Basic/LLVM.h" + #include "llvm/ADT/DenseMap.h" + #include "llvm/ADT/PointerIntPair.h" +-#include "llvm/ADT/PointerUnion.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/Support/Casting.h" + #include <sstream> +@@ -355,7 +354,7 @@ public: + const NamedDecl *AttrDecl; + + // Implicit object argument -- e.g. 'this' +- llvm::PointerUnion<const Expr *, til::SExpr *> SelfArg = nullptr; ++ const Expr *SelfArg = nullptr; + + // Number of funArgs + unsigned NumArgs = 0; +@@ -379,18 +378,10 @@ public: + // Translate a clang expression in an attribute to a til::SExpr. + // Constructs the context from D, DeclExp, and SelfDecl. 
+ CapabilityExpr translateAttrExpr(const Expr *AttrExp, const NamedDecl *D, +- const Expr *DeclExp, +- til::SExpr *Self = nullptr); ++ const Expr *DeclExp, VarDecl *SelfD=nullptr); + + CapabilityExpr translateAttrExpr(const Expr *AttrExp, CallingContext *Ctx); + +- // Translate a variable reference. +- til::LiteralPtr *createVariable(const VarDecl *VD); +- +- // Create placeholder for this: we don't know the VarDecl on construction yet. +- std::pair<til::LiteralPtr *, StringRef> +- createThisPlaceholder(const Expr *Exp); +- + // Translate a clang statement or expression to a TIL expression. + // Also performs substitution of variables; Ctx provides the context. + // Dispatches on the type of S. +diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +index 48593516d853..65556c8d584c 100644 +--- a/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h ++++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h +@@ -634,14 +634,15 @@ typename V::R_SExpr Literal::traverse(V &Vs, typename V::R_Ctx Ctx) { + /// At compile time, pointer literals are represented by symbolic names. + class LiteralPtr : public SExpr { + public: +- LiteralPtr(const ValueDecl *D) : SExpr(COP_LiteralPtr), Cvdecl(D) {} ++ LiteralPtr(const ValueDecl *D) : SExpr(COP_LiteralPtr), Cvdecl(D) { ++ assert(D && "ValueDecl must not be null"); ++ } + LiteralPtr(const LiteralPtr &) = default; + + static bool classof(const SExpr *E) { return E->opcode() == COP_LiteralPtr; } + + // The clang declaration for the value that this pointer points to. + const ValueDecl *clangDecl() const { return Cvdecl; } +- void setClangDecl(const ValueDecl *VD) { Cvdecl = VD; } + + template <class V> + typename V::R_SExpr traverse(V &Vs, typename V::R_Ctx Ctx) { +@@ -650,8 +651,6 @@ public: + + template <class C> + typename C::CType compare(const LiteralPtr* E, C& Cmp) const { +- if (!Cvdecl || !E->Cvdecl) +- return Cmp.comparePointers(this, E); + return Cmp.comparePointers(Cvdecl, E->Cvdecl); + } + +diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h +index 6fc55130655a..e81c00d3dddb 100644 +--- a/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h ++++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyTraverse.h +@@ -623,10 +623,7 @@ protected: + } + + void printLiteralPtr(const LiteralPtr *E, StreamType &SS) { +- if (const NamedDecl *D = E->clangDecl()) +- SS << D->getNameAsString(); +- else +- SS << "<temporary>"; ++ SS << E->clangDecl()->getNameAsString(); + } + + void printVariable(const Variable *V, StreamType &SS, bool IsVarDecl=false) { +diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp +index 76747561a9a3..a29134c6a5e7 100644 +--- a/clang/lib/Analysis/ThreadSafety.cpp ++++ b/clang/lib/Analysis/ThreadSafety.cpp +@@ -1029,7 +1029,7 @@ public: + + template <typename AttrType> + void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, +- const NamedDecl *D, til::SExpr *Self = nullptr); ++ const NamedDecl *D, VarDecl *SelfDecl = nullptr); + + template <class AttrType> + void getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, +@@ -1220,7 +1220,7 @@ bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { + if (const auto *LP = dyn_cast<til::LiteralPtr>(SExp)) { + const ValueDecl *VD = LP->clangDecl(); + // Variables defined in a function are always inaccessible. 
+- if (!VD || !VD->isDefinedOutsideFunctionOrMethod()) ++ if (!VD->isDefinedOutsideFunctionOrMethod()) + return false; + // For now we consider static class members to be inaccessible. + if (isa<CXXRecordDecl>(VD->getDeclContext())) +@@ -1311,10 +1311,10 @@ void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, + template <typename AttrType> + void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, + const Expr *Exp, const NamedDecl *D, +- til::SExpr *Self) { ++ VarDecl *SelfDecl) { + if (Attr->args_size() == 0) { + // The mutex held is the "this" object. +- CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, Self); ++ CapabilityExpr Cp = SxBuilder.translateAttrExpr(nullptr, D, Exp, SelfDecl); + if (Cp.isInvalid()) { + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + return; +@@ -1326,7 +1326,7 @@ void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, + } + + for (const auto *Arg : Attr->args()) { +- CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, Self); ++ CapabilityExpr Cp = SxBuilder.translateAttrExpr(Arg, D, Exp, SelfDecl); + if (Cp.isInvalid()) { + warnInvalidLock(Handler, nullptr, D, Exp, Cp.getKind()); + continue; +@@ -1529,26 +1529,21 @@ class BuildLockset : public ConstStmtVisitor<BuildLockset> { + + ThreadSafetyAnalyzer *Analyzer; + FactSet FSet; +- /// Maps constructed objects to `this` placeholder prior to initialization. +- llvm::SmallDenseMap<const Expr *, til::LiteralPtr *> ConstructedObjects; + LocalVariableMap::Context LVarCtx; + unsigned CtxIndex; + + // helper functions + void warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, AccessKind AK, + Expr *MutexExp, ProtectedOperationKind POK, +- til::LiteralPtr *Self, SourceLocation Loc); +- void warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, Expr *MutexExp, +- til::LiteralPtr *Self, SourceLocation Loc); ++ SourceLocation Loc); ++ void warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, Expr *MutexExp); + + void checkAccess(const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK = POK_VarAccess); + void checkPtAccess(const Expr *Exp, AccessKind AK, + ProtectedOperationKind POK = POK_VarAccess); + +- void handleCall(const Expr *Exp, const NamedDecl *D, +- til::LiteralPtr *Self = nullptr, +- SourceLocation Loc = SourceLocation()); ++ void handleCall(const Expr *Exp, const NamedDecl *D, VarDecl *VD = nullptr); + void examineArguments(const FunctionDecl *FD, + CallExpr::const_arg_iterator ArgBegin, + CallExpr::const_arg_iterator ArgEnd, +@@ -1565,7 +1560,6 @@ public: + void VisitCallExpr(const CallExpr *Exp); + void VisitCXXConstructExpr(const CXXConstructExpr *Exp); + void VisitDeclStmt(const DeclStmt *S); +- void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *Exp); + }; + + } // namespace +@@ -1575,12 +1569,10 @@ public: + void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, + AccessKind AK, Expr *MutexExp, + ProtectedOperationKind POK, +- til::LiteralPtr *Self, + SourceLocation Loc) { + LockKind LK = getLockKindFromAccessKind(AK); + +- CapabilityExpr Cp = +- Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); ++ CapabilityExpr Cp = Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp); + if (Cp.isInvalid()) { + warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, Cp.getKind()); + return; +@@ -1637,10 +1629,8 @@ void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, const Expr *Exp, + + /// Warn if the LSet contains the given lock. 
+ void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, +- Expr *MutexExp, til::LiteralPtr *Self, +- SourceLocation Loc) { +- CapabilityExpr Cp = +- Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp, Self); ++ Expr *MutexExp) { ++ CapabilityExpr Cp = Analyzer->SxBuilder.translateAttrExpr(MutexExp, D, Exp); + if (Cp.isInvalid()) { + warnInvalidLock(Analyzer->Handler, MutexExp, D, Exp, Cp.getKind()); + return; +@@ -1651,7 +1641,7 @@ void BuildLockset::warnIfMutexHeld(const NamedDecl *D, const Expr *Exp, + const FactEntry *LDat = FSet.findLock(Analyzer->FactMan, Cp); + if (LDat) { + Analyzer->Handler.handleFunExcludesLock(Cp.getKind(), D->getNameAsString(), +- Cp.toString(), Loc); ++ Cp.toString(), Exp->getExprLoc()); + } + } + +@@ -1721,7 +1711,7 @@ void BuildLockset::checkAccess(const Expr *Exp, AccessKind AK, + } + + for (const auto *I : D->specific_attrs<GuardedByAttr>()) +- warnIfMutexNotHeld(D, Exp, AK, I->getArg(), POK, nullptr, Loc); ++ warnIfMutexNotHeld(D, Exp, AK, I->getArg(), POK, Loc); + } + + /// Checks pt_guarded_by and pt_guarded_var attributes. +@@ -1758,8 +1748,7 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, + Analyzer->Handler.handleNoMutexHeld(D, PtPOK, AK, Exp->getExprLoc()); + + for (auto const *I : D->specific_attrs<PtGuardedByAttr>()) +- warnIfMutexNotHeld(D, Exp, AK, I->getArg(), PtPOK, nullptr, +- Exp->getExprLoc()); ++ warnIfMutexNotHeld(D, Exp, AK, I->getArg(), PtPOK, Exp->getExprLoc()); + } + + /// Process a function call, method call, constructor call, +@@ -1772,35 +1761,21 @@ void BuildLockset::checkPtAccess(const Expr *Exp, AccessKind AK, + /// and check that the appropriate locks are held. Non-const method calls with + /// the same signature as const method calls can be also treated as reads. + /// +-/// \param Exp The call expression. +-/// \param D The callee declaration. +-/// \param Self If \p Exp = nullptr, the implicit this argument. +-/// \param Loc If \p Exp = nullptr, the location. + void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, +- til::LiteralPtr *Self, SourceLocation Loc) { ++ VarDecl *VD) { ++ SourceLocation Loc = Exp->getExprLoc(); + CapExprSet ExclusiveLocksToAdd, SharedLocksToAdd; + CapExprSet ExclusiveLocksToRemove, SharedLocksToRemove, GenericLocksToRemove; + CapExprSet ScopedReqsAndExcludes; + + // Figure out if we're constructing an object of scoped lockable class +- CapabilityExpr Scp; +- if (Exp) { +- assert(!Self); +- const auto *TagT = Exp->getType()->getAs<TagType>(); +- if (TagT && Exp->isPRValue()) { +- std::pair<til::LiteralPtr *, StringRef> Placeholder = +- Analyzer->SxBuilder.createThisPlaceholder(Exp); +- [[maybe_unused]] auto inserted = +- ConstructedObjects.insert({Exp, Placeholder.first}); +- assert(inserted.second && "Are we visiting the same expression again?"); +- if (isa<CXXConstructExpr>(Exp)) +- Self = Placeholder.first; +- if (TagT->getDecl()->hasAttr<ScopedLockableAttr>()) +- Scp = CapabilityExpr(Placeholder.first, Placeholder.second, false); ++ bool isScopedVar = false; ++ if (VD) { ++ if (const auto *CD = dyn_cast<const CXXConstructorDecl>(D)) { ++ const CXXRecordDecl* PD = CD->getParent(); ++ if (PD && PD->hasAttr<ScopedLockableAttr>()) ++ isScopedVar = true; + } +- +- assert(Loc.isInvalid()); +- Loc = Exp->getExprLoc(); + } + + for(const Attr *At : D->attrs()) { +@@ -1811,7 +1786,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<AcquireCapabilityAttr>(At); + Analyzer->getMutexIDs(A->isShared() ? 
SharedLocksToAdd + : ExclusiveLocksToAdd, +- A, Exp, D, Self); ++ A, Exp, D, VD); + break; + } + +@@ -1822,7 +1797,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<AssertExclusiveLockAttr>(At); + + CapExprSet AssertLocks; +- Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); ++ Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( +@@ -1833,7 +1808,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<AssertSharedLockAttr>(At); + + CapExprSet AssertLocks; +- Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); ++ Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>( +@@ -1844,7 +1819,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + case attr::AssertCapability: { + const auto *A = cast<AssertCapabilityAttr>(At); + CapExprSet AssertLocks; +- Analyzer->getMutexIDs(AssertLocks, A, Exp, D, Self); ++ Analyzer->getMutexIDs(AssertLocks, A, Exp, D, VD); + for (const auto &AssertLock : AssertLocks) + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>( + AssertLock, +@@ -1858,11 +1833,11 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + case attr::ReleaseCapability: { + const auto *A = cast<ReleaseCapabilityAttr>(At); + if (A->isGeneric()) +- Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, Self); ++ Analyzer->getMutexIDs(GenericLocksToRemove, A, Exp, D, VD); + else if (A->isShared()) +- Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, Self); ++ Analyzer->getMutexIDs(SharedLocksToRemove, A, Exp, D, VD); + else +- Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, Self); ++ Analyzer->getMutexIDs(ExclusiveLocksToRemove, A, Exp, D, VD); + break; + } + +@@ -1870,10 +1845,10 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + const auto *A = cast<RequiresCapabilityAttr>(At); + for (auto *Arg : A->args()) { + warnIfMutexNotHeld(D, Exp, A->isShared() ? AK_Read : AK_Written, Arg, +- POK_FunctionCall, Self, Loc); ++ POK_FunctionCall, Exp->getExprLoc()); + // use for adopting a lock +- if (!Scp.shouldIgnore()) +- Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); ++ if (isScopedVar) ++ Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, VD); + } + break; + } +@@ -1881,10 +1856,10 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + case attr::LocksExcluded: { + const auto *A = cast<LocksExcludedAttr>(At); + for (auto *Arg : A->args()) { +- warnIfMutexHeld(D, Exp, Arg, Self, Loc); ++ warnIfMutexHeld(D, Exp, Arg); + // use for deferring a lock +- if (!Scp.shouldIgnore()) +- Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, Self); ++ if (isScopedVar) ++ Analyzer->getMutexIDs(ScopedReqsAndExcludes, A, Exp, D, VD); + } + break; + } +@@ -1907,7 +1882,7 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + + // Add locks. + FactEntry::SourceKind Source = +- !Scp.shouldIgnore() ? FactEntry::Managed : FactEntry::Acquired; ++ isScopedVar ? 
FactEntry::Managed : FactEntry::Acquired; + for (const auto &M : ExclusiveLocksToAdd) + Analyzer->addLock(FSet, std::make_unique<LockableFactEntry>(M, LK_Exclusive, + Loc, Source)); +@@ -1915,9 +1890,15 @@ void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, + Analyzer->addLock( + FSet, std::make_unique<LockableFactEntry>(M, LK_Shared, Loc, Source)); + +- if (!Scp.shouldIgnore()) { ++ if (isScopedVar) { + // Add the managing object as a dummy mutex, mapped to the underlying mutex. +- auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, Loc); ++ SourceLocation MLoc = VD->getLocation(); ++ DeclRefExpr DRE(VD->getASTContext(), VD, false, VD->getType(), VK_LValue, ++ VD->getLocation()); ++ // FIXME: does this store a pointer to DRE? ++ CapabilityExpr Scp = Analyzer->SxBuilder.translateAttrExpr(&DRE, nullptr); ++ ++ auto ScopedEntry = std::make_unique<ScopedLockableFactEntry>(Scp, MLoc); + for (const auto &M : ExclusiveLocksToAdd) + ScopedEntry->addLock(M); + for (const auto &M : SharedLocksToAdd) +@@ -2077,11 +2058,36 @@ void BuildLockset::VisitCXXConstructExpr(const CXXConstructExpr *Exp) { + } else { + examineArguments(D, Exp->arg_begin(), Exp->arg_end()); + } +- if (D && D->hasAttrs()) +- handleCall(Exp, D); + } + +-static const Expr *UnpackConstruction(const Expr *E) { ++static CXXConstructorDecl * ++findConstructorForByValueReturn(const CXXRecordDecl *RD) { ++ // Prefer a move constructor over a copy constructor. If there's more than ++ // one copy constructor or more than one move constructor, we arbitrarily ++ // pick the first declared such constructor rather than trying to guess which ++ // one is more appropriate. ++ CXXConstructorDecl *CopyCtor = nullptr; ++ for (auto *Ctor : RD->ctors()) { ++ if (Ctor->isDeleted()) ++ continue; ++ if (Ctor->isMoveConstructor()) ++ return Ctor; ++ if (!CopyCtor && Ctor->isCopyConstructor()) ++ CopyCtor = Ctor; ++ } ++ return CopyCtor; ++} ++ ++static Expr *buildFakeCtorCall(CXXConstructorDecl *CD, ArrayRef<Expr *> Args, ++ SourceLocation Loc) { ++ ASTContext &Ctx = CD->getASTContext(); ++ return CXXConstructExpr::Create(Ctx, Ctx.getRecordType(CD->getParent()), Loc, ++ CD, true, Args, false, false, false, false, ++ CXXConstructExpr::CK_Complete, ++ SourceRange(Loc, Loc)); ++} ++ ++static Expr *UnpackConstruction(Expr *E) { + if (auto *CE = dyn_cast<CastExpr>(E)) + if (CE->getCastKind() == CK_NoOp) + E = CE->getSubExpr()->IgnoreParens(); +@@ -2100,7 +2106,7 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { + + for (auto *D : S->getDeclGroup()) { + if (auto *VD = dyn_cast_or_null<VarDecl>(D)) { +- const Expr *E = VD->getInit(); ++ Expr *E = VD->getInit(); + if (!E) + continue; + E = E->IgnoreParens(); +@@ -2110,27 +2116,29 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) { + E = EWC->getSubExpr()->IgnoreParens(); + E = UnpackConstruction(E); + +- if (auto Object = ConstructedObjects.find(E); +- Object != ConstructedObjects.end()) { +- Object->second->setClangDecl(VD); +- ConstructedObjects.erase(Object); ++ if (const auto *CE = dyn_cast<CXXConstructExpr>(E)) { ++ const auto *CtorD = dyn_cast_or_null<NamedDecl>(CE->getConstructor()); ++ if (!CtorD || !CtorD->hasAttrs()) ++ continue; ++ handleCall(E, CtorD, VD); ++ } else if (isa<CallExpr>(E) && E->isPRValue()) { ++ // If the object is initialized by a function call that returns a ++ // scoped lockable by value, use the attributes on the copy or move ++ // constructor to figure out what effect that should have on the ++ // lockset. 
++ // FIXME: Is this really the best way to handle this situation? ++ auto *RD = E->getType()->getAsCXXRecordDecl(); ++ if (!RD || !RD->hasAttr<ScopedLockableAttr>()) ++ continue; ++ CXXConstructorDecl *CtorD = findConstructorForByValueReturn(RD); ++ if (!CtorD || !CtorD->hasAttrs()) ++ continue; ++ handleCall(buildFakeCtorCall(CtorD, {E}, E->getBeginLoc()), CtorD, VD); + } + } + } + } + +-void BuildLockset::VisitMaterializeTemporaryExpr( +- const MaterializeTemporaryExpr *Exp) { +- if (const ValueDecl *ExtD = Exp->getExtendingDecl()) { +- if (auto Object = +- ConstructedObjects.find(UnpackConstruction(Exp->getSubExpr())); +- Object != ConstructedObjects.end()) { +- Object->second->setClangDecl(ExtD); +- ConstructedObjects.erase(Object); +- } +- } +-} +- + /// Given two facts merging on a join point, possibly warn and decide whether to + /// keep or replace. + /// +@@ -2403,33 +2411,19 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { + LocksetBuilder.Visit(CS.getStmt()); + break; + } +- // Ignore BaseDtor and MemberDtor for now. ++ // Ignore BaseDtor, MemberDtor, and TemporaryDtor for now. + case CFGElement::AutomaticObjectDtor: { + CFGAutomaticObjDtor AD = BI.castAs<CFGAutomaticObjDtor>(); + const auto *DD = AD.getDestructorDecl(AC.getASTContext()); + if (!DD->hasAttrs()) + break; + +- LocksetBuilder.handleCall(nullptr, DD, +- SxBuilder.createVariable(AD.getVarDecl()), +- AD.getTriggerStmt()->getEndLoc()); +- break; +- } +- case CFGElement::TemporaryDtor: { +- auto TD = BI.castAs<CFGTemporaryDtor>(); +- +- // Clean up constructed object even if there are no attributes to +- // keep the number of objects in limbo as small as possible. +- if (auto Object = LocksetBuilder.ConstructedObjects.find( +- TD.getBindTemporaryExpr()->getSubExpr()); +- Object != LocksetBuilder.ConstructedObjects.end()) { +- const auto *DD = TD.getDestructorDecl(AC.getASTContext()); +- if (DD->hasAttrs()) +- // TODO: the location here isn't quite correct. +- LocksetBuilder.handleCall(nullptr, DD, Object->second, +- TD.getBindTemporaryExpr()->getEndLoc()); +- LocksetBuilder.ConstructedObjects.erase(Object); +- } ++ // Create a dummy expression, ++ auto *VD = const_cast<VarDecl *>(AD.getVarDecl()); ++ DeclRefExpr DRE(VD->getASTContext(), VD, false, ++ VD->getType().getNonReferenceType(), VK_LValue, ++ AD.getTriggerStmt()->getEndLoc()); ++ LocksetBuilder.handleCall(&DRE, DD); + break; + } + default: +diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp +index a771149f1591..06b61b4de92f 100644 +--- a/clang/lib/Analysis/ThreadSafetyCommon.cpp ++++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp +@@ -115,22 +115,19 @@ static StringRef ClassifyDiagnostic(QualType VDT) { + /// \param D The declaration to which the attribute is attached. + /// \param DeclExp An expression involving the Decl to which the attribute + /// is attached. E.g. the call to a function. +-/// \param Self S-expression to substitute for a \ref CXXThisExpr. + CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + const NamedDecl *D, + const Expr *DeclExp, +- til::SExpr *Self) { ++ VarDecl *SelfDecl) { + // If we are processing a raw attribute expression, with no substitutions. +- if (!DeclExp && !Self) ++ if (!DeclExp) + return translateAttrExpr(AttrExp, nullptr); + + CallingContext Ctx(nullptr, D); + + // Examine DeclExp to find SelfArg and FunArgs, which are used to substitute + // for formal parameters when we call buildMutexID later. +- if (!DeclExp) +- /* We'll use Self. 
*/; +- else if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { ++ if (const auto *ME = dyn_cast<MemberExpr>(DeclExp)) { + Ctx.SelfArg = ME->getBase(); + Ctx.SelfArrow = ME->isArrow(); + } else if (const auto *CE = dyn_cast<CXXMemberCallExpr>(DeclExp)) { +@@ -145,24 +142,29 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + Ctx.SelfArg = nullptr; // Will be set below + Ctx.NumArgs = CE->getNumArgs(); + Ctx.FunArgs = CE->getArgs(); ++ } else if (D && isa<CXXDestructorDecl>(D)) { ++ // There's no such thing as a "destructor call" in the AST. ++ Ctx.SelfArg = DeclExp; + } + +- if (Self) { +- assert(!Ctx.SelfArg && "Ambiguous self argument"); +- Ctx.SelfArg = Self; ++ // Hack to handle constructors, where self cannot be recovered from ++ // the expression. ++ if (SelfDecl && !Ctx.SelfArg) { ++ DeclRefExpr SelfDRE(SelfDecl->getASTContext(), SelfDecl, false, ++ SelfDecl->getType(), VK_LValue, ++ SelfDecl->getLocation()); ++ Ctx.SelfArg = &SelfDRE; + + // If the attribute has no arguments, then assume the argument is "this". + if (!AttrExp) +- return CapabilityExpr( +- Self, ClassifyDiagnostic(cast<CXXMethodDecl>(D)->getThisObjectType()), +- false); ++ return translateAttrExpr(Ctx.SelfArg, nullptr); + else // For most attributes. + return translateAttrExpr(AttrExp, &Ctx); + } + + // If the attribute has no arguments, then assume the argument is "this". + if (!AttrExp) +- return translateAttrExpr(cast<const Expr *>(Ctx.SelfArg), nullptr); ++ return translateAttrExpr(Ctx.SelfArg, nullptr); + else // For most attributes. + return translateAttrExpr(AttrExp, &Ctx); + } +@@ -216,16 +218,6 @@ CapabilityExpr SExprBuilder::translateAttrExpr(const Expr *AttrExp, + return CapabilityExpr(E, Kind, Neg); + } + +-til::LiteralPtr *SExprBuilder::createVariable(const VarDecl *VD) { +- return new (Arena) til::LiteralPtr(VD); +-} +- +-std::pair<til::LiteralPtr *, StringRef> +-SExprBuilder::createThisPlaceholder(const Expr *Exp) { +- return {new (Arena) til::LiteralPtr(nullptr), +- ClassifyDiagnostic(Exp->getType())}; +-} +- + // Translate a clang statement or expression to a TIL expression. + // Also performs substitution of variables; Ctx provides the context. + // Dispatches on the type of S. +@@ -335,12 +327,8 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, + til::SExpr *SExprBuilder::translateCXXThisExpr(const CXXThisExpr *TE, + CallingContext *Ctx) { + // Substitute for 'this' +- if (Ctx && Ctx->SelfArg) { +- if (const auto *SelfArg = dyn_cast<const Expr *>(Ctx->SelfArg)) +- return translate(SelfArg, Ctx->Prev); +- else +- return cast<til::SExpr *>(Ctx->SelfArg); +- } ++ if (Ctx && Ctx->SelfArg) ++ return translate(Ctx->SelfArg, Ctx->Prev); + assert(SelfVar && "We have no variable for 'this'!"); + return SelfVar; + } +diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +index 8e312e589d81..e1cfa1f3fd17 100644 +--- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp ++++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +@@ -1606,30 +1606,6 @@ namespace substitution_test { + dlr.unlockData(d1); + } + }; +- +- // Automatic object destructor calls don't appear as expressions in the CFG, +- // so we have to handle them separately whenever substitutions are required. 
+- struct DestructorRequires { +- Mutex mu; +- ~DestructorRequires() EXCLUSIVE_LOCKS_REQUIRED(mu); +- }; +- +- void destructorRequires() { +- DestructorRequires rd; +- rd.mu.AssertHeld(); +- } +- +- struct DestructorExcludes { +- Mutex mu; +- ~DestructorExcludes() LOCKS_EXCLUDED(mu); +- }; +- +- void destructorExcludes() { +- DestructorExcludes ed; +- ed.mu.Lock(); // expected-note {{mutex acquired here}} +- } // expected-warning {{cannot call function '~DestructorExcludes' while mutex 'ed.mu' is held}} +- // expected-warning@-1 {{mutex 'ed.mu' is still held at the end of function}} +- + } // end namespace substituation_test + + +@@ -1714,15 +1690,6 @@ struct TestScopedLockable { + } + #endif + +- void temporary() { +- MutexLock{&mu1}, a = 5; +- } +- +- void lifetime_extension() { +- const MutexLock &mulock = MutexLock(&mu1); +- a = 5; +- } +- + void foo2() { + ReaderMutexLock mulock1(&mu1); + if (getBool()) { +@@ -1741,12 +1708,6 @@ struct TestScopedLockable { + // expected-warning {{acquiring mutex 'mu1' that is already held}} + } + +- void temporary_double_lock() { +- MutexLock mulock_a(&mu1); // expected-note{{mutex acquired here}} +- MutexLock{&mu1}; // \ +- // expected-warning {{acquiring mutex 'mu1' that is already held}} +- } +- + void foo4() { + MutexLock mulock1(&mu1), mulock2(&mu2); + a = b+1; +@@ -4226,20 +4187,6 @@ public: + void foo() EXCLUSIVE_LOCKS_REQUIRED(this); + }; + +-class SelfLockDeferred { +-public: +- SelfLockDeferred() LOCKS_EXCLUDED(mu_); +- ~SelfLockDeferred() UNLOCK_FUNCTION(mu_); +- +- Mutex mu_; +-}; +- +-class LOCKABLE SelfLockDeferred2 { +-public: +- SelfLockDeferred2() LOCKS_EXCLUDED(this); +- ~SelfLockDeferred2() UNLOCK_FUNCTION(); +-}; +- + + void test() { + SelfLock s; +@@ -4251,14 +4198,6 @@ void test2() { + s2.foo(); + } + +-void testDeferredTemporary() { +- SelfLockDeferred(); // expected-warning {{releasing mutex '<temporary>.mu_' that was not held}} +-} +- +-void testDeferredTemporary2() { +- SelfLockDeferred2(); // expected-warning {{releasing mutex '<temporary>' that was not held}} +-} +- + } // end namespace SelfConstructorTest + + +@@ -5953,75 +5892,47 @@ C c; + void f() { c[A()]->g(); } + } // namespace PR34800 + +-#ifdef __cpp_guaranteed_copy_elision +- + namespace ReturnScopedLockable { ++ template<typename Object> class SCOPED_LOCKABLE ReadLockedPtr { ++ public: ++ ReadLockedPtr(Object *ptr) SHARED_LOCK_FUNCTION((*this)->mutex); ++ ReadLockedPtr(ReadLockedPtr &&) SHARED_LOCK_FUNCTION((*this)->mutex); ++ ~ReadLockedPtr() UNLOCK_FUNCTION(); + +-class Object { +-public: +- MutexLock lock() EXCLUSIVE_LOCK_FUNCTION(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return MutexLock(&mutex); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} +- +- ReaderMutexLock lockShared() SHARED_LOCK_FUNCTION(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return ReaderMutexLock(&mutex); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} +- +- MutexLock adopt() EXCLUSIVE_LOCKS_REQUIRED(mutex) { +- // TODO: False positive because scoped lock isn't destructed. 
+- return MutexLock(&mutex, true); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} ++ Object *operator->() const { return object; } + +- ReaderMutexLock adoptShared() SHARED_LOCKS_REQUIRED(mutex) { +- // TODO: False positive because scoped lock isn't destructed. +- return ReaderMutexLock(&mutex, true); // expected-note {{mutex acquired here}} +- } // expected-warning {{mutex 'mutex' is still held at the end of function}} ++ private: ++ Object *object; ++ }; + +- int x GUARDED_BY(mutex); +- void needsLock() EXCLUSIVE_LOCKS_REQUIRED(mutex); ++ struct Object { ++ int f() SHARED_LOCKS_REQUIRED(mutex); ++ Mutex mutex; ++ }; + +- void testInside() { +- MutexLock scope = lock(); +- x = 1; +- needsLock(); ++ ReadLockedPtr<Object> get(); ++ int use() { ++ auto ptr = get(); ++ return ptr->f(); ++ } ++ void use_constructor() { ++ auto ptr = ReadLockedPtr<Object>(nullptr); ++ ptr->f(); ++ auto ptr2 = ReadLockedPtr<Object>{nullptr}; ++ ptr2->f(); ++ auto ptr3 = (ReadLockedPtr<Object>{nullptr}); ++ ptr3->f(); ++ } ++ struct Convertible { ++ Convertible(); ++ operator ReadLockedPtr<Object>(); ++ }; ++ void use_conversion() { ++ ReadLockedPtr<Object> ptr = Convertible(); ++ ptr->f(); + } +- +- Mutex mutex; +-}; +- +-Object obj; +- +-void testLock() { +- MutexLock scope = obj.lock(); +- obj.x = 1; +- obj.needsLock(); + } + +-int testSharedLock() { +- ReaderMutexLock scope = obj.lockShared(); +- obj.x = 1; // expected-warning {{writing variable 'x' requires holding mutex 'obj.mutex' exclusively}} +- return obj.x; +-} +- +-void testAdopt() { +- obj.mutex.Lock(); +- MutexLock scope = obj.adopt(); +- obj.x = 1; +-} +- +-int testAdoptShared() { +- obj.mutex.Lock(); +- ReaderMutexLock scope = obj.adoptShared(); +- obj.x = 1; +- return obj.x; +-} +- +-} // namespace ReturnScopedLockable +- +-#endif +- + namespace PR38640 { + void f() { + // Self-referencing assignment previously caused an infinite loop when thread +-- +2.37.1.1.g659da70093 + diff --git a/build/build-clang/revert-llvmorg-17-init-4120-g02e8eb1a438b.patch b/build/build-clang/revert-llvmorg-17-init-4120-g02e8eb1a438b.patch new file mode 100644 index 0000000000..63e59ee68b --- /dev/null +++ b/build/build-clang/revert-llvmorg-17-init-4120-g02e8eb1a438b.patch @@ -0,0 +1,118 @@ +From 2836e92ea557be53fcd91e38cb05a989ad0167e9 Mon Sep 17 00:00:00 2001 +From: Mike Hommey <mh@glandium.org> +Date: Wed, 8 Mar 2023 14:44:58 +0900 +Subject: [PATCH] Revert "Split getCompileUnitFor{Data,Code}Address." + +This reverts commit 02e8eb1a438bdb1dc9a97aea75a8c9c748048039, which +applies on top of cead4eceb01b935fae07bf4a7e91911b344d2fec, that we +revert too. +--- + .../llvm/DebugInfo/DWARF/DWARFContext.h | 11 +-------- + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 23 ++++++++----------- + 2 files changed, 11 insertions(+), 23 deletions(-) + +diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +index 4eba79a7215f..df903b967ef6 100644 +--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h ++++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +@@ -445,16 +445,7 @@ public: + /// address. + /// TODO: change input parameter from "uint64_t Address" + /// into "SectionedAddress Address" +- DWARFCompileUnit *getCompileUnitForCodeAddress(uint64_t Address); +- +- /// Return the compile unit which contains data with the provided address. 
+- /// Note: This is more expensive than `getCompileUnitForAddress`, as if +- /// `Address` isn't found in the CU ranges (which is cheap), then it falls +- /// back to an expensive O(n) walk of all CU's looking for data that spans the +- /// address. +- /// TODO: change input parameter from "uint64_t Address" into +- /// "SectionedAddress Address" +- DWARFCompileUnit *getCompileUnitForDataAddress(uint64_t Address); ++ DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address); + + /// Returns whether CU/TU should be populated manually. TU Index populated + /// manually only for DWARF5. +diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +index f648ef8ff770..dd86144d16e0 100644 +--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp ++++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +@@ -1118,17 +1118,14 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) { + NormalUnits.getUnitForOffset(Offset)); + } + +-DWARFCompileUnit *DWARFContext::getCompileUnitForCodeAddress(uint64_t Address) { +- uint64_t CUOffset = getDebugAranges()->findAddress(Address); +- return getCompileUnitForOffset(CUOffset); +-} +- +-DWARFCompileUnit *DWARFContext::getCompileUnitForDataAddress(uint64_t Address) { ++DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { ++ // First, get the offset of the compile unit. + uint64_t CUOffset = getDebugAranges()->findAddress(Address); ++ // Retrieve the compile unit. + if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset)) + return OffsetCU; + +- // Global variables are often missed by the above search, for one of two ++ // Global variables are often not found by the above search, for one of two + // reasons: + // 1. .debug_aranges may not include global variables. On clang, it seems we + // put the globals in the aranges, but this isn't true for gcc. 
+@@ -1149,7 +1146,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForDataAddress(uint64_t Address) { + DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { + DIEsForAddress Result; + +- DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address); ++ DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) + return Result; + +@@ -1300,7 +1297,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, + std::vector<DILocal> + DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { + std::vector<DILocal> Result; +- DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); ++ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; + +@@ -1313,7 +1310,7 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) { + DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + DILineInfoSpecifier Spec) { + DILineInfo Result; +- DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); ++ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; + +@@ -1334,7 +1331,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, + DILineInfo + DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + DILineInfo Result; +- DWARFCompileUnit *CU = getCompileUnitForDataAddress(Address.Address); ++ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Result; + +@@ -1349,7 +1346,7 @@ DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) { + DILineInfoTable DWARFContext::getLineInfoForAddressRange( + object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Spec) { + DILineInfoTable Lines; +- DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); ++ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return Lines; + +@@ -1405,7 +1402,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address, + DILineInfoSpecifier Spec) { + DIInliningInfo InliningInfo; + +- DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address); ++ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address); + if (!CU) + return InliningInfo; + +-- +2.39.0.1.g6739ec1790 + diff --git a/build/build-clang/skip-3-stages.json b/build/build-clang/skip-3-stages.json new file mode 100644 index 0000000000..79b1bf193f --- /dev/null +++ b/build/build-clang/skip-3-stages.json @@ -0,0 +1,6 @@ +{ + "skip_stages": "3", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": "{MOZ_FETCHES_DIR}/clang/bin/clang" +} diff --git a/build/build-clang/skip-stage-1-win64.json b/build/build-clang/skip-stage-1-win64.json new file mode 100644 index 0000000000..8dee151003 --- /dev/null +++ b/build/build-clang/skip-stage-1-win64.json @@ -0,0 +1,7 @@ +{ + "skip_stages": "1", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl.exe", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl.exe", + "ml": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl.exe", + "lib": "{MOZ_FETCHES_DIR}/clang/bin/llvm-lib.exe" +} diff --git a/build/build-clang/skip-stage-1.json b/build/build-clang/skip-stage-1.json new file mode 100644 index 0000000000..aa1101b13b --- /dev/null +++ b/build/build-clang/skip-stage-1.json @@ -0,0 +1,6 @@ +{ + "skip_stages": "1", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang++", + "as": 
"{MOZ_FETCHES_DIR}/clang/bin/clang" +} diff --git a/build/build-clang/unpoison-thread-stacks_clang_10.patch b/build/build-clang/unpoison-thread-stacks_clang_10.patch new file mode 100644 index 0000000000..563fa1d7bf --- /dev/null +++ b/build/build-clang/unpoison-thread-stacks_clang_10.patch @@ -0,0 +1,64 @@ +[winasan] Unpoison the stack in NtTerminateThread + +In long-running builds we've seen some ASan complaints during thread creation +that we suspect are due to leftover poisoning from previous threads whose stacks +occupied that memory. This patch adds a hook that unpoisons the stack just +before the NtTerminateThread syscall. + +Differential Revision: https://reviews.llvm.org/D52091 + +** Update for clang 9 ** : After some backouts, this patch eventually landed +upstream in a different form, as the TLS handler `asan_thread_exit`, but that +variant causes failures in our test suite, so revert the TLS handler in favor of +the interceptor approach from the first patch. + +diff --git a/compiler-rt/lib/asan/asan_win.cpp b/compiler-rt/lib/asan/asan_win.cpp +index 417892aaedd..5fe86db44f4 100644 +--- a/compiler-rt/lib/asan/asan_win.cpp ++++ b/compiler-rt/lib/asan/asan_win.cpp +@@ -154,6 +154,14 @@ INTERCEPTOR_WINAPI(HANDLE, CreateThread, LPSECURITY_ATTRIBUTES security, + thr_flags, tid); + } + ++INTERCEPTOR_WINAPI(void, NtTerminateThread, void *rcx) { ++ // Unpoison the terminating thread's stack because the memory may be re-used. ++ NT_TIB *tib = (NT_TIB *)NtCurrentTeb(); ++ uptr stackSize = (uptr)tib->StackBase - (uptr)tib->StackLimit; ++ __asan_unpoison_memory_region(tib->StackLimit, stackSize); ++ return REAL(NtTerminateThread(rcx)); ++} ++ + // }}} + + namespace __asan { +@@ -168,7 +176,9 @@ void InitializePlatformInterceptors() { + + ASAN_INTERCEPT_FUNC(CreateThread); + ASAN_INTERCEPT_FUNC(SetUnhandledExceptionFilter); +- ++ CHECK(::__interception::OverrideFunction("NtTerminateThread", ++ (uptr)WRAP(NtTerminateThread), ++ (uptr *)&REAL(NtTerminateThread))); + #ifdef _WIN64 + ASAN_INTERCEPT_FUNC(__C_specific_handler); + #else +@@ -380,19 +390,6 @@ __declspec(allocate(".CRT$XLAB")) void(NTAPI *__asan_tls_init)( + void *, unsigned long, void *) = asan_thread_init; + #endif + +-static void NTAPI asan_thread_exit(void *module, DWORD reason, void *reserved) { +- if (reason == DLL_THREAD_DETACH) { +- // Unpoison the thread's stack because the memory may be re-used. 
+- NT_TIB *tib = (NT_TIB *)NtCurrentTeb(); +- uptr stackSize = (uptr)tib->StackBase - (uptr)tib->StackLimit; +- __asan_unpoison_memory_region(tib->StackLimit, stackSize); +- } +-} +- +-#pragma section(".CRT$XLY", long, read) +-__declspec(allocate(".CRT$XLY")) void(NTAPI *__asan_tls_exit)( +- void *, unsigned long, void *) = asan_thread_exit; +- + WIN_FORCE_LINK(__asan_dso_reg_hook) + + // }}} diff --git a/build/build-clang/win64-ret-null-on-commitment-limit_clang_14.patch b/build/build-clang/win64-ret-null-on-commitment-limit_clang_14.patch new file mode 100644 index 0000000000..23b001bc68 --- /dev/null +++ b/build/build-clang/win64-ret-null-on-commitment-limit_clang_14.patch @@ -0,0 +1,14 @@ +diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +index 7c84cdc22ce4..e13fff03489e 100644 +--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp ++++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +@@ -157,7 +157,8 @@ void UnmapOrDie(void *addr, uptr size) { + static void *ReturnNullptrOnOOMOrDie(uptr size, const char *mem_type, + const char *mmap_type) { + error_t last_error = GetLastError(); +- if (last_error == ERROR_NOT_ENOUGH_MEMORY) ++ if (last_error == ERROR_NOT_ENOUGH_MEMORY || ++ last_error == ERROR_COMMITMENT_LIMIT) + return nullptr; + ReportMmapFailureAndDie(size, mem_type, mmap_type, last_error); + } diff --git a/build/build-clang/win64.json b/build/build-clang/win64.json new file mode 100644 index 0000000000..9d4dcc589e --- /dev/null +++ b/build/build-clang/win64.json @@ -0,0 +1,7 @@ +{ + "target": "x86_64-pc-windows-msvc", + "cc": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl", + "cxx": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl", + "ml": "{MOZ_FETCHES_DIR}/clang/bin/clang-cl", + "lib": "{MOZ_FETCHES_DIR}/clang/bin/llvm-lib" +} |