Ported from clangd; this can still be improved over time, but it is ready to land. It is based on the work from https://bit.ly/3TkV2N1. The utility makes the assumption that all headers are self-contained! It only checks Decls from the main translation file, i.e. those whose SourceLocation is in the passed cpp file. It builds a list of all of the includes from the translation unit. It matches all of the Decls from the main translation unit against definitions from the included header files and builds a list of used header files. All of the includes that are not part of the matched used header files are considered to be unused. Of course, this is only correct if the first assumption is enforced by the coding guide, i.e. all of the headers are self-contained. Since the Mozilla code base doesn't follow this approach, false positives might appear in the following situation: FOO.cpp contains `#include <A>` and `#include <B>`. If header A defines a symbol that is used by header B, and B neither includes A nor defines any symbols that are used by FOO.cpp, then B will be marked by the tool as potentially removable. This limitation is caused by headers that are not self-contained. The limitation presented above can be fixed in the future with extra work, but doing so is very expensive in terms of checker runtime.
diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt index 6a3f741721ee..ff17c8e8472a 100644 --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -16,6 +16,7 @@ endif() add_subdirectory(clang-apply-replacements) add_subdirectory(clang-reorder-fields) add_subdirectory(modularize) +add_subdirectory(include-cleaner) add_subdirectory(clang-tidy) add_subdirectory(clang-change-namespace) @@ -23,7 +24,6 @@ add_subdirectory(clang-doc) add_subdirectory(clang-include-fixer) add_subdirectory(clang-move) add_subdirectory(clang-query) -add_subdirectory(include-cleaner) add_subdirectory(pp-trace) add_subdirectory(pseudo) add_subdirectory(tool-template) diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 8a953eeea275..f2edc509acaf 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -50,6 +50,7 @@ endif() # Checks. # If you add a check, also add it to ClangTidyForceLinker.h in this directory. +add_subdirectory(alpha) add_subdirectory(android) add_subdirectory(abseil) add_subdirectory(altera) @@ -77,6 +78,7 @@ add_subdirectory(portability) add_subdirectory(readability) add_subdirectory(zircon) set(ALL_CLANG_TIDY_CHECKS + clangTidyAlphaModule clangTidyAndroidModule clangTidyAbseilModule clangTidyAlteraModule diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h index 2691d90fa521..2fa064cff22a 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h @@ -20,6 +20,11 @@ extern volatile int AbseilModuleAnchorSource; static int LLVM_ATTRIBUTE_UNUSED AbseilModuleAnchorDestination = AbseilModuleAnchorSource; +// This anchor is used to force the linker to link the AlphaModule. 
+extern volatile int AlphaModuleAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED AlphaModuleAnchorDestination = + AlphaModuleAnchorSource; + // This anchor is used to force the linker to link the AlteraModule. extern volatile int AlteraModuleAnchorSource; static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination = diff --git a/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp b/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp new file mode 100644 index 000000000000..b598a36cebf7 --- /dev/null +++ b/clang-tools-extra/clang-tidy/alpha/AlphaTidyModule.cpp @@ -0,0 +1,38 @@ +//===--- AlphaTidyModule.cpp - clang-tidy ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../ClangTidy.h" +#include "../ClangTidyModule.h" +#include "../ClangTidyModuleRegistry.h" +#include "UnusedIncludesCheck.h" + + +namespace clang { +namespace tidy { +namespace alpha { + +class AlphaModule : public ClangTidyModule { +public: + void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { + + CheckFactories.registerCheck("alpha-unused-includes"); + } +}; + +} // namespace alpha + +// Register the AlphaTidyModule using this statically initialized variable. +static ClangTidyModuleRegistry::Add + X("alpha-module", "Adds alpha lint checks."); + +// This anchor is used to force the linker to link in the generated object file +// and thus register the AlphaModule. 
+volatile int AlphaModuleAnchorSource = 0; + +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt b/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt new file mode 100644 index 000000000000..b50576868645 --- /dev/null +++ b/clang-tools-extra/clang-tidy/alpha/CMakeLists.txt @@ -0,0 +1,32 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../include-cleaner/include) + +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_library(clangTidyAlphaModule + + AlphaTidyModule.cpp + UnusedIncludesCheck.cpp + + LINK_LIBS + clangAnalysis + clangIncludeCleaner + clangTidy + clangTidyUtils + + DEPENDS + omp_gen + ) + +clang_target_link_libraries(clangTidyAlphaModule + PRIVATE + clangAnalysis + clangAST + clangASTMatchers + clangBasic + clangIncludeCleaner + clangLex + clangSerialization + clangTooling + ) diff --git a/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp new file mode 100644 index 000000000000..0d6a6bf7a367 --- /dev/null +++ b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.cpp @@ -0,0 +1,76 @@ +//===--- UnusedIncludesCheck.cpp - clang-tidy------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UnusedIncludesCheck.h" +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Hooks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Preprocessor.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace alpha { + +UnusedIncludesCheck::UnusedIncludesCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + +void UnusedIncludesCheck::registerPPCallbacks(const SourceManager &SM, + Preprocessor *PP, + Preprocessor *) { + Ctx = std::make_unique( + include_cleaner::Policy{}, *PP); + RecordedPP = std::make_unique(); + PP->addPPCallbacks(RecordedPP->record(*Ctx)); +} + +void UnusedIncludesCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher( + translationUnitDecl(forEach(decl(isExpansionInMainFile()).bind("top"))), + this); +} + +void UnusedIncludesCheck::check(const MatchFinder::MatchResult &Result) { + Top.push_back(const_cast(Result.Nodes.getNodeAs("top"))); +} + +void UnusedIncludesCheck::onEndOfTranslationUnit() { + llvm::DenseSet Used; + llvm::DenseSet Seen; + include_cleaner::walkUsed( + *Ctx, Top, RecordedPP->MacroReferences, + [&](SourceLocation Loc, include_cleaner::Symbol Sym, + llvm::ArrayRef Headers) { + for (const auto &Header : Headers) { + if (!Seen.insert(Header).second) + continue; + const auto& HeadersToInsert = RecordedPP->Includes.match(Header); + Used.insert(HeadersToInsert.begin(), HeadersToInsert.end()); + } + }); + for (const auto &I : RecordedPP->Includes.all()) { + if (!Used.contains(&I)) { + const auto &SM = Ctx->sourceManager(); + FileID FID = SM.getFileID(I.Location); + diag(I.Location, "there is a high probability that include is unused") + << FixItHint::CreateRemoval(CharSourceRange::getCharRange( + 
SM.translateLineCol(FID, I.Line, 1), + SM.translateLineCol(FID, I.Line + 1, 1))); + } + } +} + +UnusedIncludesCheck::~UnusedIncludesCheck() = default; + +} // namespace alpha +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h new file mode 100644 index 000000000000..f67c46e6cc3e --- /dev/null +++ b/clang-tools-extra/clang-tidy/alpha/UnusedIncludesCheck.h @@ -0,0 +1,42 @@ +//===--- UnusedIncludesCheck.h - clang-tidy----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace include_cleaner { +class AnalysisContext; +struct RecordedPP; +} // namespace include_cleaner +namespace tidy { +namespace alpha { + +class UnusedIncludesCheck : public ClangTidyCheck { +public: + UnusedIncludesCheck(StringRef Name, ClangTidyContext *Context); + ~UnusedIncludesCheck(); + void registerPPCallbacks(const SourceManager &SM, Preprocessor *, + Preprocessor *) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void onEndOfTranslationUnit() override; + +private: + std::unique_ptr Ctx; + std::unique_ptr RecordedPP; + std::vector Top; +}; + +} // namespace alpha +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_UNUSED_INCLUDES_H diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index de8f087a52a5..14f605b1efaf 
100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -2,6 +2,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include-cleaner/include) + add_subdirectory(support) # Configure the Features.inc file. @@ -153,6 +155,7 @@ clang_target_link_libraries(clangDaemon clangDriver clangFormat clangFrontend + clangIncludeCleaner clangIndex clangLex clangSema diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index 26eb2574195d..a3cbc8894f6d 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -12,9 +12,11 @@ #include "CodeCompletionStrings.h" #include "Config.h" #include "FindTarget.h" +#include "IncludeCleaner.h" #include "ParsedAST.h" #include "Selection.h" #include "SourceCode.h" +#include "clang-include-cleaner/Analysis.h" #include "index/SymbolCollector.h" #include "support/Markup.h" #include "clang/AST/ASTContext.h" @@ -985,6 +987,23 @@ llvm::Optional getHover(ParsedAST &AST, Position Pos, // FIXME: We don't have a fitting value for Kind. HI.Definition = URIForFile::canonicalize(Inc.Resolved, *MainFilePath).file().str(); + + // FIXME: share code, macros too... 
+ include_cleaner::AnalysisContext Ctx(include_cleaner::Policy{}, + AST.getPreprocessor()); + std::vector Provides; + include_cleaner::walkUsed( + Ctx, AST.getLocalTopLevelDecls(), /*Macros=*/{}, + [&](SourceLocation Loc, include_cleaner::Symbol S, + llvm::ArrayRef Headers) { + for (const auto &H : Headers) + if (match(H, Inc, AST.getIncludeStructure())) + Provides.push_back(S.name()); + }); + llvm::sort(Provides); + Provides.erase(std::unique(Provides.begin(), Provides.end()), + Provides.end()); + HI.Documentation = "provides " + llvm::join(Provides, ", "); HI.DefinitionLanguage = ""; return HI; } diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp index e5b5187e030c..3c0ba06316ac 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -12,6 +12,8 @@ #include "ParsedAST.h" #include "Protocol.h" #include "SourceCode.h" +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Types.h" #include "index/CanonicalIncludes.h" #include "support/Logger.h" #include "support/Trace.h" @@ -40,181 +42,6 @@ void setIncludeCleanerAnalyzesStdlib(bool B) { AnalyzeStdlib = B; } namespace { -/// Crawler traverses the AST and feeds in the locations of (sometimes -/// implicitly) used symbols into \p Result. -class ReferencedLocationCrawler - : public RecursiveASTVisitor { -public: - ReferencedLocationCrawler(ReferencedLocations &Result, - const SourceManager &SM) - : Result(Result), SM(SM) {} - - bool VisitDeclRefExpr(DeclRefExpr *DRE) { - add(DRE->getDecl()); - add(DRE->getFoundDecl()); - return true; - } - - bool VisitMemberExpr(MemberExpr *ME) { - add(ME->getMemberDecl()); - add(ME->getFoundDecl().getDecl()); - return true; - } - - bool VisitTagType(TagType *TT) { - add(TT->getDecl()); - return true; - } - - bool VisitFunctionDecl(FunctionDecl *FD) { - // Function definition will require redeclarations to be included. 
- if (FD->isThisDeclarationADefinition()) - add(FD); - return true; - } - - bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { - add(CCE->getConstructor()); - return true; - } - - bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { - // Using templateName case is handled by the override TraverseTemplateName. - if (TST->getTemplateName().getKind() == TemplateName::UsingTemplate) - return true; - add(TST->getAsCXXRecordDecl()); // Specialization - return true; - } - - // There is no VisitTemplateName in RAV, thus we override the Traverse version - // to handle the Using TemplateName case. - bool TraverseTemplateName(TemplateName TN) { - VisitTemplateName(TN); - return Base::TraverseTemplateName(TN); - } - // A pseudo VisitTemplateName, dispatched by the above TraverseTemplateName! - bool VisitTemplateName(TemplateName TN) { - if (const auto *USD = TN.getAsUsingShadowDecl()) { - add(USD); - return true; - } - add(TN.getAsTemplateDecl()); // Primary template. - return true; - } - - bool VisitUsingType(UsingType *UT) { - add(UT->getFoundDecl()); - return true; - } - - bool VisitTypedefType(TypedefType *TT) { - add(TT->getDecl()); - return true; - } - - // Consider types of any subexpression used, even if the type is not named. - // This is helpful in getFoo().bar(), where Foo must be complete. - // FIXME(kirillbobyrev): Should we tweak this? It may not be desirable to - // consider types "used" when they are not directly spelled in code. - bool VisitExpr(Expr *E) { - TraverseType(E->getType()); - return true; - } - - bool TraverseType(QualType T) { - if (isNew(T.getTypePtrOrNull())) // don't care about quals - Base::TraverseType(T); - return true; - } - - bool VisitUsingDecl(UsingDecl *D) { - for (const auto *Shadow : D->shadows()) - add(Shadow->getTargetDecl()); - return true; - } - - // Enums may be usefully forward-declared as *complete* types by specifying - // an underlying type. 
In this case, the definition should see the declaration - // so they can be checked for compatibility. - bool VisitEnumDecl(EnumDecl *D) { - if (D->isThisDeclarationADefinition() && D->getIntegerTypeSourceInfo()) - add(D); - return true; - } - - // When the overload is not resolved yet, mark all candidates as used. - bool VisitOverloadExpr(OverloadExpr *E) { - for (const auto *ResolutionDecl : E->decls()) - add(ResolutionDecl); - return true; - } - -private: - using Base = RecursiveASTVisitor; - - void add(const Decl *D) { - if (!D || !isNew(D->getCanonicalDecl())) - return; - if (auto SS = StdRecognizer(D)) { - Result.Stdlib.insert(*SS); - return; - } - // Special case RecordDecls, as it is common for them to be forward - // declared multiple times. The most common cases are: - // - Definition available in TU, only mark that one as usage. The rest is - // likely to be unnecessary. This might result in false positives when an - // internal definition is visible. - // - There's a forward declaration in the main file, no need for other - // redecls. - if (const auto *RD = llvm::dyn_cast(D)) { - if (const auto *Definition = RD->getDefinition()) { - Result.User.insert(Definition->getLocation()); - return; - } - if (SM.isInMainFile(RD->getMostRecentDecl()->getLocation())) - return; - } - for (const Decl *Redecl : D->redecls()) - Result.User.insert(Redecl->getLocation()); - } - - bool isNew(const void *P) { return P && Visited.insert(P).second; } - - ReferencedLocations &Result; - llvm::DenseSet Visited; - const SourceManager &SM; - tooling::stdlib::Recognizer StdRecognizer; -}; - -// Given a set of referenced FileIDs, determines all the potentially-referenced -// files and macros by traversing expansion/spelling locations of macro IDs. -// This is used to map the referenced SourceLocations onto real files. 
-struct ReferencedFilesBuilder { - ReferencedFilesBuilder(const SourceManager &SM) : SM(SM) {} - llvm::DenseSet Files; - llvm::DenseSet Macros; - const SourceManager &SM; - - void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } - - void add(FileID FID, SourceLocation Loc) { - if (FID.isInvalid()) - return; - assert(SM.isInFileID(Loc, FID)); - if (Loc.isFileID()) { - Files.insert(FID); - return; - } - // Don't process the same macro FID twice. - if (!Macros.insert(FID).second) - return; - const auto &Exp = SM.getSLocEntry(FID).getExpansion(); - add(Exp.getSpellingLoc()); - add(Exp.getExpansionLocStart()); - add(Exp.getExpansionLocEnd()); - } -}; - // Returns the range starting at '#' and ending at EOL. Escaped newlines are not // handled. clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { @@ -231,10 +58,10 @@ clangd::Range getDiagnosticRange(llvm::StringRef Code, unsigned HashOffset) { // Finds locations of macros referenced from within the main file. That includes // references that were not yet expanded, e.g `BAR` in `#define FOO BAR`. -void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, - const syntax::TokenBuffer *Tokens, - ReferencedLocations &Result) { +std::vector +findReferencedMacros(ParsedAST &AST, include_cleaner::AnalysisContext &Ctx) { trace::Span Tracer("IncludeCleaner::findReferencedMacros"); + std::vector Result; // FIXME(kirillbobyrev): The macros from the main file are collected in // ParsedAST's MainFileMacros. However, we can't use it here because it // doesn't handle macro references that were not expanded, e.g. in macro @@ -244,15 +71,19 @@ void findReferencedMacros(const SourceManager &SM, Preprocessor &PP, // this mechanism (as opposed to iterating through all tokens) will improve // the performance of findReferencedMacros and also improve other features // relying on MainFileMacros. 
- for (const syntax::Token &Tok : Tokens->spelledTokens(SM.getMainFileID())) { - auto Macro = locateMacroAt(Tok, PP); + for (const syntax::Token &Tok : + AST.getTokens().spelledTokens(AST.getSourceManager().getMainFileID())) { + auto Macro = locateMacroAt(Tok, AST.getPreprocessor()); if (!Macro) continue; auto Loc = Macro->Info->getDefinitionLoc(); if (Loc.isValid()) - Result.User.insert(Loc); - // FIXME: support stdlib macros + Result.push_back(include_cleaner::SymbolReference{ + Tok.location(), + Ctx.macro(AST.getPreprocessor().getIdentifierInfo(Macro->Name), + Loc)}); } + return Result; } static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, @@ -296,110 +127,8 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, } return true; } - -// In case symbols are coming from non self-contained header, we need to find -// its first includer that is self-contained. This is the header users can -// include, so it will be responsible for bringing the symbols from given -// header into the scope. -FileID headerResponsible(FileID ID, const SourceManager &SM, - const IncludeStructure &Includes) { - // Unroll the chain of non self-contained headers until we find the one that - // can be included. - for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID(); - FE = SM.getFileEntryForID(ID)) { - // If FE is nullptr, we consider it to be the responsible header. - if (!FE) - break; - auto HID = Includes.getID(FE); - assert(HID && "We're iterating over headers already existing in " - "IncludeStructure"); - if (Includes.isSelfContained(*HID)) - break; - // The header is not self-contained: put the responsibility for its symbols - // on its includer. 
- ID = SM.getFileID(SM.getIncludeLoc(ID)); - } - return ID; -} - } // namespace -ReferencedLocations findReferencedLocations(ASTContext &Ctx, Preprocessor &PP, - const syntax::TokenBuffer *Tokens) { - trace::Span Tracer("IncludeCleaner::findReferencedLocations"); - ReferencedLocations Result; - const auto &SM = Ctx.getSourceManager(); - ReferencedLocationCrawler Crawler(Result, SM); - Crawler.TraverseAST(Ctx); - if (Tokens) - findReferencedMacros(SM, PP, Tokens, Result); - return Result; -} - -ReferencedLocations findReferencedLocations(ParsedAST &AST) { - return findReferencedLocations(AST.getASTContext(), AST.getPreprocessor(), - &AST.getTokens()); -} - -ReferencedFiles findReferencedFiles( - const ReferencedLocations &Locs, const SourceManager &SM, - llvm::function_ref HeaderResponsible, - llvm::function_ref(FileID)> UmbrellaHeader) { - std::vector Sorted{Locs.User.begin(), Locs.User.end()}; - llvm::sort(Sorted); // Group by FileID. - ReferencedFilesBuilder Builder(SM); - for (auto It = Sorted.begin(); It < Sorted.end();) { - FileID FID = SM.getFileID(*It); - Builder.add(FID, *It); - // Cheaply skip over all the other locations from the same FileID. - // This avoids lots of redundant Loc->File lookups for the same file. - do - ++It; - while (It != Sorted.end() && SM.isInFileID(*It, FID)); - } - - // If a header is not self-contained, we consider its symbols a logical part - // of the including file. Therefore, mark the parents of all used - // non-self-contained FileIDs as used. Perform this on FileIDs rather than - // HeaderIDs, as each inclusion of a non-self-contained file is distinct. 
- llvm::DenseSet UserFiles; - llvm::StringSet<> PublicHeaders; - for (FileID ID : Builder.Files) { - UserFiles.insert(HeaderResponsible(ID)); - if (auto PublicHeader = UmbrellaHeader(ID)) { - PublicHeaders.insert(*PublicHeader); - } - } - - llvm::DenseSet StdlibFiles; - for (const auto &Symbol : Locs.Stdlib) - for (const auto &Header : Symbol.headers()) - StdlibFiles.insert(Header); - - return {std::move(UserFiles), std::move(StdlibFiles), - std::move(PublicHeaders)}; -} - -ReferencedFiles findReferencedFiles(const ReferencedLocations &Locs, - const IncludeStructure &Includes, - const CanonicalIncludes &CanonIncludes, - const SourceManager &SM) { - return findReferencedFiles( - Locs, SM, - [&SM, &Includes](FileID ID) { - return headerResponsible(ID, SM, Includes); - }, - [&SM, &CanonIncludes](FileID ID) -> Optional { - auto Entry = SM.getFileEntryRefForID(ID); - if (!Entry) - return llvm::None; - auto PublicHeader = CanonIncludes.mapHeader(*Entry); - if (PublicHeader.empty()) - return llvm::None; - return PublicHeader; - }); -} - std::vector getUnused(ParsedAST &AST, const llvm::DenseSet &ReferencedFiles, @@ -426,51 +155,50 @@ getUnused(ParsedAST &AST, return Unused; } -#ifndef NDEBUG -// Is FID a , etc? 
-static bool isSpecialBuffer(FileID FID, const SourceManager &SM) { - const SrcMgr::FileInfo &FI = SM.getSLocEntry(FID).getFile(); - return FI.getName().startswith("<"); -} -#endif - -llvm::DenseSet -translateToHeaderIDs(const ReferencedFiles &Files, - const IncludeStructure &Includes, - const SourceManager &SM) { - trace::Span Tracer("IncludeCleaner::translateToHeaderIDs"); - llvm::DenseSet TranslatedHeaderIDs; - TranslatedHeaderIDs.reserve(Files.User.size()); - for (FileID FID : Files.User) { - const FileEntry *FE = SM.getFileEntryForID(FID); - if (!FE) { - assert(isSpecialBuffer(FID, SM)); - continue; - } - const auto File = Includes.getID(FE); - assert(File); - TranslatedHeaderIDs.insert(*File); - } - for (tooling::stdlib::Header StdlibUsed : Files.Stdlib) - for (auto HID : Includes.StdlibHeaders.lookup(StdlibUsed)) - TranslatedHeaderIDs.insert(HID); - return TranslatedHeaderIDs; +bool match(const include_cleaner::Header &H, const Inclusion &I, + const IncludeStructure &S) { + switch (H.kind()) { + case include_cleaner::Header::Physical: + if (auto HID = S.getID(H.getPhysical())) + if (static_cast(*HID) == I.HeaderID) + return true; + break; + case include_cleaner::Header::StandardLibrary: + return I.Written == H.getStandardLibrary().name(); + case include_cleaner::Header::Verbatim: + return llvm::StringRef(I.Written).trim("\"<>") == H.getVerbatimSpelling(); + case include_cleaner::Header::Builtin: + case include_cleaner::Header::MainFile: + break; + } + return false; } std::vector computeUnusedIncludes(ParsedAST &AST) { - const auto &SM = AST.getSourceManager(); - - auto Refs = findReferencedLocations(AST); - auto ReferencedFiles = - findReferencedFiles(Refs, AST.getIncludeStructure(), - AST.getCanonicalIncludes(), AST.getSourceManager()); - auto ReferencedHeaders = - translateToHeaderIDs(ReferencedFiles, AST.getIncludeStructure(), SM); - return getUnused(AST, ReferencedHeaders, ReferencedFiles.SpelledUmbrellas); + include_cleaner::AnalysisContext 
Ctx(include_cleaner::Policy{}, + AST.getPreprocessor()); + llvm::DenseSet Used; + include_cleaner::walkUsed( + Ctx, AST.getLocalTopLevelDecls(), + /*MacroRefs=*/findReferencedMacros(AST, Ctx), + [&](SourceLocation Loc, include_cleaner::Symbol Sym, + llvm::ArrayRef Headers) { + for (const auto &I : AST.getIncludeStructure().MainFileIncludes) + for (const auto &H : Headers) + if (match(H, I, AST.getIncludeStructure())) + Used.insert(&I); + }); + std::vector Unused; + const Config &Cfg = Config::current(); + for (const auto &I : AST.getIncludeStructure().MainFileIncludes) { + if (!Used.contains(&I) && mayConsiderUnused(I, AST, Cfg)) + Unused.push_back(&I); + } + return Unused; } -std::vector issueUnusedIncludesDiagnostics(ParsedAST &AST, - llvm::StringRef Code) { +auto issueUnusedIncludesDiagnostics(ParsedAST &AST, + llvm::StringRef Code) -> std::vector { const Config &Cfg = Config::current(); if (Cfg.Diagnostics.UnusedIncludes != Config::UnusedIncludesPolicy::Strict || Cfg.Diagnostics.SuppressAll || diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h index 4ce31baaa067..c858a60c5db7 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -23,6 +23,7 @@ #include "index/CanonicalIncludes.h" #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" +#include "clang-include-cleaner/Types.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringSet.h" @@ -100,6 +101,10 @@ std::vector computeUnusedIncludes(ParsedAST &AST); std::vector issueUnusedIncludesDiagnostics(ParsedAST &AST, llvm::StringRef Code); +// Does an include-cleaner header spec match a clangd recorded inclusion? +bool match(const include_cleaner::Header &H, const Inclusion &I, + const IncludeStructure &S); + /// Affects whether standard library includes should be considered for /// removal. 
This is off by default for now due to implementation limitations: /// - macros are not tracked diff --git a/clang-tools-extra/include-cleaner/CMakeLists.txt b/clang-tools-extra/include-cleaner/CMakeLists.txt index 0550b02f603b..325186879a47 100644 --- a/clang-tools-extra/include-cleaner/CMakeLists.txt +++ b/clang-tools-extra/include-cleaner/CMakeLists.txt @@ -1,4 +1,8 @@ +include_directories(include) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) add_subdirectory(lib) +add_subdirectory(tool) + if(CLANG_INCLUDE_TESTS) add_subdirectory(test) add_subdirectory(unittests) diff --git a/clang-tools-extra/include-cleaner/README.md b/clang-tools-extra/include-cleaner/README.md deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h new file mode 100644 index 000000000000..4e5cc8d03814 --- /dev/null +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -0,0 +1,77 @@ +//===--- Analysis.h - Analyze used files --------------------------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INCLUDE_CLEANER_ANALYSIS_H +#define CLANG_INCLUDE_CLEANER_ANALYSIS_H + +#include "clang-include-cleaner/Policy.h" +#include "clang-include-cleaner/Types.h" + +namespace clang { +namespace include_cleaner { +class Cache; + +// Bundles the policy, compiler state, and caches for one include-cleaner run. +// (This is needed everywhere, but shouldn't be used to propagate state around!) 
+class AnalysisContext { +public: + AnalysisContext(const Policy &, const Preprocessor &); + AnalysisContext(AnalysisContext &&) = delete; + AnalysisContext &operator=(AnalysisContext &&) = delete; + ~AnalysisContext(); + + const Policy &policy() const { return P; } + + const SourceManager &sourceManager() const { return *SM; } + const Preprocessor &preprocessor() const { return *PP; } + + // Only for internal use (the Cache class definition is not exposed). + // This allows us to reuse e.g. mappings from symbols to their locations. + Cache &cache() { return *C; } + // FIXME: does this need to be public? + Symbol macro(const IdentifierInfo *, SourceLocation); + +private: + Policy P; + const SourceManager *SM; + const Preprocessor *PP; + std::unique_ptr C; +}; + +// A UsedSymbolVisitor is a callback invoked for each symbol reference seen. +// +// References occur at a particular location, refer to a single symbol, and +// that symbol may be provided by any of several headers. +// +// The first element of ProvidedBy is the *preferred* header, e.g. to insert. +using UsedSymbolVisitor = + llvm::function_ref ProvidedBy)>; + +// Find and report all references to symbols in a region of code. +// +// The AST traversal is rooted at ASTRoots - typically top-level declarations +// of a single source file. MacroRefs are additional recorded references to +// macros, which do not appear in the AST. +// +// This is the main entrypoint of the include-cleaner library, and can be used: +// - to diagnose missing includes: a referenced symbol is provided by +// headers which don't match any #include in the main file +// - to diagnose unused includes: an #include in the main file does not match +// the headers for any referenced symbol +// +// Mapping between Header and #include directives is not provided here, but see +// RecordedPP::Includes::match() in Hooks.h. 
+void walkUsed(AnalysisContext &, llvm::ArrayRef<Decl *> ASTRoots,
+              llvm::ArrayRef<SymbolReference> MacroRefs,
+              UsedSymbolVisitor Callback);
+
+} // namespace include_cleaner
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h
new file mode 100644
index 000000000000..39e11653b210
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Hooks.h
@@ -0,0 +1,87 @@
+//===--- Hooks.h - Record compiler events -------------------------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Where Analysis.h analyzes AST nodes and recorded preprocessor events, this
+// file defines ways to capture AST and preprocessor information from a parse.
+//
+// These are the simplest way to connect include-cleaner logic to the parser,
+// but other ways are possible (for example clangd records includes separately).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_INCLUDE_CLEANER_HOOKS_H
+#define CLANG_INCLUDE_CLEANER_HOOKS_H
+
+#include "Analysis.h"
+#include "Types.h"
+#include "clang/Basic/FileEntry.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace clang {
+class FileEntry;
+class PPCallbacks;
+namespace include_cleaner {
+class PPRecorder;
+
+// Contains recorded preprocessor events relevant to include-cleaner.
+struct RecordedPP {
+  // The callback (when installed into clang) tracks macros/includes in this.
+  std::unique_ptr<PPCallbacks> record(AnalysisContext &Ctx);
+  // FIXME: probably also want a comment handler to capture IWYU pragmas.
+
+  // Describes where macros were used from the main file.
+  std::vector<SymbolReference> MacroReferences;
+
+  // A single #include directive from the main file.
+  struct Include {
+    llvm::StringRef Spelled;   // e.g. vector
+    const FileEntry *Resolved; // e.g. /path/to/c++/v1/vector
+    SourceLocation Location;   // of hash in #include
+    unsigned Line;             // 1-based line number for #include
+  };
+  // The set of includes recorded from the main file.
+  class RecordedIncludes {
+  public:
+    // All #includes seen, in the order they appear.
+    llvm::ArrayRef<Include> all() const { return All; }
+    // Determine #includes that match a header (that provides a used symbol).
+    //
+    // Matching is based on the type of Header specified:
+    //  - for a physical file like /path/to/foo.h, we check Resolved
+    //  - for a logical file like <vector>, we check Spelled
+    llvm::SmallVector<const Include *> match(Header H) const;
+
+  private:
+    std::vector<Include> All;
+    llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling;
+    llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile;
+    friend PPRecorder;
+  } Includes;
+};
+
+// Contains recorded parser events relevant to include-cleaner.
+struct RecordedAST {
+  // The consumer (when installed into clang) tracks declarations in this.
+  std::unique_ptr<ASTConsumer> record(AnalysisContext &Ctx);
+
+  // The set of declarations written at file scope inside the main file.
+  //
+  // These are the roots of the subtrees that should be traversed to find uses.
+  // (Traversing the TranslationUnitDecl would find uses inside headers!)
+  std::vector<Decl *> TopLevelDecls;
+};
+
+} // namespace include_cleaner
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h
new file mode 100644
index 000000000000..142887b85529
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Policy.h
@@ -0,0 +1,35 @@
+//===--- Policy.h - Tuning what is considered used ----------------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_INCLUDE_CLEANER_POLICY_H
+#define CLANG_INCLUDE_CLEANER_POLICY_H
+
+namespace clang {
+namespace include_cleaner {
+
+// Provides some fine-tuning of include-cleaner's choices about what is used.
+//
+// Changing the policy serves two purposes:
+//  - marking more things used reduces the false-positives for "unused include",
+//    while marking fewer things improves "missing include" in the same way.
+//  - different coding styles may make different decisions about which includes
+//    are required.
+struct Policy {
+  // Does construction count as use of the type, when the type is not named?
+  // e.g. printVector({x, y, z}); - is std::vector used?
+  bool Construction = false;
+  // Is member access tracked as a reference?
+  bool Members = false;
+  // Are operator calls tracked as references?
+  bool Operators = false;
+};
+
+} // namespace include_cleaner
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
new file mode 100644
index 000000000000..2a91473b926e
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
@@ -0,0 +1,219 @@
+//===--- Types.h - Data structures for used-symbol analysis -------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Finding referenced files is mostly a matter of translating:
+//    AST Node => declaration => source location => file
+//
+// clang has types for these (DynTypedNode, Decl, SourceLocation, FileID), but
+// there are special cases: macros are not declarations, the concrete file where
+// a standard library symbol was defined doesn't matter, etc.
+//
+// We define some slightly more abstract sum types to handle these cases while
+// keeping the API clean. For example, Symbol is Decl+DefinedMacro.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_INCLUDE_CLEANER_TYPES_H
+#define CLANG_INCLUDE_CLEANER_TYPES_H
+
+#include "clang/AST/DeclBase.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/PointerSumType.h"
+
+namespace clang {
+class IdentifierInfo;
+class MacroDirective;
+namespace include_cleaner {
+
+// Identifies a macro, along with a particular definition of it.
+// We generally consider redefined macros to be different symbols.
+struct DefinedMacro {
+  const IdentifierInfo *Name;
+  const SourceLocation Definition;
+};
+
+// A Symbol is an entity that can be referenced.
+// It is either a declaration (NamedDecl) or a macro (DefinedMacro).
+class Symbol {
+public:
+  enum Kind {
+    Macro,
+    Declaration,
+  };
+  Symbol(NamedDecl *ND) : Target(ND) {}
+  Symbol(const DefinedMacro *M) : Target(M) {}
+
+  std::string name() const;
+  std::string nodeName() const;
+  Kind kind() const { return Target.is<NamedDecl *>() ? Declaration : Macro; }
+
+  NamedDecl *getDeclaration() const { return Target.get<NamedDecl *>(); }
+  const DefinedMacro *getMacro() const {
+    return Target.get<const DefinedMacro *>();
+  }
+
+private:
+  llvm::PointerUnion<const DefinedMacro *, NamedDecl *> Target;
+};
+
+// A usage of a Symbol seen in our source code.
+struct SymbolReference {
+  // The point in the code where the reference occurred.
+  // We could track the DynTypedNode we found it in if it's important.
+  SourceLocation Location;
+  Symbol Target;
+};
+
+// A Location is a place where a symbol can be provided.
+// It is either a physical part of the TU (SourceLocation) or a logical location
+// in the standard library (stdlib::Symbol).
+class Location {
+public:
+  enum Kind : uint8_t {
+    Physical,
+    StandardLibrary,
+  };
+
+  Location(SourceLocation S) : K(Physical), SrcLoc(S) {}
+  Location(tooling::stdlib::Symbol S) : K(StandardLibrary), StdlibSym(S) {}
+
+  std::string name(const SourceManager &SM) const;
+  Kind kind() const { return K; }
+
+  SourceLocation getPhysical() const {
+    assert(kind() == Physical);
+    return SrcLoc;
+  }
+  tooling::stdlib::Symbol getStandardLibrary() const {
+    assert(kind() == StandardLibrary);
+    return StdlibSym;
+  }
+
+private:
+  Kind K;
+  union {
+    SourceLocation SrcLoc;
+    tooling::stdlib::Symbol StdlibSym;
+  };
+};
+
+// A Header is an includable file that can provide access to Locations.
+// It is either a physical file (FileEntry), a logical location in the standard
+// library (stdlib::Header), or a verbatim header spelling (StringRef).
+class Header {
+public:
+  enum Kind : uint8_t {
+    Physical,
+    StandardLibrary,
+    Verbatim,
+    Builtin,
+    MainFile,
+  };
+
+  Header(const FileEntry *FE) : K(Physical), PhysicalFile(FE) {}
+  Header(tooling::stdlib::Header H) : K(StandardLibrary), StdlibHeader(H) {}
+  Header(const char *V) : K(Verbatim), VerbatimSpelling(V) {}
+  static Header builtin() { return Header{Builtin}; }
+  static Header mainFile() { return Header{MainFile}; }
+
+  std::string name() const;
+  Kind kind() const { return K; }
+
+  const FileEntry *getPhysical() const {
+    assert(kind() == Physical);
+    return PhysicalFile;
+  }
+  tooling::stdlib::Header getStandardLibrary() const {
+    assert(kind() == StandardLibrary);
+    return StdlibHeader;
+  }
+  llvm::StringRef getVerbatimSpelling() const {
+    assert(kind() == Verbatim);
+    return VerbatimSpelling;
+  }
+
+private:
+  Header(Kind K) : K(K) {}
+
+  Kind K;
+  union {
+    const FileEntry *PhysicalFile;
+    tooling::stdlib::Header StdlibHeader;
+    const char *VerbatimSpelling;
+  };
+
+  friend bool operator==(const Header &L, const Header &R) {
+    if (L.kind() != R.kind())
+      return false;
+    switch (L.kind()) {
+    case Physical:
+      return L.getPhysical() == R.getPhysical();
+    case StandardLibrary:
+      return L.getStandardLibrary() == R.getStandardLibrary();
+    case Verbatim:
+      return L.getVerbatimSpelling() == R.getVerbatimSpelling();
+    case Builtin:
+    case MainFile:
+      return true; // no payload
+    }
+    llvm_unreachable("unhandled Header kind");
+  }
+
+  friend bool operator<(const Header &L, const Header &R) {
+    if (L.kind() != R.kind())
+      return L.kind() < R.kind();
+    switch (L.kind()) {
+    case Physical:
+      return L.getPhysical() < R.getPhysical(); // must be a strict ordering
+    case StandardLibrary:
+      return L.getStandardLibrary() < R.getStandardLibrary();
+    case Verbatim:
+      return L.getVerbatimSpelling() < R.getVerbatimSpelling();
+    case Builtin:
+    case MainFile:
+      return false; // no payload
+    }
+    llvm_unreachable("unhandled Header kind");
+  }
+
+  friend llvm::hash_code hash_value(const Header &H) {
+    switch (H.K) {
+    case Header::Physical:
+      return llvm::hash_combine(H.K, H.getPhysical());
+    case Header::StandardLibrary:
+      // FIXME: make StdlibHeader hashable instead.
+      return llvm::hash_combine(H.K, H.getStandardLibrary().name());
+    case Header::Verbatim:
+      return llvm::hash_combine(H.K, llvm::StringRef(H.VerbatimSpelling));
+    case Header::Builtin:
+    case Header::MainFile:
+      return llvm::hash_value(H.K);
+    }
+  }
+};
+
+template <typename T> struct DefaultDenseMapInfo {
+  static bool isEqual(const T &L, const T &R) { return L == R; }
+  static unsigned getHashValue(const T &V) { return hash_value(V); }
+};
+
+} // namespace include_cleaner
+} // namespace clang
+
+namespace llvm {
+template <> struct DenseMapInfo<clang::include_cleaner::Header> {
+  using Header = clang::include_cleaner::Header;
+  static Header getTombstoneKey() { return Header("__tombstone__"); }
+  static Header getEmptyKey() { return Header("__empty__"); }
+  static bool isEqual(const Header &L, const Header &R) { return L == R; }
+  static unsigned getHashValue(const Header &V) { return hash_value(V); }
+};
+} // namespace llvm
+
+#endif
diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
new file mode 100644
index 000000000000..5ac0008b07e8
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -0,0 +1,101 @@
+//===--- Analysis.cpp - Analyze used files --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-include-cleaner/Analysis.h"
+#include "AnalysisInternal.h"
+#include "clang/Lex/Preprocessor.h"
+
+namespace clang {
+namespace include_cleaner {
+
+AnalysisContext::AnalysisContext(const Policy &P, const Preprocessor &PP)
+    : P(P), SM(&PP.getSourceManager()), PP(&PP), C(std::make_unique<Cache>()) {}
+AnalysisContext::~AnalysisContext() = default;
+
+static bool prefer(AnalysisContext &Ctx, Hint L, Hint R) {
+  return std::make_tuple(bool(L & Hint::NameMatch), bool(L & Hint::Complete)) >
+         std::make_tuple(bool(R & Hint::NameMatch), bool(R & Hint::Complete));
+}
+
+// Is this hint actually useful? NOTE(review): getName() looks like a full path.
+static void addNameMatchHint(const IdentifierInfo *II,
+                             llvm::SmallVector<Hinted<Header>> &H) {
+  if (!II)
+    return;
+  for (auto &HH : H)
+    if (HH->kind() == Header::Physical &&
+        II->getName().equals_insensitive(HH->getPhysical()->getName()))
+      HH.Hint |= Hint::NameMatch;
+}
+
+static llvm::SmallVector<Header>
+rank(AnalysisContext &Ctx, llvm::SmallVector<Hinted<Header>> &Candidates) {
+  // Sort by Header, so we can deduplicate (and combine flags).
+  llvm::stable_sort(Candidates,
+                    [&](const Hinted<Header> &L, const Hinted<Header> &R) {
+                      return *L < *R;
+                    });
+  // Like unique(), but merge hints.
+  auto *Write = Candidates.begin();
+  for (auto *Read = Candidates.begin(); Read != Candidates.end(); ++Write) {
+    *Write = *Read;
+    for (++Read; Read != Candidates.end() && Read->Value == Write->Value;
+         ++Read)
+      Write->Hint |= Read->Hint;
+  }
+  Candidates.erase(Write, Candidates.end());
+  // Now sort by hints.
+  llvm::stable_sort(Candidates,
+                    [&](const Hinted<Header> &L, const Hinted<Header> &R) {
+                      return prefer(Ctx, L.Hint, R.Hint);
+                    });
+  // Drop hints to return clean result list.
+  llvm::SmallVector<Header> Result;
+  for (const auto &H : Candidates)
+    Result.push_back(*H);
+  return Result;
+}
+
+template <typename T> void addHint(Hint H, T &Items) {
+  for (auto &Item : Items)
+    Item.Hint |= H;
+}
+
+void walkUsed(AnalysisContext &Ctx, llvm::ArrayRef<Decl *> ASTRoots,
+              llvm::ArrayRef<SymbolReference> MacroRefs,
+              UsedSymbolVisitor Callback) {
+  for (Decl *Root : ASTRoots) {
+    walkAST(Ctx, *Root, [&](SourceLocation RefLoc, Hinted<NamedDecl &> ND) {
+      auto Locations = locateDecl(Ctx, *ND);
+      llvm::SmallVector<Hinted<Header>> Headers;
+      for (const auto &Loc : Locations) {
+        auto LocHeaders = includableHeader(Ctx, *Loc);
+        addHint(Loc.Hint, LocHeaders);
+        Headers.append(std::move(LocHeaders));
+      }
+      addHint(ND.Hint, Headers);
+      addNameMatchHint(ND.Value.getDeclName().getAsIdentifierInfo(), Headers);
+      Callback(RefLoc, &ND.Value, rank(Ctx, Headers));
+    });
+  }
+  for (const SymbolReference &MacroRef : MacroRefs) {
+    assert(MacroRef.Target.kind() == Symbol::Macro);
+    auto Loc = locateMacro(Ctx, *MacroRef.Target.getMacro());
+    auto Headers = includableHeader(Ctx, *Loc);
+    addHint(Loc.Hint, Headers);
+    addNameMatchHint(MacroRef.Target.getMacro()->Name, Headers);
+    Callback(MacroRef.Location, MacroRef.Target, rank(Ctx, Headers));
+  }
+}
+
+Symbol AnalysisContext::macro(const IdentifierInfo *II, SourceLocation Loc) {
+  return cache().macro(II, Loc);
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
index 8b0c73fe7997..31b1ad8039d8 100644
--- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
+++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
@@ -21,6 +21,96 @@
 #ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
 #define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
 
+#include "clang-include-cleaner/Analysis.h"
+#include "clang-include-cleaner/Types.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include <deque>
+
+namespace clang {
+namespace include_cleaner {
+
+// FIXME: Right now we cache nothing, this is just used as an arena for macros.
+// Verify we're burning time in repeated analysis and cache partial operations.
+class Cache {
+public:
+  Symbol macro(const IdentifierInfo *Name, const SourceLocation Def) {
+    auto &DMS = DefinedMacros[Name->getName()];
+    // Linear search. We probably only saw ~1 definition of each macro name.
+    for (const DefinedMacro &DM : DMS)
+      if (DM.Definition == Def)
+        return &DM;
+    DMS.push_back(DefinedMacro{Name, Def});
+    return &DMS.back();
+  }
+
+  tooling::stdlib::Recognizer StdlibRecognizer;
+
+private:
+  // Symbols keep pointers to these elements, so growth must not move them.
+  llvm::StringMap<std::deque<DefinedMacro>> DefinedMacros;
+};
+
+enum class Hint : uint16_t {
+  None = 0,
+  Complete = 1,  // Provides a complete definition that is often needed.
+                 // e.g. classes, templates.
+  NameMatch = 2, // Header name matches the symbol name.
+  LLVM_MARK_AS_BITMASK_ENUM(Hint::NameMatch)
+};
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
+template <typename T> struct Hinted {
+  Hinted(T Value, Hint H = Hint::None) : Value(Value), Hint(H) {}
+  T Value;
+  include_cleaner::Hint Hint;
+
+  T &operator*() { return Value; }
+  const T &operator*() const { return Value; }
+  std::remove_reference_t<T> *operator->() { return &Value; }
+  const std::remove_reference_t<T> *operator->() const { return &Value; }
+};
+
+// Traverses a subtree of the AST, reporting declarations referenced.
+void walkAST(AnalysisContext &, Decl &Root,
+             llvm::function_ref<void(SourceLocation, Hinted<NamedDecl &>)>);
+
+// Finds the locations where a declaration is provided.
+llvm::SmallVector<Hinted<Location>> locateDecl(AnalysisContext &,
+                                               const NamedDecl &);
+
+// Finds the locations where a macro is provided.
+Hinted<Location> locateMacro(AnalysisContext &, const DefinedMacro &);
+
+// Finds the headers that provide a location.
+llvm::SmallVector<Hinted<Header>> includableHeader(AnalysisContext &,
+                                                   const Location &);
+
+} // namespace include_cleaner
+} // namespace clang
+
+#endif
+//===--- AnalysisInternal.h - Analysis building blocks ------------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides smaller, testable pieces of the used-header analysis. +// We find the headers by chaining together several mappings. +// +// AST => AST node => Symbol => Location => Header +// / +// Macro expansion => +// +// The individual steps are declared here. +// (AST => AST Node => Symbol is one API to avoid materializing DynTypedNodes). +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H +#define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H + #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/STLFunctionalExtras.h" diff --git a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt index 5e2807332f94..25d66b4f30df 100644 --- a/clang-tools-extra/include-cleaner/lib/CMakeLists.txt +++ b/clang-tools-extra/include-cleaner/lib/CMakeLists.txt @@ -1,10 +1,15 @@ set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangIncludeCleaner + Analysis.cpp + Headers.cpp + Hooks.cpp + Locations.cpp + Types.cpp WalkAST.cpp LINK_LIBS clangBasic + clangLex clangAST ) - diff --git a/clang-tools-extra/include-cleaner/lib/Headers.cpp b/clang-tools-extra/include-cleaner/lib/Headers.cpp new file mode 100644 index 000000000000..f41bbe4c59c8 --- /dev/null +++ b/clang-tools-extra/include-cleaner/lib/Headers.cpp @@ -0,0 +1,46 @@ +//===--- Headers.cpp - Find headers that provide locations ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalysisInternal.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
+
+namespace clang {
+namespace include_cleaner {
+
+llvm::SmallVector<Hinted<Header>> includableHeader(AnalysisContext &Ctx,
+                                                   const Location &Loc) {
+  switch (Loc.kind()) {
+  case Location::Physical: {
+    FileID FID = Ctx.sourceManager().getFileID(
+        Ctx.sourceManager().getExpansionLoc(Loc.getPhysical()));
+    if (FID == Ctx.sourceManager().getMainFileID())
+      return {Header::mainFile()};
+    if (FID == Ctx.preprocessor().getPredefinesFileID())
+      return {Header::builtin()};
+    // FIXME: if the file is not self-contained, find its umbrella header:
+    //   - files that lack header guards (e.g. *.def)
+    //   - IWYU private pragmas (and maybe export?)
+    //   - #pragma clang include_instead
+    //   - headers containing "#error ... include" clangd isDontIncludeMeHeader
+    //   - apple framework header layout
+    if (auto *FE = Ctx.sourceManager().getFileEntryForID(FID))
+      return {{FE}};
+    return {};
+  }
+  case Location::StandardLibrary:
+    // FIXME: some symbols are provided by multiple stdlib headers:
+    //   - for historical reasons, like size_t
+    //   - some headers are guaranteed to include others (<initializer_list>)
+    //   - ::printf is de facto provided by cstdio and stdio.h, etc
+    return {{Loc.getStandardLibrary().header()}};
+  }
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Hooks.cpp b/clang-tools-extra/include-cleaner/lib/Hooks.cpp
new file mode 100644
index 000000000000..decb83110c65
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Hooks.cpp
@@ -0,0 +1,166 @@
+//===--- Hooks.cpp - Record events from the compiler --------------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-include-cleaner/Hooks.h"
+#include "AnalysisInternal.h"
+#include "clang-include-cleaner/Analysis.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclGroup.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+
+namespace clang {
+namespace include_cleaner {
+
+class PPRecorder : public PPCallbacks {
+public:
+  PPRecorder(AnalysisContext &Ctx, RecordedPP &Recorded)
+      : Ctx(Ctx), Recorded(Recorded) {}
+
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override {
+    Active = Ctx.sourceManager().isWrittenInMainFile(Loc);
+  }
+
+  void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
+                          StringRef SpelledFilename, bool IsAngled,
+                          CharSourceRange FilenameRange, Optional<FileEntryRef> File,
+                          StringRef SearchPath, StringRef RelativePath,
+                          const Module *, SrcMgr::CharacteristicKind) override {
+    if (!Active)
+      return;
+
+    unsigned Index = Recorded.Includes.All.size();
+    Recorded.Includes.All.emplace_back();
+    RecordedPP::Include &I = Recorded.Includes.All.back();
+    const FileEntry *const RawFile = File ? &File->getFileEntry() : nullptr;
+    I.Location = Hash;
+    I.Resolved = RawFile;
+    I.Line = Ctx.sourceManager().getSpellingLineNumber(Hash);
+    auto BySpellingIt =
+        Recorded.Includes.BySpelling.try_emplace(SpelledFilename).first;
+    I.Spelled = BySpellingIt->first();
+    BySpellingIt->second.push_back(Index);
+    if (RawFile) // An unresolved #include has no file to index by.
+      Recorded.Includes.ByFile[RawFile].push_back(Index);
+  }
+
+  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
+                    SourceRange Range, const MacroArgs *Args) override {
+    if (!Active)
+      return;
+    recordMacroRef(MacroName, *MD.getMacroInfo());
+  }
+
+  void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
+    if (!Active)
+      return;
+
+    const auto *MI = MD->getMacroInfo();
+    // The tokens of a macro definition could refer to a macro.
+    // Formally this reference isn't resolved until this macro is expanded,
+    // but we want to treat it as a reference anyway.
+    for (const auto &Tok : MI->tokens()) {
+      auto *II = Tok.getIdentifierInfo();
+      // Could this token be a reference to a macro? (Not param to this macro).
+      if (!II || !II->hadMacroDefinition() ||
+          llvm::is_contained(MI->params(), II))
+        continue;
+      if (const MacroInfo *RefMI = Ctx.preprocessor().getMacroInfo(II))
+        recordMacroRef(Tok, *RefMI);
+    }
+  }
+
+private:
+  void recordMacroRef(const Token &Tok, const MacroInfo &MI) {
+    if (MI.isBuiltinMacro())
+      return; // __FILE__ is not a reference.
+    Recorded.MacroReferences.push_back(SymbolReference{
+        Tok.getLocation(),
+        Ctx.cache().macro(Tok.getIdentifierInfo(), MI.getDefinitionLoc())});
+  }
+
+  bool Active = false;
+  AnalysisContext &Ctx;
+  RecordedPP &Recorded;
+};
+
+llvm::SmallVector<const RecordedPP::Include *>
+RecordedPP::RecordedIncludes::match(Header H) const {
+  llvm::SmallVector<const Include *> Result;
+  switch (H.kind()) {
+  case Header::Physical:
+    for (unsigned I : ByFile.lookup(H.getPhysical()))
+      Result.push_back(&All[I]);
+    break;
+  case Header::StandardLibrary:
+    for (unsigned I :
+         BySpelling.lookup(H.getStandardLibrary().name().trim("<>")))
+      Result.push_back(&All[I]);
+    break;
+  case Header::Verbatim:
+    for (unsigned I : BySpelling.lookup(H.getVerbatimSpelling()))
+      Result.push_back(&All[I]);
+    break;
+  case Header::Builtin:
+  case Header::MainFile:
+    break;
+  }
+  llvm::sort(Result);
+  Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
+  return Result;
+}
+
+class ASTRecorder : public ASTConsumer {
+public:
+  ASTRecorder(AnalysisContext &Ctx, RecordedAST &Recorded)
+      : Ctx(Ctx), Recorded(Recorded) {}
+
+  bool HandleTopLevelDecl(DeclGroupRef DG) override {
+    for (Decl *D : DG) {
+      if (!Ctx.sourceManager().isWrittenInMainFile(
+              Ctx.sourceManager().getExpansionLoc(D->getLocation())))
+        continue;
+      if (const auto *T = llvm::dyn_cast<FunctionDecl>(D))
+        if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+          continue;
+      if (const auto *T = llvm::dyn_cast<CXXRecordDecl>(D))
+        if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+          continue;
+      if (const auto *T = llvm::dyn_cast<VarDecl>(D))
+        if (T->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
+          continue;
+      // ObjCMethodDecl are not actually top-level!
+      if (isa<ObjCMethodDecl>(D))
+        continue;
+
+      Recorded.TopLevelDecls.push_back(D);
+    }
+    return true;
+  }
+
+private:
+  AnalysisContext &Ctx;
+  RecordedAST &Recorded;
+};
+
+std::unique_ptr<PPCallbacks> RecordedPP::record(AnalysisContext &Ctx) {
+  return std::make_unique<PPRecorder>(Ctx, *this);
+}
+
+std::unique_ptr<ASTConsumer> RecordedAST::record(AnalysisContext &Ctx) {
+  return std::make_unique<ASTRecorder>(Ctx, *this);
+}
+
+} // namespace include_cleaner
+} // namespace clang
\ No newline at end of file
diff --git a/clang-tools-extra/include-cleaner/lib/Locations.cpp b/clang-tools-extra/include-cleaner/lib/Locations.cpp
new file mode 100644
index 000000000000..7e23c56c1dfc
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Locations.cpp
@@ -0,0 +1,60 @@
+//===--- Locations.cpp - Find the locations that provide symbols ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AnalysisInternal.h"
+#include "clang-include-cleaner/Analysis.h"
+#include "clang-include-cleaner/Types.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+namespace include_cleaner {
+
+Hint declHint(const NamedDecl &D) {
+  Hint H = Hint::None;
+  if (auto *TD = llvm::dyn_cast<TagDecl>(&D))
+    if (TD->isThisDeclarationADefinition())
+      H |= Hint::Complete;
+  if (auto *CTD = llvm::dyn_cast<ClassTemplateDecl>(&D))
+    if (CTD->isThisDeclarationADefinition())
+      H |= Hint::Complete;
+  // A function template being defined is similar to a class being defined.
+  if (auto *FTD = llvm::dyn_cast<FunctionTemplateDecl>(&D))
+    if (FTD->isThisDeclarationADefinition())
+      H |= Hint::Complete;
+  return H;
+}
+
+llvm::SmallVector<Hinted<Location>> locateDecl(AnalysisContext &Ctx,
+                                               const NamedDecl &ND) {
+  if (auto StdlibSym = Ctx.cache().StdlibRecognizer(&ND))
+    return {{*StdlibSym}};
+
+  llvm::SmallVector<Hinted<Location>> Result;
+  // Is accepting all the redecls too naive?
+  for (const Decl *RD : ND.redecls()) {
+    // `friend X` is not an interesting location for X unless it's acting as a
+    // forward-declaration.
+    if (RD->getFriendObjectKind() == Decl::FOK_Declared)
+      continue;
+    SourceLocation Loc = RD->getLocation();
+    if (Loc.isValid())
+      Result.push_back({Loc, declHint(*cast<NamedDecl>(RD))});
+  }
+  return Result;
+}
+
+Hinted<Location> locateMacro(AnalysisContext &Ctx, const DefinedMacro &M) {
+  return {M.Definition};
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp
new file mode 100644
index 000000000000..6b79c603a70d
--- /dev/null
+++ b/clang-tools-extra/include-cleaner/lib/Types.cpp
@@ -0,0 +1,61 @@
+//===--- Types.cpp - Data structures for used-symbol analysis -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-include-cleaner/Types.h"
+#include "clang/AST/Decl.h"
+#include "clang/Basic/FileEntry.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+
+namespace clang {
+namespace include_cleaner {
+
+std::string Symbol::name() const {
+  switch (kind()) {
+  case Macro:
+    return getMacro()->Name->getName().str();
+  case Declaration:
+    return getDeclaration()->getNameAsString();
+  }
+  llvm_unreachable("Unhandled Symbol kind");
+}
+
+std::string Symbol::nodeName() const {
+  if (kind() == Macro)
+    return "macro";
+  return getDeclaration()->getDeclKindName();
+}
+
+std::string Location::name(const SourceManager &SM) const {
+  switch (K) {
+  case Physical:
+    return SrcLoc.printToString(SM);
+  case StandardLibrary:
+    return StdlibSym.name().str();
+  }
+  llvm_unreachable("Unhandled Location kind");
+}
+
+std::string Header::name() const {
+  switch (K) {
+  case Physical:
+    return PhysicalFile->getName().str();
+  case StandardLibrary:
+    return StdlibHeader.name().str();
+  case Verbatim:
+    return VerbatimSpelling;
+  case Builtin:
+    return ""; // NOTE(review): empty here; confirm the intended label.
+  case MainFile:
+    return ""; // NOTE(review): empty here; confirm the intended label.
+  }
+  llvm_unreachable("Unhandled Header kind");
+}
+
+} // namespace include_cleaner
+} // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index b7354fe300e0..02a27977005f 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -7,40 +7,132 @@
 //===----------------------------------------------------------------------===//
 
 #include "AnalysisInternal.h"
+#include "clang-include-cleaner/Analysis.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/SaveAndRestore.h"
 
 namespace clang {
 namespace include_cleaner {
 namespace {
-using DeclCallback = llvm::function_ref<void(SourceLocation, NamedDecl &)>;
+using DeclCallback =
+    llvm::function_ref<void(SourceLocation, Hinted<NamedDecl &>)>;
+
+// Traverses part of the AST, looking for references and reporting them.
class ASTWalker : public RecursiveASTVisitor { - DeclCallback Callback; +public: + ASTWalker(AnalysisContext &Ctx, DeclCallback Callback) + : Ctx(Ctx), Callback(Callback) {} - void report(SourceLocation Loc, NamedDecl *ND) { - if (!ND || Loc.isInvalid()) - return; - Callback(Loc, *cast(ND->getCanonicalDecl())); + bool VisitDeclRefExpr(DeclRefExpr *E) { + if (!Ctx.policy().Operators) + if (auto *FD = E->getDecl()->getAsFunction()) + if (FD->isOverloadedOperator()) + return true; + report(E->getLocation(), E->getFoundDecl()); + return true; } -public: - ASTWalker(DeclCallback Callback) : Callback(Callback) {} + bool VisitMemberExpr(MemberExpr *ME) { + if (Ctx.policy().Members) + report(ME->getMemberLoc(), ME->getFoundDecl().getDecl()); + return true; + } + + bool VisitTagType(TagType *TT) { + report(LocationOfType, TT->getDecl()); + return true; + } + + bool VisitFunctionDecl(FunctionDecl *FD) { + // Count function definitions as a reference to their declarations. + if (FD->isThisDeclarationADefinition() && FD->getCanonicalDecl() != FD) + report(FD->getLocation(), FD->getCanonicalDecl()); + return true; + } + + bool VisitCXXConstructExpr(CXXConstructExpr *E) { + if (!Ctx.policy().Construction) + return true; + SaveAndRestore Loc(LocationOfType, E->getLocation()); + LocationOfType = E->getLocation(); + return TraverseType(E->getType()); + } + + // We handle TypeLocs by saving their loc and consuming it in Visit*Type(). + // + // Handling Visit*TypeLoc() directly would be simpler, but sometimes unwritten + // types count as references (e.g. implicit conversions, with no TypeLoc). + // Stashing the location and visiting the contained type lets us handle both + // cases in VisitTagType() etc. + bool TraverseTypeLoc(TypeLoc TL) { + SaveAndRestore Loc(LocationOfType, TL.getBeginLoc()); + // The base implementation calls: + // - Visit*TypeLoc() - does nothing + // - Visit*Type() - where we handle type references + // - TraverseTypeLoc for each lexically nested type. 
+ return Base::TraverseTypeLoc(TL); + } - bool VisitTagTypeLoc(TagTypeLoc TTL) { - report(TTL.getNameLoc(), TTL.getDecl()); + bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { + report(LocationOfType, + TST->getTemplateName().getAsTemplateDecl()); // Primary template. + report(LocationOfType, TST->getAsCXXRecordDecl()); // Specialization return true; } - bool VisitDeclRefExpr(DeclRefExpr *DRE) { - report(DRE->getLocation(), DRE->getFoundDecl()); + bool VisitUsingType(UsingType *UT) { + report(LocationOfType, UT->getFoundDecl()); return true; } + + bool VisitTypedefType(TypedefType *TT) { + report(LocationOfType, TT->getDecl()); + return true; + } + + bool VisitUsingDecl(UsingDecl *UD) { + for (const auto *USD : UD->shadows()) + report(UD->getLocation(), USD->getTargetDecl()); + return true; + } + + bool VisitOverloadExpr(OverloadExpr *E) { + if (llvm::isa(E) && !Ctx.policy().Members) + return true; + for (auto *Candidate : E->decls()) + report(E->getExprLoc(), Candidate); + return true; + } + +private: + void report(SourceLocation Loc, NamedDecl *ND) { + while (Loc.isMacroID()) { + auto DecLoc = Ctx.sourceManager().getDecomposedLoc(Loc); + const SrcMgr::ExpansionInfo &Expansion = + Ctx.sourceManager().getSLocEntry(DecLoc.first).getExpansion(); + if (!Expansion.isMacroArgExpansion()) + return; // Names within macro bodies are not considered references. + Loc = Expansion.getSpellingLoc().getLocWithOffset(DecLoc.second); + } + // FIXME: relevant ranking hints? 
+ if (ND) + Callback(Loc, *cast(ND->getCanonicalDecl())); + } + + using Base = RecursiveASTVisitor; + + AnalysisContext &Ctx; + DeclCallback Callback; + + SourceLocation LocationOfType; }; } // namespace -void walkAST(Decl &Root, DeclCallback Callback) { - ASTWalker(Callback).TraverseDecl(&Root); +void walkAST(AnalysisContext &Ctx, Decl &Root, DeclCallback Callback) { + ASTWalker(Ctx, Callback).TraverseDecl(&Root); } } // namespace include_cleaner diff --git a/clang-tools-extra/include-cleaner/tool/CMakeLists.txt b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt new file mode 100644 index 000000000000..f8f7c81c761b --- /dev/null +++ b/clang-tools-extra/include-cleaner/tool/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS support) + +add_clang_tool(clang-include-cleaner + ClangIncludeCleaner.cpp + ) + +clang_target_link_libraries(clang-include-cleaner + PRIVATE + clangBasic + clangFrontend + clangTooling + ) + +target_link_libraries(clang-include-cleaner + PRIVATE + clangIncludeCleaner + ) \ No newline at end of file diff --git a/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp new file mode 100644 index 000000000000..aad70eabdae9 --- /dev/null +++ b/clang-tools-extra/include-cleaner/tool/ClangIncludeCleaner.cpp @@ -0,0 +1,187 @@ +//===--- ClangIncludeCleaner.cpp - Standalone used-header analysis --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// clang-include-cleaner finds violations of include-what-you-use policy. +// +// It scans a file, finding referenced symbols and headers providing them. 
+// - if a reference is satisfied only by indirect #include dependencies, +// this violates the policy and direct #includes are suggested. +// - if some #include directive doesn't satisfy any references, this violates +// the policy (don't include what you don't use!) and removal is suggested. +// +// With the -satisfied flag, it will also explain things that were OK: +// satisfied references and used #includes. +// +// This tool doesn't fix broken code where missing #includes prevent parsing, +// try clang-include-fixer for this instead. +// +//===----------------------------------------------------------------------===// + +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Hooks.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" + +llvm::cl::OptionCategory OptionsCat{"clang-include-cleaner"}; +llvm::cl::opt ShowSatisfied{ + "satisfied", + llvm::cl::cat(OptionsCat), + llvm::cl::desc( + "Show references whose header is included, and used includes"), + llvm::cl::init(false), +}; +llvm::cl::opt Recover{ + "recover", + llvm::cl::cat(OptionsCat), + llvm::cl::desc("Suppress further errors for the same header"), + llvm::cl::init(true), +}; + +namespace clang { +namespace include_cleaner { +namespace { + +class Action : public clang::ASTFrontendAction { +public: + bool BeginSourceFileAction(CompilerInstance &CI) override { + Diag = &CI.getDiagnostics(); + ID.emplace(Diag); + Ctx.emplace(Policy{}, CI.getPreprocessor()); + CI.getPreprocessor().addPPCallbacks(PP.record(*Ctx)); + return true; + } + + void EndSourceFile() override { + llvm::DenseSet
Recovered; + llvm::DenseMap Used; + walkUsed(*Ctx, AST.TopLevelDecls, PP.MacroReferences, + [&](SourceLocation Loc, Symbol Sym, ArrayRef
Headers) { + diagnoseReference(Loc, Sym, Headers, Recovered, Used); + }); + diagnoseIncludes(PP.Includes.all(), Used); + Ctx.reset(); + + ASTFrontendAction::EndSourceFile(); + } + + virtual std::unique_ptr + CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { + return AST.record(*Ctx); + } + +private: + // The diagnostics that we issue. + struct CustomDiagnosticIDs { + // References + unsigned Satisfied; + unsigned Unsatisfied; + unsigned NoHeader; + unsigned NoteHeader; + // #includes + unsigned Used; + unsigned Unused; + + CustomDiagnosticIDs(DiagnosticsEngine *D) { + auto SatisfiedLevel = ShowSatisfied ? DiagnosticsEngine::Remark + : DiagnosticsEngine::Ignored; + auto Error = DiagnosticsEngine::Error; + auto Note = DiagnosticsEngine::Note; + auto Warn = DiagnosticsEngine::Warning; + + Satisfied = D->getCustomDiagID(SatisfiedLevel, "%0 '%1' provided by %2"); + Unsatisfied = D->getCustomDiagID(Error, "no header included for %0 '%1'"); + NoHeader = D->getCustomDiagID(Warn, "unknown header provides %0 '%1'"); + NoteHeader = D->getCustomDiagID(Note, "provided by %0"); + Used = D->getCustomDiagID(SatisfiedLevel, "include provides %0 '%1'"); + Unused = D->getCustomDiagID(Error, "include is unused"); + } + }; + + void + diagnoseReference(SourceLocation Loc, Symbol Sym, ArrayRef
Headers, + llvm::DenseSet
&Recovered, + llvm::DenseMap &Used) { + bool Diagnosed = false; + for (const auto &H : Headers) { + if (H.kind() == Header::Builtin || H.kind() == Header::MainFile) { + if (!Diagnosed) { + Diag->Report(Loc, ID->Satisfied) + << Sym.nodeName() << Sym.name() << H.name(); + Diagnosed = true; + } + } + for (const auto *I : PP.Includes.match(H)) { + Used.try_emplace(I, Sym); + if (!Diagnosed) { + Diag->Report(Loc, ID->Satisfied) + << Sym.nodeName() << Sym.name() << I->Spelled; + Diagnosed = true; + } + } + } + if (Diagnosed) + return; + for (const auto &H : Headers) { + if (Recovered.contains(H)) { + Diag->Report(Loc, ID->Satisfied) + << Sym.nodeName() << Sym.name() << H.name(); + return; + } + } + Diag->Report(Loc, Headers.empty() ? ID->NoHeader : ID->Unsatisfied) + << Sym.nodeName() << Sym.name(); + for (const auto &H : Headers) { + Recovered.insert(H); + Diag->Report(ID->NoteHeader) << H.name(); + } + } + + void diagnoseIncludes( + ArrayRef Includes, + const llvm::DenseMap &Used) { + for (const auto &I : Includes) { + auto It = Used.find(&I); + if (It == Used.end()) + Diag->Report(I.Location, ID->Unused); + else + Diag->Report(I.Location, ID->Used) + << It->second.nodeName() << It->second.name(); + } + } + + llvm::Optional Ctx; + RecordedPP PP; + RecordedAST AST; + DiagnosticsEngine *Diag; + llvm::Optional ID; +}; + +} // namespace +} // namespace include_cleaner +} // namespace clang + +int main(int Argc, const char **Argv) { + llvm::InitLLVM X(Argc, Argv); + auto OptionsParser = + clang::tooling::CommonOptionsParser::create(Argc, Argv, OptionsCat); + if (!OptionsParser) { + llvm::errs() << toString(OptionsParser.takeError()); + return 1; + } + + return clang::tooling::ClangTool(OptionsParser->getCompilations(), + OptionsParser->getSourcePathList()) + .run(clang::tooling::newFrontendActionFactory< + clang::include_cleaner::Action>() + .get()); +} diff --git a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h 
b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h index c6ce2780dae6..e94a7fb9304a 100644 --- a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h +++ b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h @@ -49,6 +49,9 @@ private: friend bool operator==(const Header &L, const Header &R) { return L.ID == R.ID; } + friend bool operator<(const Header &L, const Header &R) { + return L.ID < R.ID; + } }; // A top-level standard library symbol, such as std::vector