diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /build/clang-plugin/mozsearch-plugin | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'build/clang-plugin/mozsearch-plugin')
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/FileOperations.cpp | 140 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/FileOperations.h | 70 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp | 2200 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/README | 12 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/StringOperations.cpp | 42 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/StringOperations.h | 25 |
6 files changed, 2489 insertions, 0 deletions
diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.cpp b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp new file mode 100644 index 0000000000..9307f4989d --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp @@ -0,0 +1,140 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FileOperations.h" + +#include <stdio.h> +#include <stdlib.h> + +#if defined(_WIN32) || defined(_WIN64) +#include <direct.h> +#include <io.h> +#include <windows.h> +#include "StringOperations.h" +#else +#include <sys/file.h> +#include <sys/time.h> +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +// Make sure that all directories on path exist, excluding the final element of +// the path. 
+void ensurePath(std::string Path) { + size_t Pos = 0; + if (Path[0] == PATHSEP_CHAR) { + Pos++; + } + + while ((Pos = Path.find(PATHSEP_CHAR, Pos)) != std::string::npos) { + std::string Portion = Path.substr(0, Pos); + if (!Portion.empty()) { +#if defined(_WIN32) || defined(_WIN64) + int Err = _mkdir(Portion.c_str()); +#else + int Err = mkdir(Portion.c_str(), 0775); +#endif + if (Err == -1 && errno != EEXIST) { + perror("mkdir failed"); + exit(1); + } + } + + Pos++; + } +} + +#if defined(_WIN32) || defined(_WIN64) +AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) { + this->Filename = DstFile; + std::string Hash = hash(SrcFile); + std::string MutexName = std::string("Local\\searchfox-") + Hash; + std::wstring WideMutexName; + WideMutexName.assign(MutexName.begin(), MutexName.end()); + Handle = CreateMutex(nullptr, false, WideMutexName.c_str()); + if (Handle == NULL) { + return; + } + + if (WaitForSingleObject(Handle, INFINITE) != WAIT_OBJECT_0) { + return; + } +} + +AutoLockFile::~AutoLockFile() { + ReleaseMutex(Handle); + CloseHandle(Handle); +} + +bool AutoLockFile::success() { + return Handle != NULL; +} + +FILE *AutoLockFile::openTmp() { + int TmpDescriptor = _open((Filename + ".tmp").c_str(), _O_WRONLY | _O_APPEND | _O_CREAT | _O_BINARY, 0666); + return _fdopen(TmpDescriptor, "ab"); +} + +bool AutoLockFile::moveTmp() { + if (_unlink(Filename.c_str()) == -1) { + if (errno != ENOENT) { + return false; + } + } + return rename((Filename + ".tmp").c_str(), Filename.c_str()) == 0; +} + +std::string getAbsolutePath(const std::string &Filename) { + char Full[_MAX_PATH]; + if (!_fullpath(Full, Filename.c_str(), _MAX_PATH)) { + return std::string(""); + } + return std::string(Full); +} +#else +AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) { + this->Filename = DstFile; + FileDescriptor = open(SrcFile.c_str(), O_RDONLY); + if (FileDescriptor == -1) { + return; + } + + do { + int rv = 
flock(FileDescriptor, LOCK_EX); + if (rv == 0) { + break; + } + } while (true); +} + +AutoLockFile::~AutoLockFile() { close(FileDescriptor); } + +bool AutoLockFile::success() { return FileDescriptor != -1; } + +FILE* AutoLockFile::openTmp() { + int TmpDescriptor = open((Filename + ".tmp").c_str(), O_WRONLY | O_APPEND | O_CREAT, 0666); + return fdopen(TmpDescriptor, "ab"); +} + +bool AutoLockFile::moveTmp() { + if (unlink(Filename.c_str()) == -1) { + if (errno != ENOENT) { + return false; + } + } + return rename((Filename + ".tmp").c_str(), Filename.c_str()) == 0; +} + +std::string getAbsolutePath(const std::string &Filename) { + char Full[4096]; + if (!realpath(Filename.c_str(), Full)) { + return std::string(""); + } + return std::string(Full); +} +#endif diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.h b/build/clang-plugin/mozsearch-plugin/FileOperations.h new file mode 100644 index 0000000000..90764484da --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/FileOperations.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef FileOperations_h +#define FileOperations_h + +#include <stdio.h> +#include <string> + +#if defined(_WIN32) || defined(_WIN64) +#include <windows.h> +#define PATHSEP_CHAR '\\' +#define PATHSEP_STRING "\\" +#else +#define PATHSEP_CHAR '/' +#define PATHSEP_STRING "/" +#endif + +// Make sure that all directories on path exist, excluding the final element of +// the path. +void ensurePath(std::string Path); + +std::string getAbsolutePath(const std::string &Filename); + +// Used to synchronize access when writing to an analysis file, so that +// concurrently running clang instances don't clobber each other's data. +// On Windows, we use a named mutex. 
On POSIX platforms, we use flock on the +// source files. flock is advisory locking, and doesn't interfere with clang's +// own opening of the source files (i.e. to interfere, clang would have to be +// using flock itself, which it does not). +struct AutoLockFile { + // Absolute path to the analysis file + std::string Filename; + +#if defined(_WIN32) || defined(_WIN64) + // Handle for the named Mutex + HANDLE Handle = NULL; +#else + // fd for the *source* file that corresponds to the analysis file. We use + // the source file because it doesn't change while the analysis file gets + // repeatedly replaced by a new version written to a separate tmp file. + // This fd is used when using flock to synchronize access. + int FileDescriptor = -1; +#endif + + // SrcFile should be the absolute path to the source code file, and DstFile + // the absolute path to the corresponding analysis file. This constructor + // will block until exclusive access has been obtained. + AutoLockFile(const std::string &SrcFile, const std::string &DstFile); + ~AutoLockFile(); + + // Check after constructing to ensure the mutex was properly set up. + bool success(); + + // There used to be an `openFile` method here but we switched to directly + // using a std::ifstream for the input file in able to take advantage of its + // support for variable length lines (as opposed to fgets which takes a fixed + // size buffer). + + // Open a new tmp file for writing the new analysis data to. Caller is + // responsible for fclose'ing it. + FILE *openTmp(); + // Replace the existing analysis file with the new "tmp" one that has the new + // data. Returns false on error. 
+ bool moveTmp(); +}; + +#endif diff --git a/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp new file mode 100644 index 0000000000..904897df6b --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp @@ -0,0 +1,2200 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Mangle.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Version.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendPluginRegistry.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +#include <fstream> +#include <iostream> +#include <map> +#include <memory> +#include <sstream> +#include <string> +#include <tuple> +#include <unordered_set> + +#include <stdio.h> +#include <stdlib.h> + +#include "FileOperations.h" +#include "StringOperations.h" + +#if CLANG_VERSION_MAJOR < 8 +// Starting with Clang 8.0 some basic functions have been renamed +#define getBeginLoc getLocStart +#define getEndLoc getLocEnd +#endif +// We want std::make_unique, but that's only available in c++14. In versions +// prior to that, we need to fall back to llvm's make_unique. 
It's also the +// case that we expect clang 10 to build with c++14 and clang 9 and earlier to +// build with c++11, at least as suggested by the llvm-config --cxxflags on +// non-windows platforms. mozilla-central seems to build with -std=c++17 on +// windows so we need to make this decision based on __cplusplus instead of +// the CLANG_VERSION_MAJOR. +#if __cplusplus < 201402L +using llvm::make_unique; +#else +using std::make_unique; +#endif + +using namespace clang; + +const std::string GENERATED("__GENERATED__" PATHSEP_STRING); + +// Absolute path to directory containing source code. +std::string Srcdir; + +// Absolute path to objdir (including generated code). +std::string Objdir; + +// Absolute path where analysis JSON output will be stored. +std::string Outdir; + +enum class FileType { + // The file was either in the source tree nor objdir. It might be a system + // include, for example. + Unknown, + // A file from the source tree. + Source, + // A file from the objdir. + Generated, +}; + +// Takes an absolute path to a file, and returns the type of file it is. If +// it's a Source or Generated file, the provided inout path argument is modified +// in-place so that it is relative to the source dir or objdir, respectively. +FileType relativizePath(std::string& path) { + if (path.compare(0, Objdir.length(), Objdir) == 0) { + path.replace(0, Objdir.length(), GENERATED); + return FileType::Generated; + } + // Empty filenames can get turned into Srcdir when they are resolved as + // absolute paths, so we should exclude files that are exactly equal to + // Srcdir or anything outside Srcdir. + if (path.length() > Srcdir.length() && path.compare(0, Srcdir.length(), Srcdir) == 0) { + // Remove the trailing `/' as well. 
+ path.erase(0, Srcdir.length() + 1); + return FileType::Source; + } + return FileType::Unknown; +} + +#if !defined(_WIN32) && !defined(_WIN64) +#include <sys/time.h> + +static double time() { + struct timeval Tv; + gettimeofday(&Tv, nullptr); + return double(Tv.tv_sec) + double(Tv.tv_usec) / 1000000.; +} +#endif + +// Return true if |input| is a valid C++ identifier. We don't want to generate +// analysis information for operators, string literals, etc. by accident since +// it trips up consumers of the data. +static bool isValidIdentifier(std::string Input) { + for (char C : Input) { + if (!(isalpha(C) || isdigit(C) || C == '_')) { + return false; + } + } + return true; +} + +struct RAIITracer { + RAIITracer(const char *log) : mLog(log) { + printf("<%s>\n", mLog); + } + + ~RAIITracer() { + printf("</%s>\n", mLog); + } + + const char* mLog; +}; + +#define TRACEFUNC RAIITracer tracer(__FUNCTION__); + +class IndexConsumer; + +// For each C++ file seen by the analysis (.cpp or .h), we track a +// FileInfo. This object tracks whether the file is "interesting" (i.e., whether +// it's in the source dir or the objdir). We also store the analysis output +// here. 
+struct FileInfo { + FileInfo(std::string &Rname) : Realname(Rname) { + switch (relativizePath(Realname)) { + case FileType::Generated: + Interesting = true; + Generated = true; + break; + case FileType::Source: + Interesting = true; + Generated = false; + break; + case FileType::Unknown: + Interesting = false; + Generated = false; + break; + } + } + std::string Realname; + std::vector<std::string> Output; + bool Interesting; + bool Generated; +}; + +class IndexConsumer; + +class PreprocessorHook : public PPCallbacks { + IndexConsumer *Indexer; + +public: + PreprocessorHook(IndexConsumer *C) : Indexer(C) {} + + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) override; + + virtual void InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FileNameRange, +#if CLANG_VERSION_MAJOR >= 16 + OptionalFileEntryRef File, +#elif CLANG_VERSION_MAJOR >= 15 + Optional<FileEntryRef> File, +#else + const FileEntry *File, +#endif + StringRef SearchPath, + StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) override; + + virtual void MacroDefined(const Token &Tok, + const MacroDirective *Md) override; + + virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md, + SourceRange Range, const MacroArgs *Ma) override; + virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md, + const MacroDirective *Undef) override; + virtual void Defined(const Token &Tok, const MacroDefinition &Md, + SourceRange Range) override; + virtual void Ifdef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) override; + virtual void Ifndef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) override; +}; + +class IndexConsumer : public ASTConsumer, + public RecursiveASTVisitor<IndexConsumer>, + public DiagnosticConsumer { +private: + CompilerInstance &CI; + 
SourceManager &SM; + LangOptions &LO; + std::map<FileID, std::unique_ptr<FileInfo>> FileMap; + MangleContext *CurMangleContext; + ASTContext *AstContext; + + typedef RecursiveASTVisitor<IndexConsumer> Super; + + // Tracks the set of declarations that the current expression/statement is + // nested inside of. + struct AutoSetContext { + AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false) + : Self(Self), Prev(Self->CurDeclContext), Decl(Context) { + this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false); + Self->CurDeclContext = this; + } + + ~AutoSetContext() { Self->CurDeclContext = Prev; } + + IndexConsumer *Self; + AutoSetContext *Prev; + NamedDecl *Decl; + bool VisitImplicit; + }; + AutoSetContext *CurDeclContext; + + FileInfo *getFileInfo(SourceLocation Loc) { + FileID Id = SM.getFileID(Loc); + + std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; + It = FileMap.find(Id); + if (It == FileMap.end()) { + // We haven't seen this file before. We need to make the FileInfo + // structure information ourselves + std::string Filename = std::string(SM.getFilename(Loc)); + std::string Absolute; + // If Loc is a macro id rather than a file id, it Filename might be + // empty. Also for some types of file locations that are clang-internal + // like "<scratch>" it can return an empty Filename. In these cases we + // want to leave Absolute as empty. + if (!Filename.empty()) { + Absolute = getAbsolutePath(Filename); + if (Absolute.empty()) { + Absolute = Filename; + } + } + std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute); + It = FileMap.insert(std::make_pair(Id, std::move(Info))).first; + } + return It->second.get(); + } + + // Helpers for processing declarations + // Should we ignore this location? 
+ bool isInterestingLocation(SourceLocation Loc) { + if (Loc.isInvalid()) { + return false; + } + + return getFileInfo(Loc)->Interesting; + } + + // Convert location to "line:column" or "line:column-column" given length. + // In resulting string rep, line is 1-based and zero-padded to 5 digits, while + // column is 0-based and unpadded. + std::string locationToString(SourceLocation Loc, size_t Length = 0) { + std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc); + + bool IsInvalid; + unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + if (Length) { + return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length); + } else { + return stringFormat("%05d:%d", Line, Column - 1); + } + } + + // Convert SourceRange to "line-line". + // In the resulting string rep, line is 1-based. + std::string lineRangeToString(SourceRange Range) { + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + bool IsInvalid; + unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + return stringFormat("%d-%d", Line1, Line2); + } + + // Convert SourceRange to "line:column-line:column". + // In the resulting string rep, line is 1-based, column is 0-based. 
+ std::string fullRangeToString(SourceRange Range) { + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + bool IsInvalid; + unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1); + } + + // Returns the qualified name of `d` without considering template parameters. + std::string getQualifiedName(const NamedDecl *D) { + const DeclContext *Ctx = D->getDeclContext(); + if (Ctx->isFunctionOrMethod()) { + return D->getQualifiedNameAsString(); + } + + std::vector<const DeclContext *> Contexts; + + // Collect contexts. 
+ while (Ctx && isa<NamedDecl>(Ctx)) { + Contexts.push_back(Ctx); + Ctx = Ctx->getParent(); + } + + std::string Result; + + std::reverse(Contexts.begin(), Contexts.end()); + + for (const DeclContext *DC : Contexts) { + if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) { + Result += Spec->getNameAsString(); + + if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) { + std::string Backing; + llvm::raw_string_ostream Stream(Backing); + const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); + printTemplateArgumentList( + Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts())); + Result += Stream.str(); + } + } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) { + if (Nd->isAnonymousNamespace() || Nd->isInline()) { + continue; + } + Result += Nd->getNameAsString(); + } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) { + if (!Rd->getIdentifier()) { + Result += "(anonymous)"; + } else { + Result += Rd->getNameAsString(); + } + } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) { + Result += Fd->getNameAsString(); + } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) { + // C++ [dcl.enum]p10: Each enum-name and each unscoped + // enumerator is declared in the scope that immediately contains + // the enum-specifier. Each scoped enumerator is declared in the + // scope of the enumeration. 
+ if (Ed->isScoped() || Ed->getIdentifier()) + Result += Ed->getNameAsString(); + else + continue; + } else { + Result += cast<NamedDecl>(DC)->getNameAsString(); + } + Result += "::"; + } + + if (D->getDeclName()) + Result += D->getNameAsString(); + else + Result += "(anonymous)"; + + return Result; + } + + std::string mangleLocation(SourceLocation Loc, + std::string Backup = std::string()) { + FileInfo *F = getFileInfo(Loc); + std::string Filename = F->Realname; + if (Filename.length() == 0 && Backup.length() != 0) { + return Backup; + } + if (F->Generated) { + // Since generated files may be different on different platforms, + // we need to include a platform-specific thing in the hash. Otherwise + // we can end up with hash collisions where different symbols from + // different platforms map to the same thing. + char* Platform = getenv("MOZSEARCH_PLATFORM"); + Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename; + } + return hash(Filename + std::string("@") + locationToString(Loc)); + } + + bool isAcceptableSymbolChar(char c) { + return isalpha(c) || isdigit(c) || c == '_' || c == '/'; + } + + std::string mangleFile(std::string Filename, FileType Type) { + // "Mangle" the file path, such that: + // 1. The majority of paths will still be mostly human-readable. + // 2. The sanitization algorithm doesn't produce collisions where two + // different unsanitized paths can result in the same sanitized paths. + // 3. The produced symbol doesn't cause problems with downstream consumers. + // In order to accomplish this, we keep alphanumeric chars, underscores, + // and slashes, and replace everything else with an "@xx" hex encoding. + // The majority of path characters are letters and slashes which don't get + // encoded, so that satisifies (1). Since "@" characters in the unsanitized + // path get encoded, there should be no "@" characters in the sanitized path + // that got preserved from the unsanitized input, so that should satisfy (2). 
+ // And (3) was done by trial-and-error. Note in particular the dot (.) + // character needs to be encoded, or the symbol-search feature of mozsearch + // doesn't work correctly, as all dot characters in the symbol query get + // replaced by #. + for (size_t i = 0; i < Filename.length(); i++) { + char c = Filename[i]; + if (isAcceptableSymbolChar(c)) { + continue; + } + char hex[4]; + sprintf(hex, "@%02X", ((int)c) & 0xFF); + Filename.replace(i, 1, hex); + i += 2; + } + + if (Type == FileType::Generated) { + // Since generated files may be different on different platforms, + // we need to include a platform-specific thing in the hash. Otherwise + // we can end up with hash collisions where different symbols from + // different platforms map to the same thing. + char* Platform = getenv("MOZSEARCH_PLATFORM"); + Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename; + } + return Filename; + } + + std::string mangleQualifiedName(std::string Name) { + std::replace(Name.begin(), Name.end(), ' ', '_'); + return Name; + } + + std::string getMangledName(clang::MangleContext *Ctx, + const clang::NamedDecl *Decl) { + if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) { + return cast<FunctionDecl>(Decl)->getNameAsString(); + } + + if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) { + const DeclContext *DC = Decl->getDeclContext(); + if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) || + isa<LinkageSpecDecl>(DC) || + // isa<ExternCContextDecl>(DC) || + isa<TagDecl>(DC)) { + llvm::SmallVector<char, 512> Output; + llvm::raw_svector_ostream Out(Output); +#if CLANG_VERSION_MAJOR >= 11 + // This code changed upstream in version 11: + // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47 + GlobalDecl GD; + if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { + GD = GlobalDecl(D, Ctor_Complete); + } else if (const CXXDestructorDecl *D = + dyn_cast<CXXDestructorDecl>(Decl)) { + 
GD = GlobalDecl(D, Dtor_Complete); + } else { + GD = GlobalDecl(Decl); + } + Ctx->mangleName(GD, Out); +#else + if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { + Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out); + } else if (const CXXDestructorDecl *D = + dyn_cast<CXXDestructorDecl>(Decl)) { + Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out); + } else { + Ctx->mangleName(Decl, Out); + } +#endif + return Out.str().str(); + } else { + return std::string("V_") + mangleLocation(Decl->getLocation()) + + std::string("_") + hash(std::string(Decl->getName())); + } + } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) || + isa<ObjCInterfaceDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. + return std::string("T_") + mangleLocation(Decl->getLocation()); + } + + return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. 
+ return std::string("NS_") + mangleLocation(Decl->getLocation()); + } + + return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) { + const ObjCInterfaceDecl *Iface = D2->getContainingInterface(); + return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" + + D2->getNameAsString(); + } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) { + const RecordDecl *Record = D2->getParent(); + return std::string("F_<") + getMangledName(Ctx, Record) + ">_" + + D2->getNameAsString(); + } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) { + const DeclContext *DC = Decl->getDeclContext(); + if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) { + return std::string("E_<") + getMangledName(Ctx, Named) + ">_" + + D2->getNameAsString(); + } + } + + assert(false); + return std::string(""); + } + + void debugLocation(SourceLocation Loc) { + std::string S = locationToString(Loc); + StringRef Filename = SM.getFilename(Loc); + printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str()); + } + + void debugRange(SourceRange Range) { + printf("Range\n"); + debugLocation(Range.getBegin()); + debugLocation(Range.getEnd()); + } + +public: + IndexConsumer(CompilerInstance &CI) + : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr), + AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) { + CI.getPreprocessor().addPPCallbacks( + make_unique<PreprocessorHook>(this)); + } + + virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const { + return new IndexConsumer(CI); + } + +#if !defined(_WIN32) && !defined(_WIN64) + struct AutoTime { + AutoTime(double *Counter) : Counter(Counter), Start(time()) {} + ~AutoTime() { + if (Start) { + *Counter += time() - Start; + } + } + void stop() { + *Counter += time() - Start; + Start = 0; + } + double *Counter; + double Start; + }; +#endif + + // All we need is to follow the final 
declaration. + virtual void HandleTranslationUnit(ASTContext &Ctx) { + CurMangleContext = + clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics()); + + AstContext = &Ctx; + TraverseDecl(Ctx.getTranslationUnitDecl()); + + // Emit the JSON data for all files now. + std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; + for (It = FileMap.begin(); It != FileMap.end(); It++) { + if (!It->second->Interesting) { + continue; + } + + FileInfo &Info = *It->second; + + std::string Filename = Outdir + Info.Realname; + std::string SrcFilename = Info.Generated + ? Objdir + Info.Realname.substr(GENERATED.length()) + : Srcdir + PATHSEP_STRING + Info.Realname; + + ensurePath(Filename); + + // We lock the output file in case some other clang process is trying to + // write to it at the same time. + AutoLockFile Lock(SrcFilename, Filename); + + if (!Lock.success()) { + fprintf(stderr, "Unable to lock file %s\n", Filename.c_str()); + exit(1); + } + + // Merge our results with the existing lines from the output file. + // This ensures that header files that are included multiple times + // in different ways are analyzed completely. + std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary); + FILE *OutFp = Lock.openTmp(); + if (!OutFp) { + fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str()); + exit(1); + } + + // Sort our new results and get an iterator to them + std::sort(Info.Output.begin(), Info.Output.end()); + std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin(); + std::string LastNewWritten; + + // Loop over the existing (sorted) lines in the analysis output file. + // (The good() check also handles the case where Fin did not exist when we + // went to open it.) + while(Fin.good()) { + std::string OldLine; + std::getline(Fin, OldLine); + // Skip blank lines. + if (OldLine.length() == 0) { + continue; + } + // We need to put the newlines back that getline() eats. 
+ OldLine.push_back('\n'); + + // Write any results from Info.Output that are lexicographically + // smaller than OldLine (read from the existing file), but make sure + // to skip duplicates. Keep advacing NewLinesIter until we reach an + // entry that is lexicographically greater than OldLine. + for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { + if (*NewLinesIter > OldLine) { + break; + } + if (*NewLinesIter == OldLine) { + continue; + } + if (*NewLinesIter == LastNewWritten) { + // dedupe the new entries being written + continue; + } + if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write %zu bytes[1] to tmp output file for %s\n", + NewLinesIter->length(), Filename.c_str()); + exit(1); + } + LastNewWritten = *NewLinesIter; + } + + // Write the entry read from the existing file. + if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write %zu bytes[2] to tmp output file for %s\n", + OldLine.length(), Filename.c_str()); + exit(1); + } + } + + // We finished reading from Fin + Fin.close(); + + // Finish iterating our new results, discarding duplicates + for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { + if (*NewLinesIter == LastNewWritten) { + continue; + } + if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write %zu bytes[3] to tmp output file for %s\n", + NewLinesIter->length(), Filename.c_str()); + exit(1); + } + LastNewWritten = *NewLinesIter; + } + + // Done writing all the things, close it and replace the old output file + // with the new one. + fclose(OutFp); + if (!Lock.moveTmp()) { + fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno); + exit(1); + } + } + } + + // Unfortunately, we have to override all these methods in order to track the + // context we're inside. 
+
+  // Each Traverse*Decl override below installs an AutoSetContext for the
+  // declaration being entered and then defers to the base-class traversal.
+  bool TraverseEnumDecl(EnumDecl *D) {
+    AutoSetContext Asc(this, D);
+    return Super::TraverseEnumDecl(D);
+  }
+  bool TraverseRecordDecl(RecordDecl *D) {
+    AutoSetContext Asc(this, D);
+    return Super::TraverseRecordDecl(D);
+  }
+  bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
+    AutoSetContext Asc(this, D);
+    return Super::TraverseCXXRecordDecl(D);
+  }
+  bool TraverseFunctionDecl(FunctionDecl *D) {
+    AutoSetContext Asc(this, D);
+    const FunctionDecl *Def = nullptr;
+    // (See the larger AutoTemplateContext comment for more information.) If a
+    // method on a templated class is declared out-of-line, we need to analyze
+    // the definition inside the scope of the template or else we won't properly
+    // handle member access on the templated type.
+    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
+      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
+    }
+    return Super::TraverseFunctionDecl(D);
+  }
+  bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
+    AutoSetContext Asc(this, D);
+    const FunctionDecl *Def = nullptr;
+    // See TraverseFunctionDecl.
+    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
+      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
+    }
+    return Super::TraverseCXXMethodDecl(D);
+  }
+  bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
+    // Constructors are the only declarations for which implicit code is
+    // visited (see shouldVisitImplicitCode).
+    AutoSetContext Asc(this, D, /*VisitImplicit=*/true);
+    const FunctionDecl *Def = nullptr;
+    // See TraverseFunctionDecl.
+    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
+      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
+    }
+    return Super::TraverseCXXConstructorDecl(D);
+  }
+  bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
+    AutoSetContext Asc(this, D);
+    const FunctionDecl *Def = nullptr;
+    // See TraverseFunctionDecl.
+    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
+      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
+    }
+    return Super::TraverseCXXConversionDecl(D);
+  }
+  bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
+    AutoSetContext Asc(this, D);
+    const FunctionDecl *Def = nullptr;
+    // See TraverseFunctionDecl.
+    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
+      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
+    }
+    return Super::TraverseCXXDestructorDecl(D);
+  }
+
+  // Used to keep track of the context in which a token appears.
+  struct Context {
+    // Ultimately this becomes the "context" JSON property.
+    std::string Name;
+
+    // Ultimately this becomes the "contextsym" JSON property.
+    std::string Symbol;
+
+    Context() {}
+    // The arguments are taken by value, so move them into place rather than
+    // copying a second time.
+    Context(std::string Name, std::string Symbol)
+        : Name(std::move(Name)), Symbol(std::move(Symbol)) {}
+  };
+
+  // Builds the Context (qualified name + mangled symbol) for a declaration.
+  // A function template instantiation is mapped back to the pattern it was
+  // instantiated from before the names are computed.
+  Context translateContext(NamedDecl *D) {
+    const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
+    if (F && F->isTemplateInstantiation()) {
+      D = F->getTemplateInstantiationPattern();
+    }
+
+    return Context(D->getQualifiedNameAsString(), getMangledName(CurMangleContext, D));
+  }
+
+  // Returns the context for a token at Loc: the declaration on top of the
+  // AutoSetContext stack, or an empty Context if there is none.
+  Context getContext(SourceLocation Loc) {
+    if (SM.isMacroBodyExpansion(Loc)) {
+      // If we're inside a macro definition, we don't return any context. It
+      // will probably not be what the user expects if we do.
+      return Context();
+    }
+
+    if (CurDeclContext) {
+      return translateContext(CurDeclContext->Decl);
+    }
+    return Context();
+  }
+
+  // Similar to getContext(SourceLocation), but it skips the declaration passed
+  // in. This is useful if we want the context of a declaration that's already
+  // on the stack.
+  Context getContext(Decl *D) {
+    if (SM.isMacroBodyExpansion(D->getLocation())) {
+      // If we're inside a macro definition, we don't return any context. It
+      // will probably not be what the user expects if we do.
+      return Context();
+    }
+
+    // Walk outwards and use the first stack entry that is not D itself.
+    AutoSetContext *Ctxt = CurDeclContext;
+    while (Ctxt) {
+      if (Ctxt->Decl != D) {
+        return translateContext(Ctxt->Decl);
+      }
+      Ctxt = Ctxt->Prev;
+    }
+    return Context();
+  }
+
+  // Analyzing template code is tricky. Suppose we have this code:
+  //
+  //   template<class T>
+  //   bool Foo(T* ptr) { return T::StaticMethod(ptr); }
+  //
+  // If we analyze the body of Foo without knowing the type T, then we will not
+  // be able to generate any information for StaticMethod. However, analyzing
+  // Foo for every possible instantiation is inefficient and it also generates
+  // too much data in some cases. For example, the following code would generate
+  // one definition of Baz for every instantiation, which is undesirable:
+  //
+  //   template<class T>
+  //   class Bar { struct Baz { ... }; };
+  //
+  // To solve this problem, we analyze templates only once. We do so in a
+  // GatherDependent mode where we look for "dependent scoped member
+  // expressions" (i.e., things like StaticMethod). We keep track of the
+  // locations of these expressions. If we find one or more of them, we analyze
+  // the template for each instantiation, in an AnalyzeDependent mode. This mode
+  // ignores all source locations except for the ones where we found dependent
+  // scoped member expressions before. For these locations, we generate a
+  // separate JSON result for each instantiation.
+  //
+  // We inherit our parent's mode if it exists. This is because if our
+  // parent is in analyze mode, it means we've already lived a full life in
+  // gather mode and we must not restart in gather mode or we'll cause the
+  // indexer to visit EVERY identifier, which is way too much data.
+  struct AutoTemplateContext {
+    // Pushes this context on top of Self->TemplateStack; the destructor pops
+    // it again.
+    AutoTemplateContext(IndexConsumer *Self)
+        : Self(Self)
+        , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent)
+        , Parent(Self->TemplateStack) {
+      Self->TemplateStack = this;
+    }
+
+    ~AutoTemplateContext() { Self->TemplateStack = Parent; }
+
+    // We traverse templates in two modes:
+    enum class Mode {
+      // Gather mode does not traverse into specializations. It looks for
+      // locations where it would help to have more info from template
+      // specializations.
+      GatherDependent,
+
+      // Analyze mode traverses into template specializations and records
+      // information about token locations saved in gather mode.
+      AnalyzeDependent,
+    };
+
+    // We found a dependent scoped member expression! Keep track of it for
+    // later. The location is also recorded in every enclosing context.
+    void visitDependent(SourceLocation Loc) {
+      if (CurMode == Mode::AnalyzeDependent) {
+        return;
+      }
+
+      DependentLocations.insert(Loc.getRawEncoding());
+      if (Parent) {
+        Parent->visitDependent(Loc);
+      }
+    }
+
+    bool inGatherMode() {
+      return CurMode == Mode::GatherDependent;
+    }
+
+    // Do we need to perform the extra AnalyzeDependent passes (one per
+    // instantiation)?
+    bool needsAnalysis() const {
+      if (!DependentLocations.empty()) {
+        return true;
+      }
+      if (Parent) {
+        return Parent->needsAnalysis();
+      }
+      return false;
+    }
+
+    void switchMode() { CurMode = Mode::AnalyzeDependent; }
+
+    // Do we want to analyze each template instantiation separately?
+    bool shouldVisitTemplateInstantiations() const {
+      if (CurMode == Mode::AnalyzeDependent) {
+        return true;
+      }
+      if (Parent) {
+        return Parent->shouldVisitTemplateInstantiations();
+      }
+      return false;
+    }
+
+    // For a given expression/statement, should we emit JSON data for it?
+    // Always in gather mode; in analyze mode only for locations recorded by
+    // visitDependent in this context or one of its parents.
+    bool shouldVisit(SourceLocation Loc) {
+      if (CurMode == Mode::GatherDependent) {
+        return true;
+      }
+      if (DependentLocations.find(Loc.getRawEncoding()) !=
+          DependentLocations.end()) {
+        return true;
+      }
+      if (Parent) {
+        return Parent->shouldVisit(Loc);
+      }
+      return false;
+    }
+
+  private:
+    IndexConsumer *Self;
+    Mode CurMode;
+    // Raw encodings of the SourceLocations passed to visitDependent.
+    std::unordered_set<unsigned> DependentLocations;
+    AutoTemplateContext *Parent;
+  };
+
+  // Innermost active AutoTemplateContext, or null outside of any template.
+  AutoTemplateContext *TemplateStack;
+
+  bool shouldVisitTemplateInstantiations() const {
+    if (TemplateStack) {
+      return TemplateStack->shouldVisitTemplateInstantiations();
+    }
+    return false;
+  }
+
+  bool shouldVisitImplicitCode() const {
+    return CurDeclContext && CurDeclContext->VisitImplicit;
+  }
+
+  // Traverses the class template once through the base class. If that
+  // recorded any dependent locations, switches to analyze mode and traverses
+  // every redeclaration of every specialization of the canonical template.
+  bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
+    AutoTemplateContext Atc(this);
+    Super::TraverseClassTemplateDecl(D);
+
+    if (!Atc.needsAnalysis()) {
+      return true;
+    }
+
+    Atc.switchMode();
+
+    if (D != D->getCanonicalDecl()) {
+      return true;
+    }
+
+    for (auto *Spec : D->specializations()) {
+      for (auto *Rd : Spec->redecls()) {
+        // We don't want to visit injected-class-names in this traversal.
+        if (cast<CXXRecordDecl>(Rd)->isInjectedClassName())
+          continue;
+
+        TraverseDecl(Rd);
+      }
+    }
+
+    return true;
+  }
+
+  // Same scheme as TraverseClassTemplateDecl, except that the base-class
+  // traversal only happens when the new context starts out in gather mode.
+  bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
+    AutoTemplateContext Atc(this);
+    if (Atc.inGatherMode()) {
+      Super::TraverseFunctionTemplateDecl(D);
+    }
+
+    if (!Atc.needsAnalysis()) {
+      return true;
+    }
+
+    Atc.switchMode();
+
+    if (D != D->getCanonicalDecl()) {
+      return true;
+    }
+
+    for (auto *Spec : D->specializations()) {
+      for (auto *Rd : Spec->redecls()) {
+        TraverseDecl(Rd);
+      }
+    }
+
+    return true;
+  }
+
+  // Outside of any template everything is visited; inside, the innermost
+  // AutoTemplateContext decides.
+  bool shouldVisit(SourceLocation Loc) {
+    if (TemplateStack) {
+      return TemplateStack->shouldVisit(Loc);
+    }
+    return true;
+  }
+
+  enum {
+    // Flag to omit the identifier from being cross-referenced across files.
+    // This is usually desired for local variables.
+    NoCrossref = 1 << 0,
+    // Flag to indicate the token with analysis data is not an identifier. Indicates
+    // we want to skip the check that tries to ensure a sane identifier token.
+    NotIdentifierToken = 1 << 1,
+    // This indicates that the end of the provided SourceRange is valid and
+    // should be respected. If this flag is not set, the visitIdentifier
+    // function should use only the start of the SourceRange and auto-detect
+    // the end based on whatever token is found at the start.
+    LocRangeEndValid = 1 << 2
+  };
+
+  // Emits one "structured" JSON record for a record (class/struct/union)
+  // definition: its size, its base classes ("supers"), its methods and its
+  // fields with their offsets and sizes. The record is appended to the output
+  // of the file containing Loc.
+  void emitStructuredInfo(SourceLocation Loc, const RecordDecl *decl) {
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset =
+        StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+    J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
+    J.attribute("structured", 1);
+    J.attribute("pretty", getQualifiedName(decl));
+    J.attribute("sym", getMangledName(CurMangleContext, decl));
+
+    J.attribute("kind", TypeWithKeyword::getTagTypeKindName(decl->getTagKind()));
+
+    const ASTContext &C = *AstContext;
+    const ASTRecordLayout &Layout = C.getASTRecordLayout(decl);
+
+    J.attribute("sizeBytes", Layout.getSize().getQuantity());
+
+    auto cxxDecl = dyn_cast<CXXRecordDecl>(decl);
+
+    if (cxxDecl) {
+      J.attributeBegin("supers");
+      J.arrayBegin();
+      for (const CXXBaseSpecifier &Base : cxxDecl->bases()) {
+        const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+        if (!BaseDecl) {
+          // The base type did not resolve to a record declaration; there is
+          // nothing to name or mangle, so skip it instead of dereferencing
+          // a null pointer.
+          continue;
+        }
+
+        J.objectBegin();
+
+        J.attribute("pretty", getQualifiedName(BaseDecl));
+        J.attribute("sym", getMangledName(CurMangleContext, BaseDecl));
+
+        J.attributeBegin("props");
+        J.arrayBegin();
+        if (Base.isVirtual()) {
+          J.value("virtual");
+        }
+        J.arrayEnd();
+        J.attributeEnd();
+
+        J.objectEnd();
+      }
+      J.arrayEnd();
+      J.attributeEnd();
+
+      J.attributeBegin("methods");
+      J.arrayBegin();
+      for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) {
+        J.objectBegin();
+
+        J.attribute("pretty", getQualifiedName(MethodDecl));
+        J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
+
+        // TODO: Better figure out what to do for non-isUserProvided methods
+        // which means there's potentially semantic data that doesn't correspond
+        // to a source location in the source. Should we be emitting
+        // structured info for those when we're processing the class here?
+
+        J.attributeBegin("props");
+        J.arrayBegin();
+        if (MethodDecl->isStatic()) {
+          J.value("static");
+        }
+        if (MethodDecl->isInstance()) {
+          J.value("instance");
+        }
+        if (MethodDecl->isVirtual()) {
+          J.value("virtual");
+        }
+        if (MethodDecl->isUserProvided()) {
+          J.value("user");
+        }
+        if (MethodDecl->isDefaulted()) {
+          J.value("defaulted");
+        }
+        if (MethodDecl->isDeleted()) {
+          J.value("deleted");
+        }
+        if (MethodDecl->isConstexpr()) {
+          J.value("constexpr");
+        }
+        J.arrayEnd();
+        J.attributeEnd();
+
+        J.objectEnd();
+      }
+      J.arrayEnd();
+      J.attributeEnd();
+    }
+
+    J.attributeBegin("fields");
+    J.arrayBegin();
+    // iField tracks the position of the field, which is what
+    // ASTRecordLayout::getFieldOffset is indexed by.
+    uint64_t iField = 0;
+    for (RecordDecl::field_iterator It = decl->field_begin(),
+                                    End = decl->field_end(); It != End; ++It, ++iField) {
+      const FieldDecl &Field = **It;
+      uint64_t localOffsetBits = Layout.getFieldOffset(iField);
+      CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits);
+
+      J.objectBegin();
+      J.attribute("pretty", getQualifiedName(&Field));
+      J.attribute("sym", getMangledName(CurMangleContext, &Field));
+      QualType FieldType = Field.getType();
+      J.attribute("type", FieldType.getAsString());
+      QualType CanonicalFieldType = FieldType.getCanonicalType();
+      const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl();
+      if (tagDecl) {
+        J.attribute("typesym", getMangledName(CurMangleContext, tagDecl));
+      }
+      J.attribute("offsetBytes", localOffsetBytes.getQuantity());
+      if (Field.isBitField()) {
+        J.attributeBegin("bitPositions");
+        J.objectBegin();
+
+        // "begin" is the bit offset within the byte reported as offsetBytes.
+        J.attribute("begin", unsigned(localOffsetBits - C.toBits(localOffsetBytes)));
+        J.attribute("width", Field.getBitWidthValue(C));
+
+        J.objectEnd();
+        J.attributeEnd();
+      } else {
+        // Try and get the field as a record itself so we can know its size, but
+        // we don't actually want to recurse into it.
+        if (auto FieldRec = Field.getType()->getAs<RecordType>()) {
+          auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl());
+          J.attribute("sizeBytes", FieldLayout.getSize().getQuantity());
+        } else {
+          // We were unable to get it as a record, which suggests it's a normal
+          // type, in which case let's just ask for the type size. (Maybe this
+          // would also work for the above case too?)
+          uint64_t typeSizeBits = C.getTypeSize(Field.getType());
+          CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits);
+          J.attribute("sizeBytes", typeSizeBytes.getQuantity());
+        }
+      }
+      J.objectEnd();
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+
+    // End the top-level object.
+    J.objectEnd();
+
+    FileInfo *F = getFileInfo(Loc);
+    // we want a newline.
+    ros << '\n';
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  // Emits one "structured" JSON record for a function or method: its kind,
+  // for methods the parent class and the methods it overrides, and a "props"
+  // array. The record is appended to the output of the file containing Loc.
+  void emitStructuredInfo(SourceLocation Loc, const FunctionDecl *decl) {
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset =
+        StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+    J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
+    J.attribute("structured", 1);
+    J.attribute("pretty", getQualifiedName(decl));
+    J.attribute("sym", getMangledName(CurMangleContext, decl));
+
+    auto cxxDecl = dyn_cast<CXXMethodDecl>(decl);
+
+    if (cxxDecl) {
+      J.attribute("kind", "method");
+      if (auto parentDecl = cxxDecl->getParent()) {
+        J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
+      }
+
+      J.attributeBegin("overrides");
+      J.arrayBegin();
+      for (const CXXMethodDecl *MethodDecl : cxxDecl->overridden_methods()) {
+        J.objectBegin();
+
+        // TODO: Make sure we're doing template traversals appropriately...
+        // findOverriddenMethods (now removed) liked to do:
+        //   if (Decl->isTemplateInstantiation()) {
+        //     Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
+        //   }
+        // I think our pre-emptive dereferencing/avoidance of templates may
+        // protect us from this, but it needs more investigation.
+
+        J.attribute("pretty", getQualifiedName(MethodDecl));
+        J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
+
+        J.objectEnd();
+      }
+      J.arrayEnd();
+      J.attributeEnd();
+
+    } else {
+      J.attribute("kind", "function");
+    }
+
+    // ## Props
+    J.attributeBegin("props");
+    J.arrayBegin();
+    // some of these are only possible on a CXXMethodDecl, but we want them all
+    // in the same array, so condition these first ones.
+    if (cxxDecl) {
+      if (cxxDecl->isStatic()) {
+        J.value("static");
+      }
+      if (cxxDecl->isInstance()) {
+        J.value("instance");
+      }
+      if (cxxDecl->isVirtual()) {
+        J.value("virtual");
+      }
+      if (cxxDecl->isUserProvided()) {
+        J.value("user");
+      }
+    }
+    if (decl->isDefaulted()) {
+      J.value("defaulted");
+    }
+    if (decl->isDeleted()) {
+      J.value("deleted");
+    }
+    if (decl->isConstexpr()) {
+      J.value("constexpr");
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+
+    // End the top-level object.
+    J.objectEnd();
+
+    FileInfo *F = getFileInfo(Loc);
+    // we want a newline.
+    ros << '\n';
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  /**
+   * Emit structured info for a field.  Right now the intent is for this to just
+   * be a pointer to its parent's structured info with this method entirely
+   * avoiding getting the ASTRecordLayout.
+   *
+   * TODO: Give more thought on where to locate the canonical info on fields and
+   * how to normalize their exposure over the web.  We could relink the info
+   * both at cross-reference time and web-server lookup time.  This is also
+   * called out in `analysis.md`.
+   */
+  void emitStructuredInfo(SourceLocation Loc, const FieldDecl *decl) {
+    // XXX the call to decl::getParent will assert below for ObjCIvarDecl
+    // instances because their DeclContext is not a RecordDecl.  So just bail
+    // for now.
+    // TODO: better support ObjC.
+    if (isa<ObjCIvarDecl>(decl)) {
+      return;
+    }
+
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset =
+        StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+    J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
+    J.attribute("structured", 1);
+    J.attribute("pretty", getQualifiedName(decl));
+    J.attribute("sym", getMangledName(CurMangleContext, decl));
+    J.attribute("kind", "field");
+
+    if (auto parentDecl = decl->getParent()) {
+      J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
+    }
+
+    // End the top-level object.
+    J.objectEnd();
+
+    FileInfo *F = getFileInfo(Loc);
+    // we want a newline.
+    ros << '\n';
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  // XXX Type annotating.
+  // QualType is the type class.  It has helpers like TagDecl via getAsTagDecl.
+  // ValueDecl exposes a getType() method.
+  //
+  // Arguably it makes sense to only expose types that Searchfox has definitions
+  // for as first-class.  Probably the way to go is like context/contextsym.
+  // We expose a "type" which is just a human-readable string which has no
+  // semantic purposes and is just a display string, plus then a "typesym" which
+  // we expose if we were able to map the type.
+  //
+  // Other meta-info: field offsets.  Ancestor types.
+
+  // This is the only function that emits analysis JSON data. It should be
+  // called for each identifier that corresponds to a symbol.
+  //
+  // Up to two records are appended to the output of the file containing the
+  // start of LocRange: a "target" record (skipped when NoCrossref is set)
+  // and a "source" record (always). Nothing is emitted when shouldVisit
+  // rejects the location, or when the token text is not a valid identifier
+  // and NotIdentifierToken is not set.
+  void visitIdentifier(const char *Kind, const char *SyntaxKind,
+                       llvm::StringRef QualName, SourceRange LocRange,
+                       std::string Symbol,
+                       QualType MaybeType = QualType(),
+                       Context TokenContext = Context(), int Flags = 0,
+                       SourceRange PeekRange = SourceRange(),
+                       SourceRange NestingRange = SourceRange()) {
+    SourceLocation Loc = LocRange.getBegin();
+    if (!shouldVisit(Loc)) {
+      return;
+    }
+
+    // Find the file positions corresponding to the token.
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset = (Flags & LocRangeEndValid)
+        ? SM.getFileOffset(LocRange.getEnd())
+        : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+
+    // Both records below carry the same "loc" string.
+    std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
+
+    if (!(Flags & NotIdentifierToken)) {
+      // Get the token's characters so we can make sure it's a valid token.
+      const char *StartChars = SM.getCharacterData(Loc);
+      std::string Text(StartChars, EndOffset - StartOffset);
+      if (!isValidIdentifier(Text)) {
+        return;
+      }
+    }
+
+    FileInfo *F = getFileInfo(Loc);
+
+    if (!(Flags & NoCrossref)) {
+      std::string json_str;
+      llvm::raw_string_ostream ros(json_str);
+      llvm::json::OStream J(ros);
+      // Start the top-level object.
+      J.objectBegin();
+
+      J.attribute("loc", LocStr);
+      J.attribute("target", 1);
+      J.attribute("kind", Kind);
+      // Pass the StringRef itself: StringRef::data() is not guaranteed to be
+      // NUL-terminated, so it must not be treated as a C string.
+      J.attribute("pretty", QualName);
+      J.attribute("sym", Symbol);
+      if (!TokenContext.Name.empty()) {
+        J.attribute("context", TokenContext.Name);
+      }
+      if (!TokenContext.Symbol.empty()) {
+        J.attribute("contextsym", TokenContext.Symbol);
+      }
+      if (PeekRange.isValid()) {
+        std::string PeekRangeStr = lineRangeToString(PeekRange);
+        if (!PeekRangeStr.empty()) {
+          J.attribute("peekRange", PeekRangeStr);
+        }
+      }
+
+      // End the top-level object.
+      J.objectEnd();
+      // we want a newline.
+      ros << '\n';
+      F->Output.push_back(std::move(ros.str()));
+    }
+
+    // Generate a single "source":1 for all the symbols. If we search from here,
+    // we want to union the results for every symbol in `symbols`.
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    J.attribute("loc", LocStr);
+    J.attribute("source", 1);
+
+    if (NestingRange.isValid()) {
+      std::string NestingRangeStr = fullRangeToString(NestingRange);
+      if (!NestingRangeStr.empty()) {
+        J.attribute("nestingRange", NestingRangeStr);
+      }
+    }
+
+    std::string Syntax;
+    if (Flags & NoCrossref) {
+      J.attribute("syntax", "");
+    } else {
+      Syntax = Kind;
+      Syntax.push_back(',');
+      Syntax.append(SyntaxKind);
+      J.attribute("syntax", Syntax);
+    }
+
+    if (!MaybeType.isNull()) {
+      J.attribute("type", MaybeType.getAsString());
+      QualType canonical = MaybeType.getCanonicalType();
+      const TagDecl *decl = canonical->getAsTagDecl();
+      if (decl) {
+        std::string Mangled = getMangledName(CurMangleContext, decl);
+        J.attribute("typesym", Mangled);
+      }
+    }
+
+    std::string Pretty(SyntaxKind);
+    Pretty.push_back(' ');
+    // Append with an explicit length; see the note on "pretty" above.
+    Pretty.append(QualName.data(), QualName.size());
+    J.attribute("pretty", Pretty);
+
+    J.attribute("sym", Symbol);
+
+    if (Flags & NoCrossref) {
+      J.attribute("no_crossref", 1);
+    }
+
+    // End the top-level object.
+    J.objectEnd();
+
+    // we want a newline.
+    ros << '\n';
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  // Replaces *Loc with its spelling location.
+  void normalizeLocation(SourceLocation *Loc) {
+    *Loc = SM.getSpellingLoc(*Loc);
+  }
+
+  // For cases where the left-brace is not directly accessible from the AST,
+  // helper to use the lexer to find the brace.  Make sure you're picking the
+  // start location appropriately!
+  SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
+    return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false);
+  }
+
+  // If the provided statement is compound, return its range.
+  SourceRange getCompoundStmtRange(Stmt* D) {
+    if (!D) {
+      return SourceRange();
+    }
+
+    // Anything other than a CompoundStmt yields an invalid (empty) range.
+    if (CompoundStmt *Compound = dyn_cast<CompoundStmt>(D)) {
+      return Compound->getSourceRange();
+    }
+
+    return SourceRange();
+  }
+
+  // Computes the "peek" range of a function declaration: from the start of
+  // the declaration up to its name, extended to the end of the last
+  // parameter that lives in the same file as the name.
+  SourceRange getFunctionPeekRange(FunctionDecl* D) {
+    // We always start at the start of the function decl, which may include the
+    // return type on a separate line.
+    SourceLocation Start = D->getBeginLoc();
+
+    // By default, we end at the line containing the function's name.
+    SourceLocation End = D->getLocation();
+
+    std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End);
+
+    // But if there are parameters, we want to include those as well.
+    for (ParmVarDecl* Param : D->parameters()) {
+      std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation());
+
+      // It's possible there are macros involved or something. We don't include
+      // the parameters in that case.
+      if (ParamLoc.first == FuncLoc.first) {
+        // Assume parameters are in order, so we always take the last one.
+        End = Param->getEndLoc();
+      }
+    }
+
+    return SourceRange(Start, End);
+  }
+
+  // Computes the "peek" range of a tag declaration: from the start of the
+  // declaration up to its name, extended to the end of the last base
+  // specifier that lives in the same file as the name.
+  SourceRange getTagPeekRange(TagDecl* D) {
+    SourceLocation Start = D->getBeginLoc();
+
+    // By default, we end at the line containing the name.
+    SourceLocation End = D->getLocation();
+
+    std::pair<FileID, unsigned> NameLoc = SM.getDecomposedLoc(End);
+
+    if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) {
+      // But if there are base classes, we want to include those as well.
+      for (CXXBaseSpecifier& Base : D2->bases()) {
+        std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc());
+
+        // It's possible there are macros involved or something. We don't include
+        // the base specifiers in that case.
+        if (Loc.first == NameLoc.first) {
+          // Assume base specifiers are in order, so we always take the last
+          // one.
+          End = Base.getEndLoc();
+        }
+      }
+    }
+
+    return SourceRange(Start, End);
+  }
+
+  // Returns the source range of the raw comment attached to D, or an invalid
+  // range if there is none.
+  SourceRange getCommentRange(NamedDecl* D) {
+    const RawComment* RC =
+      AstContext->getRawCommentForDeclNoCache(D);
+    if (!RC) {
+      return SourceRange();
+    }
+
+    return RC->getSourceRange();
+  }
+
+  // Unions two ranges. If either range is invalid the other one is returned.
+  // Sanity checks that all four endpoints are in the same file, returning the
+  // first range if they are not. Otherwise the result starts at the earlier
+  // begin and stops at the later end, so overlapping and nested ranges are
+  // handled as well as disjoint ones.
+  SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
+    if (Range1.isInvalid()) {
+      return Range2;
+    }
+    if (Range2.isInvalid()) {
+      return Range1;
+    }
+
+    std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin());
+    std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd());
+    std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin());
+    std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd());
+
+    // Offsets are only comparable within a single FileID.
+    if (Begin1.first != End1.first || Begin1.first != Begin2.first ||
+        Begin1.first != End2.first) {
+      // Something weird is probably happening with the preprocessor. Just
+      // return the first range.
+      return Range1;
+    }
+
+    // Compare begin with begin and end with end. Comparing a begin against
+    // the other range's end (as was done before) only gives the union when
+    // the ranges are disjoint.
+    SourceLocation Begin = Begin1.second <= Begin2.second ? Range1.getBegin()
+                                                          : Range2.getBegin();
+    SourceLocation End = End1.second >= End2.second ? Range1.getEnd()
+                                                    : Range2.getEnd();
+    return SourceRange(Begin, End);
+  }
+
+  // Given a location and a range, returns the range if:
+  // - The location and the range live in the same file.
+  // - The range is well ordered (end is not before begin).
+  // Returns an empty range otherwise.
+ SourceRange validateRange(SourceLocation Loc, SourceRange Range) { + std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc); + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + if (Begin.first != Decomposed.first || End.first != Decomposed.first) { + return SourceRange(); + } + + if (Begin.second >= End.second) { + return SourceRange(); + } + + return Range; + } + + bool VisitNamedDecl(NamedDecl *D) { + SourceLocation Loc = D->getLocation(); + + // If the token is from a macro expansion and the expansion location + // is interesting, use that instead as it tends to be more useful. + SourceLocation expandedLoc = Loc; + if (SM.isMacroBodyExpansion(Loc)) { + Loc = SM.getFileLoc(Loc); + } + + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) { + // Unnamed parameter in function proto. + return true; + } + + int Flags = 0; + const char *Kind = "def"; + const char *PrettyKind = "?"; + bool wasTemplate = false; + SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc()); + // The nesting range identifies the left brace and right brace, which + // heavily depends on the AST node type. + SourceRange NestingRange; + if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { + if (D2->isTemplateInstantiation()) { + wasTemplate = true; + D = D2->getTemplateInstantiationPattern(); + } + // We treat pure virtual declarations as definitions. + Kind = (D2->isThisDeclarationADefinition() || D2->isPure()) ? "def" : "decl"; + PrettyKind = "function"; + PeekRange = getFunctionPeekRange(D2); + + // Only emit the nesting range if: + // - This is a definition AND + // - This isn't a template instantiation. 
Function templates' + // instantiations can end up as a definition with a Loc at their point + // of declaration but with the CompoundStmt of the template's + // point of definition. This really messes up the nesting range logic. + // At the time of writing this, the test repo's `big_header.h`'s + // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as + // instantiated by `big_cpp.cpp` triggers this phenomenon. + // + // Note: As covered elsewhere, template processing is tricky and it's + // conceivable that we may change traversal patterns in the future, + // mooting this guard. + if (D2->isThisDeclarationADefinition() && + !D2->isTemplateInstantiation()) { + // The CompoundStmt range is the brace range. + NestingRange = getCompoundStmtRange(D2->getBody()); + } + } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) { + Kind = D2->isThisDeclarationADefinition() ? "def" : "forward"; + PrettyKind = "type"; + + if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) { + PeekRange = getTagPeekRange(D2); + NestingRange = D2->getBraceRange(); + } else { + PeekRange = SourceRange(); + } + } else if (isa<TypedefNameDecl>(D)) { + Kind = "def"; + PrettyKind = "type"; + PeekRange = SourceRange(Loc, Loc); + } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) { + if (D2->isLocalVarDeclOrParm()) { + Flags = NoCrossref; + } + + Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly + ? "decl" + : "def"; + PrettyKind = "variable"; + } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) { + Kind = "def"; + PrettyKind = "namespace"; + PeekRange = SourceRange(Loc, Loc); + NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D); + if (D2) { + // There's no exposure of the left brace so we have to find it. + NestingRange = SourceRange( + findLeftBraceFromLoc(D2->isAnonymousNamespace() ? 
D2->getBeginLoc() : Loc), + D2->getRBraceLoc()); + } + } else if (isa<FieldDecl>(D)) { + Kind = "def"; + PrettyKind = "field"; + } else if (isa<EnumConstantDecl>(D)) { + Kind = "def"; + PrettyKind = "enum constant"; + } else { + return true; + } + + QualType qtype = QualType(); + if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) { + qtype = D2->getType(); + } + + SourceRange CommentRange = getCommentRange(D); + PeekRange = combineRanges(PeekRange, CommentRange); + PeekRange = validateRange(Loc, PeekRange); + NestingRange = validateRange(Loc, NestingRange); + + std::string Symbol = getMangledName(CurMangleContext, D); + + // In the case of destructors, Loc might point to the ~ character. In that + // case we want to skip to the name of the class. However, Loc might also + // point to other places that generate destructors, such as the use site of + // a macro that expands to generate a destructor, or a lambda (apparently + // clang 8 creates a destructor declaration for at least some lambdas). In + // the former case we'll use the macro use site as the location, and in the + // latter we'll just drop the declaration. + if (isa<CXXDestructorDecl>(D)) { + PrettyKind = "destructor"; + const char *P = SM.getCharacterData(Loc); + if (*P == '~') { + // Advance Loc to the class name + P++; + + unsigned Skipped = 1; + while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') { + P++; + Skipped++; + } + + Loc = Loc.getLocWithOffset(Skipped); + } else { + // See if the destructor is coming from a macro expansion + P = SM.getCharacterData(expandedLoc); + if (*P != '~') { + // It's not + return true; + } + // It is, so just use Loc as-is + } + } + + visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbol, + qtype, + getContext(D), Flags, PeekRange, NestingRange); + + // In-progress structured info emission. 
+ if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) { + if (D2->isThisDeclarationADefinition() && + // XXX getASTRecordLayout doesn't work for dependent types, so we + // avoid calling into emitStructuredInfo for now if there's a + // dependent type or if we're in any kind of template context. This + // should be re-evaluated once this is working for normal classes and + // we can better evaluate what is useful. + !D2->isDependentType() && + !TemplateStack) { + emitStructuredInfo(Loc, D2); + } + } + if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { + if ((D2->isThisDeclarationADefinition() || D2->isPure()) && + // a clause at the top should have generalized and set wasTemplate so + // it shouldn't be the case that isTemplateInstantiation() is true. + !D2->isTemplateInstantiation() && + !wasTemplate && + !D2->isFunctionTemplateSpecialization() && + !TemplateStack) { + emitStructuredInfo(Loc, D2); + } + } + if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) { + if (!D2->isTemplated() && + !TemplateStack) { + emitStructuredInfo(Loc, D2); + } + } + + return true; + } + + bool VisitCXXConstructExpr(CXXConstructExpr *E) { + SourceLocation Loc = E->getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + FunctionDecl *Ctor = E->getConstructor(); + if (Ctor->isTemplateInstantiation()) { + Ctor = Ctor->getTemplateInstantiationPattern(); + } + std::string Mangled = getMangledName(CurMangleContext, Ctor); + + // FIXME: Need to do something different for list initialization. 
+ + visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled, + QualType(), getContext(Loc)); + + return true; + } + + bool VisitCallExpr(CallExpr *E) { + Decl *Callee = E->getCalleeDecl(); + if (!Callee || !FunctionDecl::classof(Callee)) { + return true; + } + + const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee); + + SourceLocation Loc; + + const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee); + if (F->isTemplateInstantiation()) { + NamedCallee = F->getTemplateInstantiationPattern(); + } + + std::string Mangled = getMangledName(CurMangleContext, NamedCallee); + int Flags = 0; + + Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts(); + + if (CXXOperatorCallExpr::classof(E)) { + // Just take the first token. + CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E); + Loc = Op->getOperatorLoc(); + Flags |= NotIdentifierToken; + } else if (MemberExpr::classof(CalleeExpr)) { + MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr); + Loc = Member->getMemberLoc(); + } else if (DeclRefExpr::classof(CalleeExpr)) { + // We handle this in VisitDeclRefExpr. 
+ return true; + } else { + return true; + } + + normalizeLocation(&Loc); + + if (!isInterestingLocation(Loc)) { + return true; + } + + visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled, + E->getCallReturnType(*AstContext), getContext(Loc), Flags); + + return true; + } + + bool VisitTagTypeLoc(TagTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + TagDecl *Decl = L.getDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + L.getType(), getContext(Loc)); + return true; + } + + bool VisitTypedefTypeLoc(TypedefTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + NamedDecl *Decl = L.getTypedefNameDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + L.getType(), getContext(Loc)); + return true; + } + + bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + NamedDecl *Decl = L.getDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + L.getType(), getContext(Loc)); + return true; + } + + bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl(); + if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) { + NamedDecl *Decl = D->getTemplatedDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", 
getQualifiedName(Decl), Loc, Mangled, + QualType(), getContext(Loc)); + } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) { + NamedDecl *Decl = D->getTemplatedDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + QualType(), getContext(Loc)); + } + + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (E->hasQualifier()) { + Loc = E->getNameInfo().getLoc(); + normalizeLocation(&Loc); + } + + NamedDecl *Decl = E->getDecl(); + if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) { + int Flags = 0; + if (D2->isLocalVarDeclOrParm()) { + Flags = NoCrossref; + } + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled, + D2->getType(), getContext(Loc), Flags); + } else if (isa<FunctionDecl>(Decl)) { + const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl); + if (F->isTemplateInstantiation()) { + Decl = F->getTemplateInstantiationPattern(); + } + + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled, + E->getType(), getContext(Loc)); + } else if (isa<EnumConstantDecl>(Decl)) { + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled, + E->getType(), getContext(Loc)); + } + + return true; + } + + bool VisitCXXConstructorDecl(CXXConstructorDecl *D) { + if (!isInterestingLocation(D->getLocation())) { + return true; + } + + for (CXXConstructorDecl::init_const_iterator It = D->init_begin(); + It != D->init_end(); ++It) { + const CXXCtorInitializer *Ci = *It; + if (!Ci->getMember() || !Ci->isWritten()) { + continue; + } + + SourceLocation Loc = Ci->getMemberLocation(); + normalizeLocation(&Loc); 
+ if (!isInterestingLocation(Loc)) { + continue; + } + + FieldDecl *Member = Ci->getMember(); + std::string Mangled = getMangledName(CurMangleContext, Member); + visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled, + Member->getType(), getContext(D)); + } + + return true; + } + + bool VisitMemberExpr(MemberExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + ValueDecl *Decl = E->getMemberDecl(); + if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) { + std::string Mangled = getMangledName(CurMangleContext, Field); + visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled, + Field->getType(), getContext(Loc)); + } + return true; + } + + bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) { + SourceLocation Loc = E->getMemberLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (TemplateStack) { + TemplateStack->visitDependent(Loc); + } + return true; + } + + void enterSourceFile(SourceLocation Loc) { + normalizeLocation(&Loc); + FileInfo* newFile = getFileInfo(Loc); + if (!newFile->Interesting) { + return; + } + FileType type = newFile->Generated ? FileType::Generated : FileType::Source; + std::string symbol = + std::string("FILE_") + mangleFile(newFile->Realname, type); + + // We use an explicit zero-length source range at the start of the file. If we + // don't set the LocRangeEndValid flag, the visitIdentifier code will use the + // entire first token, which could be e.g. a long multiline-comment. 
+ visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc), + symbol, QualType(), Context(), + NotIdentifierToken | LocRangeEndValid); + } + + void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) { + std::string includedFile(File->tryGetRealPathName()); + FileType type = relativizePath(includedFile); + if (type == FileType::Unknown) { + return; + } + std::string symbol = + std::string("FILE_") + mangleFile(includedFile, type); + + visitIdentifier("use", "file", includedFile, FileNameRange, symbol, + QualType(), Context(), + NotIdentifierToken | LocRangeEndValid); + } + + void macroDefined(const Token &Tok, const MacroDirective *Macro) { + if (Macro->getMacroInfo()->isBuiltinMacro()) { + return; + } + SourceLocation Loc = Tok.getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return; + } + + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + if (Ident) { + std::string Mangled = + std::string("M_") + mangleLocation(Loc, std::string(Ident->getName())); + visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled); + } + } + + void macroUsed(const Token &Tok, const MacroInfo *Macro) { + if (!Macro) { + return; + } + if (Macro->isBuiltinMacro()) { + return; + } + SourceLocation Loc = Tok.getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return; + } + + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + if (Ident) { + std::string Mangled = + std::string("M_") + + mangleLocation(Macro->getDefinitionLoc(), std::string(Ident->getName())); + visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled); + } + } +}; + +void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID = FileID()) { + switch (Reason) { + case PPCallbacks::RenameFile: + case PPCallbacks::SystemHeaderPragma: + // Don't care about these, since we want the actual on-disk filenames + break; + case PPCallbacks::EnterFile: + 
Indexer->enterSourceFile(Loc); + break; + case PPCallbacks::ExitFile: + // Don't care about exiting files + break; + } +} + +void PreprocessorHook::InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FileNameRange, +#if CLANG_VERSION_MAJOR >= 16 + OptionalFileEntryRef File, +#elif CLANG_VERSION_MAJOR >= 15 + Optional<FileEntryRef> File, +#else + const FileEntry *File, +#endif + StringRef SearchPath, + StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) { +#if CLANG_VERSION_MAJOR >= 15 + if (!File) { + return; + } + Indexer->inclusionDirective(FileNameRange.getAsRange(), &File->getFileEntry()); +#else + Indexer->inclusionDirective(FileNameRange.getAsRange(), File); +#endif +} + +void PreprocessorHook::MacroDefined(const Token &Tok, + const MacroDirective *Md) { + Indexer->macroDefined(Tok, Md); +} + +void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md, + SourceRange Range, const MacroArgs *Ma) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::MacroUndefined(const Token &Tok, + const MacroDefinition &Md, + const MacroDirective *Undef) +{ + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md, + SourceRange Range) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +class IndexAction : public PluginASTAction { +protected: + std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, + llvm::StringRef F) { + return make_unique<IndexConsumer>(CI); + } + + bool ParseArgs(const CompilerInstance &CI, + const std::vector<std::string> 
&Args) { + if (Args.size() != 3) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, + "Need arguments for the source, output, and object directories"); + D.Report(DiagID); + return false; + } + + // Load our directories + Srcdir = getAbsolutePath(Args[0]); + if (Srcdir.empty()) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, "Source directory '%0' does not exist"); + D.Report(DiagID) << Args[0]; + return false; + } + + ensurePath(Args[1] + PATHSEP_STRING); + Outdir = getAbsolutePath(Args[1]); + Outdir += PATHSEP_STRING; + + Objdir = getAbsolutePath(Args[2]); + if (Objdir.empty()) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error, + "Objdir '%0' does not exist"); + D.Report(DiagID) << Args[2]; + return false; + } + Objdir += PATHSEP_STRING; + + printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(), + Objdir.c_str()); + + return true; + } + + void printHelp(llvm::raw_ostream &Ros) { + Ros << "Help for mozsearch plugin goes here\n"; + } +}; + +static FrontendPluginRegistry::Add<IndexAction> + Y("mozsearch-index", "create the mozsearch index database"); diff --git a/build/clang-plugin/mozsearch-plugin/README b/build/clang-plugin/mozsearch-plugin/README new file mode 100644 index 0000000000..d948e9aca3 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/README @@ -0,0 +1,12 @@ +This clang plugin code generates a JSON file for each compiler input +file. The JSON file contains information about the C++ symbols that +are referenced by the input file. The data is eventually consumed by +Searchfox. See https://github.com/mozsearch/mozsearch for more +information. + +This plugin is enabled with the --enable-clang-plugin and +--enable-mozsearch-plugin mozconfig options. The output of the plugin +is stored in $OBJDIR/mozsearch_index. 
+ +This code is not a checker, unlike other parts of the Mozilla clang +plugin. It cannot be used with clang-tidy. diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.cpp b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp new file mode 100644 index 0000000000..a2e60e42c6 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "StringOperations.h" + +static unsigned long djbHash(const char *Str) { + unsigned long Hash = 5381; + + for (const char *P = Str; *P; P++) { + // Hash * 33 + c + Hash = ((Hash << 5) + Hash) + *P; + } + + return Hash; +} + +// This doesn't actually return a hex string of |hash|, but it +// does... something. It doesn't really matter what. +static void hashToString(unsigned long Hash, char *Buffer) { + const char Table[] = {"0123456789abcdef"}; + char *P = Buffer; + while (Hash) { + *P = Table[Hash & 0xf]; + Hash >>= 4; + P++; + } + + *P = 0; +} + +std::string hash(const std::string &Str) { + static char HashStr[41]; + unsigned long H = djbHash(Str.c_str()); + hashToString(H, HashStr); + return std::string(HashStr); +} + +std::string toString(int N) { + return stringFormat("%d", N); +} diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.h b/build/clang-plugin/mozsearch-plugin/StringOperations.h new file mode 100644 index 0000000000..4aa5b31962 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/StringOperations.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
/* If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef StringOperations_h
#define StringOperations_h

#include <memory>
#include <stdio.h>
#include <string>
#include <string.h>

// Returns a short string derived from a hash of Str.
std::string hash(const std::string &Str);

// printf-style formatting into a std::string.
//
// Format is a printf format string and ArgList supplies its arguments, which
// are forwarded to snprintf unchanged (so they must be types that are valid
// for C varargs, e.g. int or const char*). Returns the formatted text, or an
// empty string if snprintf reports an error.
template <typename... Args>
inline std::string stringFormat(const std::string &Format, Args... ArgList) {
  // First pass: ask snprintf how many characters are needed. The result is an
  // int and is negative on failure, so it must be checked before it is used
  // as a size.
  const int Len = snprintf(nullptr, 0, Format.c_str(), ArgList...);
  if (Len < 0) {
    return std::string();
  }

  // Second pass: format into a buffer with room for the terminating NUL.
  const size_t Size = static_cast<size_t>(Len) + 1;
  std::unique_ptr<char[]> Buf(new char[Size]);
  snprintf(Buf.get(), Size, Format.c_str(), ArgList...);
  return std::string(Buf.get(), Buf.get() + Len);
}

// Decimal string form of N.
std::string toString(int N);

#endif