diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /build/clang-plugin/mozsearch-plugin | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'build/clang-plugin/mozsearch-plugin')
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/FileOperations.cpp | 150 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/FileOperations.h | 68 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/JSONFormatter.cpp | 119 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/JSONFormatter.h | 53 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp | 1896 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/README | 12 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/StringOperations.cpp | 42 | ||||
-rw-r--r-- | build/clang-plugin/mozsearch-plugin/StringOperations.h | 25 |
8 files changed, 2365 insertions, 0 deletions
diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.cpp b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp new file mode 100644 index 0000000000..2468672b9f --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp @@ -0,0 +1,150 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FileOperations.h" + +#include <stdio.h> +#include <stdlib.h> + +#if defined(_WIN32) || defined(_WIN64) +#include <direct.h> +#include <io.h> +#include <windows.h> +#include "StringOperations.h" +#else +#include <sys/file.h> +#include <sys/time.h> +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +// Make sure that all directories on path exist, excluding the final element of +// the path. +void ensurePath(std::string Path) { + size_t Pos = 0; + if (Path[0] == PATHSEP_CHAR) { + Pos++; + } + + while ((Pos = Path.find(PATHSEP_CHAR, Pos)) != std::string::npos) { + std::string Portion = Path.substr(0, Pos); + if (!Portion.empty()) { +#if defined(_WIN32) || defined(_WIN64) + int Err = _mkdir(Portion.c_str()); +#else + int Err = mkdir(Portion.c_str(), 0775); +#endif + if (Err == -1 && errno != EEXIST) { + perror("mkdir failed"); + exit(1); + } + } + + Pos++; + } +} + +#if defined(_WIN32) || defined(_WIN64) +AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) { + this->Filename = DstFile; + std::string Hash = hash(SrcFile); + std::string MutexName = std::string("Local\\searchfox-") + Hash; + std::wstring WideMutexName; + WideMutexName.assign(MutexName.begin(), MutexName.end()); + Handle = CreateMutex(nullptr, false, WideMutexName.c_str()); + if (Handle == NULL) { + return; + } + + if (WaitForSingleObject(Handle, INFINITE) != WAIT_OBJECT_0) { + return; + } +} + +AutoLockFile::~AutoLockFile() { + ReleaseMutex(Handle); + CloseHandle(Handle); +} + +bool AutoLockFile::success() { + return Handle != NULL; +} + +FILE *AutoLockFile::openFile() { + int DstDescriptor = _open(Filename.c_str(), _O_RDONLY | _O_CREAT | _O_BINARY, 0666); + return _fdopen(DstDescriptor, "rb"); +} + +FILE *AutoLockFile::openTmp() { + int TmpDescriptor = _open((Filename + ".tmp").c_str(), _O_WRONLY | _O_APPEND | _O_CREAT | _O_BINARY, 0666); + return _fdopen(TmpDescriptor, "ab"); +} + +bool AutoLockFile::moveTmp() { + if (_unlink(Filename.c_str()) == -1) { + if (errno != ENOENT) { + return false; + } + } + return rename((Filename + ".tmp").c_str(), Filename.c_str()) == 0; +} + +std::string getAbsolutePath(const std::string &Filename) { + char Full[_MAX_PATH]; + if (!_fullpath(Full, Filename.c_str(), _MAX_PATH)) { + return std::string(""); + } + return std::string(Full); +} +#else +AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) { + this->Filename = DstFile; + FileDescriptor = open(SrcFile.c_str(), O_RDONLY); + if (FileDescriptor == -1) { + return; + } + + do { + int rv = flock(FileDescriptor, LOCK_EX); + if (rv == 0) { + break; + } + } while (true); +} + +AutoLockFile::~AutoLockFile() { close(FileDescriptor); } + +bool AutoLockFile::success() { return FileDescriptor != -1; } + +FILE *AutoLockFile::openFile() { + int DstDescriptor = open(Filename.c_str(), O_RDONLY | O_CREAT, 0666); + return fdopen(DstDescriptor, "rb"); +} + +FILE* AutoLockFile::openTmp() { + int TmpDescriptor = open((Filename + ".tmp").c_str(), O_WRONLY | O_APPEND | O_CREAT, 0666); + return fdopen(TmpDescriptor, "ab"); +} + +bool AutoLockFile::moveTmp() { + if (unlink(Filename.c_str()) == -1) { + if (errno != ENOENT) { + return false; + } + } + return rename((Filename + ".tmp").c_str(), Filename.c_str()) == 0; +} + +std::string getAbsolutePath(const std::string &Filename) { + char Full[4096]; + if (!realpath(Filename.c_str(), Full)) { + return std::string(""); + } + return std::string(Full); +} +#endif diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.h b/build/clang-plugin/mozsearch-plugin/FileOperations.h new file mode 100644 index 0000000000..472c835b23 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/FileOperations.h @@ -0,0 +1,68 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef FileOperations_h +#define FileOperations_h + +#include <stdio.h> +#include <string> + +#if defined(_WIN32) || defined(_WIN64) +#include <windows.h> +#define PATHSEP_CHAR '\\' +#define PATHSEP_STRING "\\" +#else +#define PATHSEP_CHAR '/' +#define PATHSEP_STRING "/" +#endif + +// Make sure that all directories on path exist, excluding the final element of +// the path. +void ensurePath(std::string Path); + +std::string getAbsolutePath(const std::string &Filename); + +// Used to synchronize access when writing to an analysis file, so that +// concurrently running clang instances don't clobber each other's data. +// On Windows, we use a named mutex. On POSIX platforms, we use flock on the +// source files. flock is advisory locking, and doesn't interfere with clang's +// own opening of the source files (i.e. to interfere, clang would have to be +// using flock itself, which it does not). +struct AutoLockFile { + // Absolute path to the analysis file + std::string Filename; + +#if defined(_WIN32) || defined(_WIN64) + // Handle for the named Mutex + HANDLE Handle = NULL; +#else + // fd for the *source* file that corresponds to the analysis file. We use + // the source file because it doesn't change while the analysis file gets + // repeatedly replaced by a new version written to a separate tmp file. + // This fd is used when using flock to synchronize access. + int FileDescriptor = -1; +#endif + + // SrcFile should be the absolute path to the source code file, and DstFile + // the absolute path to the corresponding analysis file. This constructor + // will block until exclusive access has been obtained. + AutoLockFile(const std::string &SrcFile, const std::string &DstFile); + ~AutoLockFile(); + + // Check after constructing to ensure the mutex was properly set up. + bool success(); + + // Open the existing analysis file for reading (an empty one is created if + // it doesn't already exist). Caller is responsible for fclose'ing it. + FILE *openFile(); + // Open a new tmp file for writing the new analysis data to. Caller is + // responsible for fclose'ing it. + FILE *openTmp(); + // Replace the existing analysis file with the new "tmp" one that has the new + // data. Returns false on error. + bool moveTmp(); +}; + +#endif diff --git a/build/clang-plugin/mozsearch-plugin/JSONFormatter.cpp b/build/clang-plugin/mozsearch-plugin/JSONFormatter.cpp new file mode 100644 index 0000000000..ec77fc9b2d --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/JSONFormatter.cpp @@ -0,0 +1,119 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "JSONFormatter.h" + +#include <algorithm> +#include <cassert> +#include <string.h> + +static std::string replaceAll(std::string Mangled, std::string Pattern, + std::string Replacement) { + size_t Pos = 0; + while ((Pos = Mangled.find(Pattern, Pos)) != std::string::npos) { + Mangled = Mangled.replace(Pos, Pattern.length(), Replacement); + Pos += Replacement.length(); + } + return Mangled; +} + +/** + * Hacky escaping logic with the goal of not upsetting the much more thorough + * rust JSON parsing library that actually understands UTF-8. Double-quote + * and (escaping) backslash are escaped, as is tab (\t), with newlines (\r\n + * and \n) normalized to escaped \n. + * + * Additionally, everything that's not printable ASCII is simply erased. The + * motivating file is media/openmax_il/il112/OMX_Other.h#93 which has a + * corrupted apostrophe as <92> in there. The better course of action would + * be a validating UTF-8 parse that discards corrupt/non-printable characters. + * Since this is motivated by a commenting proof-of-concept and builds are + * already slow, I'm punting on that. + */ +std::string JSONFormatter::escape(std::string Input) { + bool NeedsEscape = false; + for (char C : Input) { + if (C == '\\' || C == '"' || C < 32 || C > 126) { + NeedsEscape = true; + break; + } + } + + if (!NeedsEscape) { + return Input; + } + + std::string Cur = Input; + Cur = replaceAll(Cur, "\\", "\\\\"); + Cur = replaceAll(Cur, "\"", "\\\""); + Cur = replaceAll(Cur, "\t", "\\t"); + Cur = replaceAll(Cur, "\r\n", "\\n"); + Cur = replaceAll(Cur, "\n", "\\n"); + Cur.erase(std::remove_if(Cur.begin(), Cur.end(), + [](char C){ return C < 32 || C > 126; }), + Cur.end()); + return Cur; +} + +void JSONFormatter::add(const char *Name, const char *Value) { + assert(PropertyCount < kMaxProperties); + Properties[PropertyCount] = Property(Name, std::string(Value)); + PropertyCount++; + + // `"Name":"Value",` + Length += strlen(Name) + 3 + strlen(Value) + 2 + 1; +} + +void JSONFormatter::add(const char *Name, std::string Value) { + std::string Escaped = escape(std::move(Value)); + + // `"Name":"Escaped",` + Length += strlen(Name) + 3 + Escaped.length() + 2 + 1; + + assert(PropertyCount < kMaxProperties); + Properties[PropertyCount] = Property(Name, std::move(Escaped)); + PropertyCount++; +} + +void JSONFormatter::add(const char *Name, int Value) { + // 1 digit + assert(Value >= 0 && Value < 10); + + assert(PropertyCount < kMaxProperties); + Properties[PropertyCount] = Property(Name, Value); + PropertyCount++; + + // `"Name":V,` + Length += strlen(Name) + 3 + 2; +} + +void JSONFormatter::format(std::string &Result) { + Result.reserve(Length + 2); + + Result.push_back('{'); + for (int I = 0; I < PropertyCount; I++) { + Result.push_back('"'); + Result.append(Properties[I].Name); + Result.push_back('"'); + Result.push_back(':'); + + if (Properties[I].IsString) { + Result.push_back('"'); + Result.append(Properties[I].StringValue); + Result.push_back('"'); + } else { + Result.push_back(Properties[I].IntValue + '0'); + } + + if (I + 1 != PropertyCount) { + Result.push_back(','); + } + } + + Result.push_back('}'); + Result.push_back('\n'); + + assert(Result.length() == Length + 2); +} diff --git a/build/clang-plugin/mozsearch-plugin/JSONFormatter.h b/build/clang-plugin/mozsearch-plugin/JSONFormatter.h new file mode 100644 index 0000000000..7d4c7e292e --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/JSONFormatter.h @@ -0,0 +1,53 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef JSONFormatter_h +#define JSONFormatter_h + +#include <memory> +#include <string> + +// A very basic JSON formatter that records key/value pairs and outputs a JSON +// object that contains only non-object data. +class JSONFormatter { + // Of these fields, only mEscapedStringValue is owned by this class. All the + // others are expected to outlive the class (which is typically allocated + // on-stack). + struct Property { + const char *Name; + std::string StringValue; + int IntValue; + bool IsString; + + Property() {} + + Property(const char* Name, std::string String) + : Name(Name), StringValue(std::move(String)), IsString(true) {} + + Property(const char* Name, int Int) + : Name(Name), IntValue(Int), IsString(false) {} + }; + + static const int kMaxProperties = 32; + + Property Properties[kMaxProperties]; + int PropertyCount; + + // Length of the generated JSON output. + size_t Length; + + std::string escape(std::string Input); + +public: + JSONFormatter() : PropertyCount(0), Length(0) {} + + void add(const char *Name, const char *Value); + void add(const char *Name, std::string Value); + void add(const char *Name, int Value); + + void format(std::string &Result); +}; + +#endif diff --git a/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp new file mode 100644 index 0000000000..9d2cda29d7 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp @@ -0,0 +1,1896 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Mangle.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Version.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendPluginRegistry.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +#include <iostream> +#include <map> +#include <memory> +#include <sstream> +#include <tuple> +#include <unordered_set> + +#include <stdio.h> +#include <stdlib.h> + +#include "FileOperations.h" +#include "JSONFormatter.h" +#include "StringOperations.h" + +#if CLANG_VERSION_MAJOR < 8 +// Starting with Clang 8.0 some basic functions have been renamed +#define getBeginLoc getLocStart +#define getEndLoc getLocEnd +#endif +// We want std::make_unique, but that's only available in c++14. In versions +// prior to that, we need to fall back to llvm's make_unique. It's also the +// case that we expect clang 10 to build with c++14 and clang 9 and earlier to +// build with c++11, at least as suggested by the llvm-config --cxxflags on +// non-windows platforms. mozilla-central seems to build with -std=c++17 on +// windows so we need to make this decision based on __cplusplus instead of +// the CLANG_VERSION_MAJOR. +#if __cplusplus < 201402L +using llvm::make_unique; +#else +using std::make_unique; +#endif + +using namespace clang; + +const std::string GENERATED("__GENERATED__" PATHSEP_STRING); + +// Absolute path to directory containing source code. +std::string Srcdir; + +// Absolute path to objdir (including generated code). +std::string Objdir; + +// Absolute path where analysis JSON output will be stored. +std::string Outdir; + +enum class FileType { + // The file was either in the source tree nor objdir. It might be a system + // include, for example. + Unknown, + // A file from the source tree. + Source, + // A file from the objdir. + Generated, +}; + +// Takes an absolute path to a file, and returns the type of file it is. If +// it's a Source or Generated file, the provided inout path argument is modified +// in-place so that it is relative to the source dir or objdir, respectively. +FileType relativizePath(std::string& path) { + if (path.compare(0, Objdir.length(), Objdir) == 0) { + path.replace(0, Objdir.length(), GENERATED); + return FileType::Generated; + } + // Empty filenames can get turned into Srcdir when they are resolved as + // absolute paths, so we should exclude files that are exactly equal to + // Srcdir or anything outside Srcdir. + if (path.length() > Srcdir.length() && path.compare(0, Srcdir.length(), Srcdir) == 0) { + // Remove the trailing `/' as well. + path.erase(0, Srcdir.length() + 1); + return FileType::Source; + } + return FileType::Unknown; +} + +#if !defined(_WIN32) && !defined(_WIN64) +#include <sys/time.h> + +static double time() { + struct timeval Tv; + gettimeofday(&Tv, nullptr); + return double(Tv.tv_sec) + double(Tv.tv_usec) / 1000000.; +} +#endif + +// Return true if |input| is a valid C++ identifier. We don't want to generate +// analysis information for operators, string literals, etc. by accident since +// it trips up consumers of the data. +static bool isValidIdentifier(std::string Input) { + for (char C : Input) { + if (!(isalpha(C) || isdigit(C) || C == '_')) { + return false; + } + } + return true; +} + +struct RAIITracer { + RAIITracer(const char *log) : mLog(log) { + printf("<%s>\n", mLog); + } + + ~RAIITracer() { + printf("</%s>\n", mLog); + } + + const char* mLog; +}; + +#define TRACEFUNC RAIITracer tracer(__FUNCTION__); + +class IndexConsumer; + +// For each C++ file seen by the analysis (.cpp or .h), we track a +// FileInfo. This object tracks whether the file is "interesting" (i.e., whether +// it's in the source dir or the objdir). We also store the analysis output +// here. +struct FileInfo { + FileInfo(std::string &Rname) : Realname(Rname) { + switch (relativizePath(Realname)) { + case FileType::Generated: + Interesting = true; + Generated = true; + break; + case FileType::Source: + Interesting = true; + Generated = false; + break; + case FileType::Unknown: + Interesting = false; + Generated = false; + break; + } + } + std::string Realname; + std::vector<std::string> Output; + bool Interesting; + bool Generated; +}; + +class IndexConsumer; + +class PreprocessorHook : public PPCallbacks { + IndexConsumer *Indexer; + +public: + PreprocessorHook(IndexConsumer *C) : Indexer(C) {} + + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) override; + + virtual void InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FileNameRange, + const FileEntry *File, + StringRef SearchPath, + StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) override; + + virtual void MacroDefined(const Token &Tok, + const MacroDirective *Md) override; + + virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md, + SourceRange Range, const MacroArgs *Ma) override; + virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md, + const MacroDirective *Undef) override; + virtual void Defined(const Token &Tok, const MacroDefinition &Md, + SourceRange Range) override; + virtual void Ifdef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) override; + virtual void Ifndef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) override; +}; + +class IndexConsumer : public ASTConsumer, + public RecursiveASTVisitor<IndexConsumer>, + public DiagnosticConsumer { +private: + CompilerInstance &CI; + SourceManager &SM; + LangOptions &LO; + std::map<FileID, std::unique_ptr<FileInfo>> FileMap; + MangleContext *CurMangleContext; + ASTContext *AstContext; + + typedef RecursiveASTVisitor<IndexConsumer> Super; + + // Tracks the set of declarations that the current expression/statement is + // nested inside of. + struct AutoSetContext { + AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false) + : Self(Self), Prev(Self->CurDeclContext), Decl(Context) { + this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false); + Self->CurDeclContext = this; + } + + ~AutoSetContext() { Self->CurDeclContext = Prev; } + + IndexConsumer *Self; + AutoSetContext *Prev; + NamedDecl *Decl; + bool VisitImplicit; + }; + AutoSetContext *CurDeclContext; + + FileInfo *getFileInfo(SourceLocation Loc) { + FileID Id = SM.getFileID(Loc); + + std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; + It = FileMap.find(Id); + if (It == FileMap.end()) { + // We haven't seen this file before. We need to make the FileInfo + // structure information ourselves + std::string Filename = std::string(SM.getFilename(Loc)); + std::string Absolute; + // If Loc is a macro id rather than a file id, it Filename might be + // empty. Also for some types of file locations that are clang-internal + // like "<scratch>" it can return an empty Filename. In these cases we + // want to leave Absolute as empty. + if (!Filename.empty()) { + Absolute = getAbsolutePath(Filename); + if (Absolute.empty()) { + Absolute = Filename; + } + } + std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute); + It = FileMap.insert(std::make_pair(Id, std::move(Info))).first; + } + return It->second.get(); + } + + // Helpers for processing declarations + // Should we ignore this location? + bool isInterestingLocation(SourceLocation Loc) { + if (Loc.isInvalid()) { + return false; + } + + return getFileInfo(Loc)->Interesting; + } + + // Convert location to "line:column" or "line:column-column" given length. + // In resulting string rep, line is 1-based and zero-padded to 5 digits, while + // column is 0-based and unpadded. + std::string locationToString(SourceLocation Loc, size_t Length = 0) { + std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc); + + bool IsInvalid; + unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + if (Length) { + return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length); + } else { + return stringFormat("%05d:%d", Line, Column - 1); + } + } + + // Convert SourceRange to "line-line". + // In the resulting string rep, line is 1-based. + std::string lineRangeToString(SourceRange Range) { + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + bool IsInvalid; + unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + return stringFormat("%d-%d", Line1, Line2); + } + + // Convert SourceRange to "line:column-line:column". + // In the resulting string rep, line is 1-based, column is 0-based. + std::string fullRangeToString(SourceRange Range) { + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + bool IsInvalid; + unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1); + } + + // Returns the qualified name of `d` without considering template parameters. + std::string getQualifiedName(const NamedDecl *D) { + const DeclContext *Ctx = D->getDeclContext(); + if (Ctx->isFunctionOrMethod()) { + return D->getQualifiedNameAsString(); + } + + std::vector<const DeclContext *> Contexts; + + // Collect contexts. + while (Ctx && isa<NamedDecl>(Ctx)) { + Contexts.push_back(Ctx); + Ctx = Ctx->getParent(); + } + + std::string Result; + + std::reverse(Contexts.begin(), Contexts.end()); + + for (const DeclContext *DC : Contexts) { + if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) { + Result += Spec->getNameAsString(); + + if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) { + std::string Backing; + llvm::raw_string_ostream Stream(Backing); + const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); + printTemplateArgumentList( + Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts())); + Result += Stream.str(); + } + } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) { + if (Nd->isAnonymousNamespace() || Nd->isInline()) { + continue; + } + Result += Nd->getNameAsString(); + } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) { + if (!Rd->getIdentifier()) { + Result += "(anonymous)"; + } else { + Result += Rd->getNameAsString(); + } + } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) { + Result += Fd->getNameAsString(); + } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) { + // C++ [dcl.enum]p10: Each enum-name and each unscoped + // enumerator is declared in the scope that immediately contains + // the enum-specifier. Each scoped enumerator is declared in the + // scope of the enumeration. + if (Ed->isScoped() || Ed->getIdentifier()) + Result += Ed->getNameAsString(); + else + continue; + } else { + Result += cast<NamedDecl>(DC)->getNameAsString(); + } + Result += "::"; + } + + if (D->getDeclName()) + Result += D->getNameAsString(); + else + Result += "(anonymous)"; + + return Result; + } + + std::string mangleLocation(SourceLocation Loc, + std::string Backup = std::string()) { + FileInfo *F = getFileInfo(Loc); + std::string Filename = F->Realname; + if (Filename.length() == 0 && Backup.length() != 0) { + return Backup; + } + if (F->Generated) { + // Since generated files may be different on different platforms, + // we need to include a platform-specific thing in the hash. Otherwise + // we can end up with hash collisions where different symbols from + // different platforms map to the same thing. + char* Platform = getenv("MOZSEARCH_PLATFORM"); + Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename; + } + return hash(Filename + std::string("@") + locationToString(Loc)); + } + + bool isAcceptableSymbolChar(char c) { + return isalpha(c) || isdigit(c) || c == '_' || c == '/'; + } + + std::string mangleFile(std::string Filename, FileType Type) { + // "Mangle" the file path, such that: + // 1. The majority of paths will still be mostly human-readable. + // 2. The sanitization algorithm doesn't produce collisions where two + // different unsanitized paths can result in the same sanitized paths. + // 3. The produced symbol doesn't cause problems with downstream consumers. + // In order to accomplish this, we keep alphanumeric chars, underscores, + // and slashes, and replace everything else with an "@xx" hex encoding. + // The majority of path characters are letters and slashes which don't get + // encoded, so that satisifies (1). Since "@" characters in the unsanitized + // path get encoded, there should be no "@" characters in the sanitized path + // that got preserved from the unsanitized input, so that should satisfy (2). + // And (3) was done by trial-and-error. Note in particular the dot (.) + // character needs to be encoded, or the symbol-search feature of mozsearch + // doesn't work correctly, as all dot characters in the symbol query get + // replaced by #. + for (size_t i = 0; i < Filename.length(); i++) { + char c = Filename[i]; + if (isAcceptableSymbolChar(c)) { + continue; + } + char hex[4]; + sprintf(hex, "@%02X", ((int)c) & 0xFF); + Filename.replace(i, 1, hex); + i += 2; + } + + if (Type == FileType::Generated) { + // Since generated files may be different on different platforms, + // we need to include a platform-specific thing in the hash. Otherwise + // we can end up with hash collisions where different symbols from + // different platforms map to the same thing. + char* Platform = getenv("MOZSEARCH_PLATFORM"); + Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename; + } + return Filename; + } + + std::string mangleQualifiedName(std::string Name) { + std::replace(Name.begin(), Name.end(), ' ', '_'); + return Name; + } + + std::string getMangledName(clang::MangleContext *Ctx, + const clang::NamedDecl *Decl) { + if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) { + return cast<FunctionDecl>(Decl)->getNameAsString(); + } + + if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) { + const DeclContext *DC = Decl->getDeclContext(); + if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) || + isa<LinkageSpecDecl>(DC) || + // isa<ExternCContextDecl>(DC) || + isa<TagDecl>(DC)) { + llvm::SmallVector<char, 512> Output; + llvm::raw_svector_ostream Out(Output); +#if CLANG_VERSION_MAJOR >= 11 + // This code changed upstream in version 11: + // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47 + GlobalDecl GD; + if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { + GD = GlobalDecl(D, Ctor_Complete); + } else if (const CXXDestructorDecl *D = + dyn_cast<CXXDestructorDecl>(Decl)) { + GD = GlobalDecl(D, Dtor_Complete); + } else { + GD = GlobalDecl(Decl); + } + Ctx->mangleName(GD, Out); +#else + if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { + Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out); + } else if (const CXXDestructorDecl *D = + dyn_cast<CXXDestructorDecl>(Decl)) { + Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out); + } else { + Ctx->mangleName(Decl, Out); + } +#endif + return Out.str().str(); + } else { + return std::string("V_") + mangleLocation(Decl->getLocation()) + + std::string("_") + hash(std::string(Decl->getName())); + } + } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) || + isa<ObjCInterfaceDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. + return std::string("T_") + mangleLocation(Decl->getLocation()); + } + + return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. + return std::string("NS_") + mangleLocation(Decl->getLocation()); + } + + return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) { + const ObjCInterfaceDecl *Iface = D2->getContainingInterface(); + return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" + + D2->getNameAsString(); + } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) { + const RecordDecl *Record = D2->getParent(); + return std::string("F_<") + getMangledName(Ctx, Record) + ">_" + + D2->getNameAsString(); + } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) { + const DeclContext *DC = Decl->getDeclContext(); + if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) { + return std::string("E_<") + getMangledName(Ctx, Named) + ">_" + + D2->getNameAsString(); + } + } + + assert(false); + return std::string(""); + } + + void debugLocation(SourceLocation Loc) { + std::string S = locationToString(Loc); + StringRef Filename = SM.getFilename(Loc); + printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str()); + } + + void debugRange(SourceRange Range) { + printf("Range\n"); + debugLocation(Range.getBegin()); + debugLocation(Range.getEnd()); + } + +public: + IndexConsumer(CompilerInstance &CI) + : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr), + AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) { + CI.getPreprocessor().addPPCallbacks( + make_unique<PreprocessorHook>(this)); + } + + virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const { + return new IndexConsumer(CI); + } + +#if !defined(_WIN32) && !defined(_WIN64) + struct AutoTime { + AutoTime(double *Counter) : Counter(Counter), Start(time()) {} + ~AutoTime() { + if (Start) { + *Counter += time() - Start; + } + } + void stop() { + *Counter += time() - Start; + Start = 0; + } + double *Counter; + double Start; + }; +#endif + + // All we need is to follow the final declaration. + virtual void HandleTranslationUnit(ASTContext &Ctx) { + CurMangleContext = + clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics()); + + AstContext = &Ctx; + TraverseDecl(Ctx.getTranslationUnitDecl()); + + // Emit the JSON data for all files now. + std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; + for (It = FileMap.begin(); It != FileMap.end(); It++) { + if (!It->second->Interesting) { + continue; + } + + FileInfo &Info = *It->second; + + std::string Filename = Outdir + Info.Realname; + std::string SrcFilename = Info.Generated + ? Objdir + Info.Realname.substr(GENERATED.length()) + : Srcdir + PATHSEP_STRING + Info.Realname; + + ensurePath(Filename); + + // We lock the output file in case some other clang process is trying to + // write to it at the same time. + AutoLockFile Lock(SrcFilename, Filename); + + if (!Lock.success()) { + fprintf(stderr, "Unable to lock file %s\n", Filename.c_str()); + exit(1); + } + + // Merge our results with the existing lines from the output file. + // This ensures that header files that are included multiple times + // in different ways are analyzed completely. + + FILE *Fp = Lock.openFile(); + if (!Fp) { + fprintf(stderr, "Unable to open input file %s\n", Filename.c_str()); + exit(1); + } + FILE *OutFp = Lock.openTmp(); + if (!OutFp) { + fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str()); + exit(1); + } + + // Sort our new results and get an iterator to them + std::sort(Info.Output.begin(), Info.Output.end()); + std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin(); + std::string LastNewWritten; + + // Loop over the existing (sorted) lines in the analysis output file. + char Buffer[65536]; + while (fgets(Buffer, sizeof(Buffer), Fp)) { + std::string OldLine(Buffer); + + // Write any results from Info.Output that are lexicographically + // smaller than OldLine (read from the existing file), but make sure + // to skip duplicates. Keep advacing NewLinesIter until we reach an + // entry that is lexicographically greater than OldLine. + for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { + if (*NewLinesIter > OldLine) { + break; + } + if (*NewLinesIter == OldLine) { + continue; + } + if (*NewLinesIter == LastNewWritten) { + // dedupe the new entries being written + continue; + } + if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str()); + exit(1); + } + LastNewWritten = *NewLinesIter; + } + + // Write the entry read from the existing file. + if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str()); + exit(1); + } + } + + // We finished reading from Fp + fclose(Fp); + + // Finish iterating our new results, discarding duplicates + for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { + if (*NewLinesIter == LastNewWritten) { + continue; + } + if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write to tmp output file for %s\n", Filename.c_str()); + exit(1); + } + LastNewWritten = *NewLinesIter; + } + + // Done writing all the things, close it and replace the old output file + // with the new one. + fclose(OutFp); + if (!Lock.moveTmp()) { + fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno); + exit(1); + } + } + } + + // Return a list of mangled names of all the methods that the given method + // overrides. + void findOverriddenMethods(const CXXMethodDecl *Method, + std::vector<std::string> &Symbols) { + std::string Mangled = getMangledName(CurMangleContext, Method); + Symbols.push_back(Mangled); + + CXXMethodDecl::method_iterator Iter = Method->begin_overridden_methods(); + CXXMethodDecl::method_iterator End = Method->end_overridden_methods(); + for (; Iter != End; Iter++) { + const CXXMethodDecl *Decl = *Iter; + if (Decl->isTemplateInstantiation()) { + Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern()); + } + return findOverriddenMethods(Decl, Symbols); + } + } + + // Unfortunately, we have to override all these methods in order to track the + // context we're inside. + + bool TraverseEnumDecl(EnumDecl *D) { + AutoSetContext Asc(this, D); + return Super::TraverseEnumDecl(D); + } + bool TraverseRecordDecl(RecordDecl *D) { + AutoSetContext Asc(this, D); + return Super::TraverseRecordDecl(D); + } + bool TraverseCXXRecordDecl(CXXRecordDecl *D) { + AutoSetContext Asc(this, D); + return Super::TraverseCXXRecordDecl(D); + } + bool TraverseFunctionDecl(FunctionDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // (See the larger AutoTemplateContext comment for more information.) If a + // method on a templated class is declared out-of-line, we need to analyze + // the definition inside the scope of the template or else we won't properly + // handle member access on the templated type. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseFunctionDecl(D); + } + bool TraverseCXXMethodDecl(CXXMethodDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXMethodDecl(D); + } + bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) { + AutoSetContext Asc(this, D, /*VisitImplicit=*/true); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXConstructorDecl(D); + } + bool TraverseCXXConversionDecl(CXXConversionDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXConversionDecl(D); + } + bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXDestructorDecl(D); + } + + // Used to keep track of the context in which a token appears. + struct Context { + // Ultimately this becomes the "context" JSON property. + std::string Name; + + // Ultimately this becomes the "contextsym" JSON property. + std::vector<std::string> Symbols; + + Context() {} + Context(std::string Name, std::vector<std::string> Symbols) + : Name(Name), Symbols(Symbols) {} + }; + + Context translateContext(NamedDecl *D) { + const FunctionDecl *F = dyn_cast<FunctionDecl>(D); + if (F && F->isTemplateInstantiation()) { + D = F->getTemplateInstantiationPattern(); + } + + std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)}; + if (CXXMethodDecl::classof(D)) { + Symbols.clear(); + findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols); + } + return Context(D->getQualifiedNameAsString(), Symbols); + } + + Context getContext(SourceLocation Loc) { + if (SM.isMacroBodyExpansion(Loc)) { + // If we're inside a macro definition, we don't return any context. It + // will probably not be what the user expects if we do. + return Context(); + } + + if (CurDeclContext) { + return translateContext(CurDeclContext->Decl); + } + return Context(); + } + + // Similar to GetContext(SourceLocation), but it skips the declaration passed + // in. This is useful if we want the context of a declaration that's already + // on the stack. + Context getContext(Decl *D) { + if (SM.isMacroBodyExpansion(D->getLocation())) { + // If we're inside a macro definition, we don't return any context. It + // will probably not be what the user expects if we do. + return Context(); + } + + AutoSetContext *Ctxt = CurDeclContext; + while (Ctxt) { + if (Ctxt->Decl != D) { + return translateContext(Ctxt->Decl); + } + Ctxt = Ctxt->Prev; + } + return Context(); + } + + static std::string concatSymbols(const std::vector<std::string> Symbols) { + if (Symbols.empty()) { + return ""; + } + + size_t Total = 0; + for (auto It = Symbols.begin(); It != Symbols.end(); It++) { + Total += It->length(); + } + Total += Symbols.size() - 1; + + std::string SymbolList; + SymbolList.reserve(Total); + + for (auto It = Symbols.begin(); It != Symbols.end(); It++) { + std::string Symbol = *It; + + if (It != Symbols.begin()) { + SymbolList.push_back(','); + } + SymbolList.append(Symbol); + } + + return SymbolList; + } + + // Analyzing template code is tricky. Suppose we have this code: + // + // template<class T> + // bool Foo(T* ptr) { return T::StaticMethod(ptr); } + // + // If we analyze the body of Foo without knowing the type T, then we will not + // be able to generate any information for StaticMethod. However, analyzing + // Foo for every possible instantiation is inefficient and it also generates + // too much data in some cases. For example, the following code would generate + // one definition of Baz for every instantiation, which is undesirable: + // + // template<class T> + // class Bar { struct Baz { ... }; }; + // + // To solve this problem, we analyze templates only once. We do so in a + // GatherDependent mode where we look for "dependent scoped member + // expressions" (i.e., things like StaticMethod). We keep track of the + // locations of these expressions. If we find one or more of them, we analyze + // the template for each instantiation, in an AnalyzeDependent mode. This mode + // ignores all source locations except for the ones where we found dependent + // scoped member expressions before. For these locations, we generate a + // separate JSON result for each instantiation. + // + // We inherit our parent's mode if it is exists. This is because if our + // parent is in analyze mode, it means we've already lived a full life in + // gather mode and we must not restart in gather mode or we'll cause the + // indexer to visit EVERY identifier, which is way too much data. + struct AutoTemplateContext { + AutoTemplateContext(IndexConsumer *Self) + : Self(Self) + , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent) + , Parent(Self->TemplateStack) { + Self->TemplateStack = this; + } + + ~AutoTemplateContext() { Self->TemplateStack = Parent; } + + // We traverse templates in two modes: + enum class Mode { + // Gather mode does not traverse into specializations. It looks for + // locations where it would help to have more info from template + // specializations. + GatherDependent, + + // Analyze mode traverses into template specializations and records + // information about token locations saved in gather mode. + AnalyzeDependent, + }; + + // We found a dependent scoped member expression! Keep track of it for + // later. + void visitDependent(SourceLocation Loc) { + if (CurMode == Mode::AnalyzeDependent) { + return; + } + + DependentLocations.insert(Loc.getRawEncoding()); + if (Parent) { + Parent->visitDependent(Loc); + } + } + + bool inGatherMode() { + return CurMode == Mode::GatherDependent; + } + + // Do we need to perform the extra AnalyzeDependent passes (one per + // instantiation)? + bool needsAnalysis() const { + if (!DependentLocations.empty()) { + return true; + } + if (Parent) { + return Parent->needsAnalysis(); + } + return false; + } + + void switchMode() { CurMode = Mode::AnalyzeDependent; } + + // Do we want to analyze each template instantiation separately? + bool shouldVisitTemplateInstantiations() const { + if (CurMode == Mode::AnalyzeDependent) { + return true; + } + if (Parent) { + return Parent->shouldVisitTemplateInstantiations(); + } + return false; + } + + // For a given expression/statement, should we emit JSON data for it? + bool shouldVisit(SourceLocation Loc) { + if (CurMode == Mode::GatherDependent) { + return true; + } + if (DependentLocations.find(Loc.getRawEncoding()) != + DependentLocations.end()) { + return true; + } + if (Parent) { + return Parent->shouldVisit(Loc); + } + return false; + } + + private: + IndexConsumer *Self; + Mode CurMode; + std::unordered_set<unsigned> DependentLocations; + AutoTemplateContext *Parent; + }; + + AutoTemplateContext *TemplateStack; + + bool shouldVisitTemplateInstantiations() const { + if (TemplateStack) { + return TemplateStack->shouldVisitTemplateInstantiations(); + } + return false; + } + + bool shouldVisitImplicitCode() const { + return CurDeclContext && CurDeclContext->VisitImplicit; + } + + bool TraverseClassTemplateDecl(ClassTemplateDecl *D) { + AutoTemplateContext Atc(this); + Super::TraverseClassTemplateDecl(D); + + if (!Atc.needsAnalysis()) { + return true; + } + + Atc.switchMode(); + + if (D != D->getCanonicalDecl()) { + return true; + } + + for (auto *Spec : D->specializations()) { + for (auto *Rd : Spec->redecls()) { + // We don't want to visit injected-class-names in this traversal. + if (cast<CXXRecordDecl>(Rd)->isInjectedClassName()) + continue; + + TraverseDecl(Rd); + } + } + + return true; + } + + bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) { + AutoTemplateContext Atc(this); + if (Atc.inGatherMode()) { + Super::TraverseFunctionTemplateDecl(D); + } + + if (!Atc.needsAnalysis()) { + return true; + } + + Atc.switchMode(); + + if (D != D->getCanonicalDecl()) { + return true; + } + + for (auto *Spec : D->specializations()) { + for (auto *Rd : Spec->redecls()) { + TraverseDecl(Rd); + } + } + + return true; + } + + bool shouldVisit(SourceLocation Loc) { + if (TemplateStack) { + return TemplateStack->shouldVisit(Loc); + } + return true; + } + + enum { + // Flag to omit the identifier from being cross-referenced across files. + // This is usually desired for local variables. + NoCrossref = 1 << 0, + // Flag to indicate the token with analysis data is not an identifier. Indicates + // we want to skip the check that tries to ensure a sane identifier token. + NotIdentifierToken = 1 << 1, + // This indicates that the end of the provided SourceRange is valid and + // should be respected. If this flag is not set, the visitIdentifier + // function should use only the start of the SourceRange and auto-detect + // the end based on whatever token is found at the start. + LocRangeEndValid = 1 << 2 + }; + + // This is the only function that emits analysis JSON data. It should be + // called for each identifier that corresponds to a symbol. + void visitIdentifier(const char *Kind, const char *SyntaxKind, + llvm::StringRef QualName, SourceRange LocRange, + const std::vector<std::string> &Symbols, + Context TokenContext = Context(), int Flags = 0, + SourceRange PeekRange = SourceRange(), + SourceRange NestingRange = SourceRange()) { + SourceLocation Loc = LocRange.getBegin(); + if (!shouldVisit(Loc)) { + return; + } + + // Find the file positions corresponding to the token. + unsigned StartOffset = SM.getFileOffset(Loc); + unsigned EndOffset = (Flags & LocRangeEndValid) + ? SM.getFileOffset(LocRange.getEnd()) + : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); + + std::string LocStr = locationToString(Loc, EndOffset - StartOffset); + std::string RangeStr = locationToString(Loc, EndOffset - StartOffset); + std::string PeekRangeStr; + + if (!(Flags & NotIdentifierToken)) { + // Get the token's characters so we can make sure it's a valid token. + const char *StartChars = SM.getCharacterData(Loc); + std::string Text(StartChars, EndOffset - StartOffset); + if (!isValidIdentifier(Text)) { + return; + } + } + + FileInfo *F = getFileInfo(Loc); + + std::string SymbolList; + + // Reserve space in symbolList for everything in `symbols`. `symbols` can + // contain some very long strings. + size_t Total = 0; + for (auto It = Symbols.begin(); It != Symbols.end(); It++) { + Total += It->length(); + } + + // Space for commas. + Total += Symbols.size() - 1; + SymbolList.reserve(Total); + + // For each symbol, generate one "target":1 item. We want to find this line + // if someone searches for any one of these symbols. + for (auto It = Symbols.begin(); It != Symbols.end(); It++) { + std::string Symbol = *It; + + if (!(Flags & NoCrossref)) { + JSONFormatter Fmt; + + Fmt.add("loc", LocStr); + Fmt.add("target", 1); + Fmt.add("kind", Kind); + Fmt.add("pretty", QualName.data()); + Fmt.add("sym", Symbol); + if (!TokenContext.Name.empty()) { + Fmt.add("context", TokenContext.Name); + } + std::string ContextSymbol = concatSymbols(TokenContext.Symbols); + if (!ContextSymbol.empty()) { + Fmt.add("contextsym", ContextSymbol); + } + if (PeekRange.isValid()) { + PeekRangeStr = lineRangeToString(PeekRange); + if (!PeekRangeStr.empty()) { + Fmt.add("peekRange", PeekRangeStr); + } + } + + std::string S; + Fmt.format(S); + F->Output.push_back(std::move(S)); + } + + if (It != Symbols.begin()) { + SymbolList.push_back(','); + } + SymbolList.append(Symbol); + } + + // Generate a single "source":1 for all the symbols. If we search from here, + // we want to union the results for every symbol in `symbols`. + JSONFormatter Fmt; + + Fmt.add("loc", RangeStr); + Fmt.add("source", 1); + + if (NestingRange.isValid()) { + std::string NestingRangeStr = fullRangeToString(NestingRange); + if (!NestingRangeStr.empty()) { + Fmt.add("nestingRange", NestingRangeStr); + } + } + + std::string Syntax; + if (Flags & NoCrossref) { + Fmt.add("syntax", ""); + } else { + Syntax = Kind; + Syntax.push_back(','); + Syntax.append(SyntaxKind); + Fmt.add("syntax", Syntax); + } + + std::string Pretty(SyntaxKind); + Pretty.push_back(' '); + Pretty.append(QualName.data()); + Fmt.add("pretty", Pretty); + + Fmt.add("sym", SymbolList); + + if (Flags & NoCrossref) { + Fmt.add("no_crossref", 1); + } + + std::string Buf; + Fmt.format(Buf); + F->Output.push_back(std::move(Buf)); + } + + void visitIdentifier(const char *Kind, const char *SyntaxKind, + llvm::StringRef QualName, SourceLocation Loc, std::string Symbol, + Context TokenContext = Context(), int Flags = 0, + SourceRange PeekRange = SourceRange(), + SourceRange NestingRange = SourceRange()) { + std::vector<std::string> V = {Symbol}; + visitIdentifier(Kind, SyntaxKind, QualName, SourceRange(Loc), V, TokenContext, Flags, + PeekRange, NestingRange); + } + + void normalizeLocation(SourceLocation *Loc) { + *Loc = SM.getSpellingLoc(*Loc); + } + + // For cases where the left-brace is not directly accessible from the AST, + // helper to use the lexer to find the brace. Make sure you're picking the + // start location appropriately! + SourceLocation findLeftBraceFromLoc(SourceLocation Loc) { + return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false); + } + + // If the provided statement is compound, return its range. + SourceRange getCompoundStmtRange(Stmt* D) { + if (!D) { + return SourceRange(); + } + + CompoundStmt *D2 = dyn_cast<CompoundStmt>(D); + if (D2) { + return D2->getSourceRange(); + } + + return SourceRange(); + } + + SourceRange getFunctionPeekRange(FunctionDecl* D) { + // We always start at the start of the function decl, which may include the + // return type on a separate line. + SourceLocation Start = D->getBeginLoc(); + + // By default, we end at the line containing the function's name. + SourceLocation End = D->getLocation(); + + std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End); + + // But if there are parameters, we want to include those as well. + for (ParmVarDecl* Param : D->parameters()) { + std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation()); + + // It's possible there are macros involved or something. We don't include + // the parameters in that case. + if (ParamLoc.first == FuncLoc.first) { + // Assume parameters are in order, so we always take the last one. + End = Param->getEndLoc(); + } + } + + return SourceRange(Start, End); + } + + SourceRange getTagPeekRange(TagDecl* D) { + SourceLocation Start = D->getBeginLoc(); + + // By default, we end at the line containing the name. + SourceLocation End = D->getLocation(); + + std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End); + + if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) { + // But if there are parameters, we want to include those as well. + for (CXXBaseSpecifier& Base : D2->bases()) { + std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc()); + + // It's possible there are macros involved or something. We don't include + // the parameters in that case. + if (Loc.first == FuncLoc.first) { + // Assume parameters are in order, so we always take the last one. + End = Base.getEndLoc(); + } + } + } + + return SourceRange(Start, End); + } + + SourceRange getCommentRange(NamedDecl* D) { + const RawComment* RC = + AstContext->getRawCommentForDeclNoCache(D); + if (!RC) { + return SourceRange(); + } + + return RC->getSourceRange(); + } + + // Sanity checks that all ranges are in the same file, returning the first if + // they're in different files. Unions the ranges based on which is first. + SourceRange combineRanges(SourceRange Range1, SourceRange Range2) { + if (Range1.isInvalid()) { + return Range2; + } + if (Range2.isInvalid()) { + return Range1; + } + + std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin()); + std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd()); + std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin()); + std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd()); + + if (End1.first != Begin2.first) { + // Something weird is probably happening with the preprocessor. Just + // return the first range. + return Range1; + } + + // See which range comes first. + if (Begin1.second <= End2.second) { + return SourceRange(Range1.getBegin(), Range2.getEnd()); + } else { + return SourceRange(Range2.getBegin(), Range1.getEnd()); + } + } + + // Given a location and a range, returns the range if: + // - The location and the range live in the same file. + // - The range is well ordered (end is not before begin). + // Returns an empty range otherwise. + SourceRange validateRange(SourceLocation Loc, SourceRange Range) { + std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc); + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + if (Begin.first != Decomposed.first || End.first != Decomposed.first) { + return SourceRange(); + } + + if (Begin.second >= End.second) { + return SourceRange(); + } + + return Range; + } + + bool VisitNamedDecl(NamedDecl *D) { + SourceLocation Loc = D->getLocation(); + + // If the token is from a macro expansion and the expansion location + // is interesting, use that instead as it tends to be more useful. + SourceLocation expandedLoc = Loc; + if (SM.isMacroBodyExpansion(Loc)) { + Loc = SM.getFileLoc(Loc); + } + + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) { + // Unnamed parameter in function proto. + return true; + } + + int Flags = 0; + const char *Kind = "def"; + const char *PrettyKind = "?"; + SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc()); + // The nesting range identifies the left brace and right brace, which + // heavily depends on the AST node type. + SourceRange NestingRange; + if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { + if (D2->isTemplateInstantiation()) { + D = D2->getTemplateInstantiationPattern(); + } + Kind = D2->isThisDeclarationADefinition() ? "def" : "decl"; + PrettyKind = "function"; + PeekRange = getFunctionPeekRange(D2); + + // Only emit the nesting range if: + // - This is a definition AND + // - This isn't a template instantiation. Function templates' + // instantiations can end up as a definition with a Loc at their point + // of declaration but with the CompoundStmt of the template's + // point of definition. This really messes up the nesting range logic. + // At the time of writing this, the test repo's `big_header.h`'s + // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as + // instantiated by `big_cpp.cpp` triggers this phenomenon. + // + // Note: As covered elsewhere, template processing is tricky and it's + // conceivable that we may change traversal patterns in the future, + // mooting this guard. + if (D2->isThisDeclarationADefinition() && + !D2->isTemplateInstantiation()) { + // The CompoundStmt range is the brace range. + NestingRange = getCompoundStmtRange(D2->getBody()); + } + } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) { + Kind = D2->isThisDeclarationADefinition() ? "def" : "decl"; + PrettyKind = "type"; + + if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) { + PeekRange = getTagPeekRange(D2); + NestingRange = D2->getBraceRange(); + } else { + PeekRange = SourceRange(); + } + } else if (isa<TypedefNameDecl>(D)) { + Kind = "def"; + PrettyKind = "type"; + PeekRange = SourceRange(Loc, Loc); + } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) { + if (D2->isLocalVarDeclOrParm()) { + Flags = NoCrossref; + } + + Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly + ? "decl" + : "def"; + PrettyKind = "variable"; + } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) { + Kind = "def"; + PrettyKind = "namespace"; + PeekRange = SourceRange(Loc, Loc); + NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D); + if (D2) { + // There's no exposure of the left brace so we have to find it. + NestingRange = SourceRange( + findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc), + D2->getRBraceLoc()); + } + } else if (isa<FieldDecl>(D)) { + Kind = "def"; + PrettyKind = "field"; + } else if (isa<EnumConstantDecl>(D)) { + Kind = "def"; + PrettyKind = "enum constant"; + } else { + return true; + } + + SourceRange CommentRange = getCommentRange(D); + PeekRange = combineRanges(PeekRange, CommentRange); + PeekRange = validateRange(Loc, PeekRange); + NestingRange = validateRange(Loc, NestingRange); + + std::vector<std::string> Symbols = {getMangledName(CurMangleContext, D)}; + if (CXXMethodDecl::classof(D)) { + Symbols.clear(); + findOverriddenMethods(dyn_cast<CXXMethodDecl>(D), Symbols); + } + + // In the case of destructors, Loc might point to the ~ character. In that + // case we want to skip to the name of the class. However, Loc might also + // point to other places that generate destructors, such as the use site of + // a macro that expands to generate a destructor, or a lambda (apparently + // clang 8 creates a destructor declaration for at least some lambdas). In + // the former case we'll use the macro use site as the location, and in the + // latter we'll just drop the declaration. + if (isa<CXXDestructorDecl>(D)) { + PrettyKind = "destructor"; + const char *P = SM.getCharacterData(Loc); + if (*P == '~') { + // Advance Loc to the class name + P++; + + unsigned Skipped = 1; + while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') { + P++; + Skipped++; + } + + Loc = Loc.getLocWithOffset(Skipped); + } else { + // See if the destructor is coming from a macro expansion + P = SM.getCharacterData(expandedLoc); + if (*P != '~') { + // It's not + return true; + } + // It is, so just use Loc as-is + } + } + + visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbols, + getContext(D), Flags, PeekRange, NestingRange); + + return true; + } + + bool VisitCXXConstructExpr(CXXConstructExpr *E) { + SourceLocation Loc = E->getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + FunctionDecl *Ctor = E->getConstructor(); + if (Ctor->isTemplateInstantiation()) { + Ctor = Ctor->getTemplateInstantiationPattern(); + } + std::string Mangled = getMangledName(CurMangleContext, Ctor); + + // FIXME: Need to do something different for list initialization. + + visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled, + getContext(Loc)); + + return true; + } + + bool VisitCallExpr(CallExpr *E) { + Decl *Callee = E->getCalleeDecl(); + if (!Callee || !FunctionDecl::classof(Callee)) { + return true; + } + + const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee); + + SourceLocation Loc; + + const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee); + if (F->isTemplateInstantiation()) { + NamedCallee = F->getTemplateInstantiationPattern(); + } + + std::string Mangled = getMangledName(CurMangleContext, NamedCallee); + int Flags = 0; + + Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts(); + + if (CXXOperatorCallExpr::classof(E)) { + // Just take the first token. + CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E); + Loc = Op->getOperatorLoc(); + Flags |= NotIdentifierToken; + } else if (MemberExpr::classof(CalleeExpr)) { + MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr); + Loc = Member->getMemberLoc(); + } else if (DeclRefExpr::classof(CalleeExpr)) { + // We handle this in VisitDeclRefExpr. + return true; + } else { + return true; + } + + normalizeLocation(&Loc); + + if (!isInterestingLocation(Loc)) { + return true; + } + + visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled, + getContext(Loc), Flags); + + return true; + } + + bool VisitTagTypeLoc(TagTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + TagDecl *Decl = L.getDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + return true; + } + + bool VisitTypedefTypeLoc(TypedefTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + NamedDecl *Decl = L.getTypedefNameDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + return true; + } + + bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + NamedDecl *Decl = L.getDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + return true; + } + + bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl(); + if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) { + NamedDecl *Decl = D->getTemplatedDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) { + NamedDecl *Decl = D->getTemplatedDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + } + + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (E->hasQualifier()) { + Loc = E->getNameInfo().getLoc(); + normalizeLocation(&Loc); + } + + NamedDecl *Decl = E->getDecl(); + if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) { + int Flags = 0; + if (D2->isLocalVarDeclOrParm()) { + Flags = NoCrossref; + } + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc), Flags); + } else if (isa<FunctionDecl>(Decl)) { + const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl); + if (F->isTemplateInstantiation()) { + Decl = F->getTemplateInstantiationPattern(); + } + + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + } else if (isa<EnumConstantDecl>(Decl)) { + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled, + getContext(Loc)); + } + + return true; + } + + bool VisitCXXConstructorDecl(CXXConstructorDecl *D) { + if (!isInterestingLocation(D->getLocation())) { + return true; + } + + for (CXXConstructorDecl::init_const_iterator It = D->init_begin(); + It != D->init_end(); ++It) { + const CXXCtorInitializer *Ci = *It; + if (!Ci->getMember() || !Ci->isWritten()) { + continue; + } + + SourceLocation Loc = Ci->getMemberLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + continue; + } + + FieldDecl *Member = Ci->getMember(); + std::string Mangled = getMangledName(CurMangleContext, Member); + visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled, + getContext(D)); + } + + return true; + } + + bool VisitMemberExpr(MemberExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + ValueDecl *Decl = E->getMemberDecl(); + if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) { + std::string Mangled = getMangledName(CurMangleContext, Field); + visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled, + getContext(Loc)); + } + return true; + } + + bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) { + SourceLocation Loc = E->getMemberLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (TemplateStack) { + TemplateStack->visitDependent(Loc); + } + return true; + } + + void enterSourceFile(SourceLocation Loc) { + normalizeLocation(&Loc); + FileInfo* newFile = getFileInfo(Loc); + if (!newFile->Interesting) { + return; + } + FileType type = newFile->Generated ? FileType::Generated : FileType::Source; + std::vector<std::string> symbols = { + std::string("FILE_") + mangleFile(newFile->Realname, type) + }; + // We use an explicit zero-length source range at the start of the file. If we + // don't set the LocRangeEndValid flag, the visitIdentifier code will use the + // entire first token, which could be e.g. a long multiline-comment. + visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc), + symbols, Context(), NotIdentifierToken | LocRangeEndValid); + } + + void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) { + std::string includedFile(File->tryGetRealPathName()); + FileType type = relativizePath(includedFile); + if (type == FileType::Unknown) { + return; + } + std::vector<std::string> symbols = { + std::string("FILE_") + mangleFile(includedFile, type) + }; + visitIdentifier("use", "file", includedFile, FileNameRange, symbols, + Context(), NotIdentifierToken | LocRangeEndValid); + } + + void macroDefined(const Token &Tok, const MacroDirective *Macro) { + if (Macro->getMacroInfo()->isBuiltinMacro()) { + return; + } + SourceLocation Loc = Tok.getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return; + } + + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + if (Ident) { + std::string Mangled = + std::string("M_") + mangleLocation(Loc, std::string(Ident->getName())); + visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled); + } + } + + void macroUsed(const Token &Tok, const MacroInfo *Macro) { + if (!Macro) { + return; + } + if (Macro->isBuiltinMacro()) { + return; + } + SourceLocation Loc = Tok.getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return; + } + + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + if (Ident) { + std::string Mangled = + std::string("M_") + + mangleLocation(Macro->getDefinitionLoc(), std::string(Ident->getName())); + visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled); + } + } +}; + +void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID = FileID()) { + switch (Reason) { + case PPCallbacks::RenameFile: + case PPCallbacks::SystemHeaderPragma: + // Don't care about these, since we want the actual on-disk filenames + break; + case PPCallbacks::EnterFile: + Indexer->enterSourceFile(Loc); + break; + case PPCallbacks::ExitFile: + // Don't care about exiting files + break; + } +} + +void PreprocessorHook::InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FileNameRange, + const FileEntry *File, + StringRef SearchPath, + StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) { + Indexer->inclusionDirective(FileNameRange.getAsRange(), File); +} + +void PreprocessorHook::MacroDefined(const Token &Tok, + const MacroDirective *Md) { + Indexer->macroDefined(Tok, Md); +} + +void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md, + SourceRange Range, const MacroArgs *Ma) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::MacroUndefined(const Token &Tok, + const MacroDefinition &Md, + const MacroDirective *Undef) +{ + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md, + SourceRange Range) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +class IndexAction : public PluginASTAction { +protected: + std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, + llvm::StringRef F) { + return make_unique<IndexConsumer>(CI); + } + + bool ParseArgs(const CompilerInstance &CI, + const std::vector<std::string> &Args) { + if (Args.size() != 3) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, + "Need arguments for the source, output, and object directories"); + D.Report(DiagID); + return false; + } + + // Load our directories + Srcdir = getAbsolutePath(Args[0]); + if (Srcdir.empty()) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, "Source directory '%0' does not exist"); + D.Report(DiagID) << Args[0]; + return false; + } + + ensurePath(Args[1] + PATHSEP_STRING); + Outdir = getAbsolutePath(Args[1]); + Outdir += PATHSEP_STRING; + + Objdir = getAbsolutePath(Args[2]); + if (Objdir.empty()) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error, + "Objdir '%0' does not exist"); + D.Report(DiagID) << Args[2]; + return false; + } + Objdir += PATHSEP_STRING; + + printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(), + Objdir.c_str()); + + return true; + } + + void printHelp(llvm::raw_ostream &Ros) { + Ros << "Help for mozsearch plugin goes here\n"; + } +}; + +static FrontendPluginRegistry::Add<IndexAction> + Y("mozsearch-index", "create the mozsearch index database"); diff --git a/build/clang-plugin/mozsearch-plugin/README b/build/clang-plugin/mozsearch-plugin/README new file mode 100644 index 0000000000..d948e9aca3 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/README @@ -0,0 +1,12 @@ +This clang plugin code generates a JSON file for each compiler input +file. The JSON file contains information about the C++ symbols that +are referenced by the input file. The data is eventually consumed by +Searchfox. See https://github.com/mozsearch/mozsearch for more +information. + +This plugin is enabled with the --enable-clang-plugin and +--enable-mozsearch-plugin mozconfig options. The output of the plugin +is stored in $OBJDIR/mozsearch_index. + +This code is not a checker, unlike other parts of the Mozilla clang +plugin. It cannot be used with clang-tidy. diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.cpp b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp new file mode 100644 index 0000000000..a2e60e42c6 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "StringOperations.h" + +static unsigned long djbHash(const char *Str) { + unsigned long Hash = 5381; + + for (const char *P = Str; *P; P++) { + // Hash * 33 + c + Hash = ((Hash << 5) + Hash) + *P; + } + + return Hash; +} + +// This doesn't actually return a hex string of |hash|, but it +// does... something. It doesn't really matter what. +static void hashToString(unsigned long Hash, char *Buffer) { + const char Table[] = {"0123456789abcdef"}; + char *P = Buffer; + while (Hash) { + *P = Table[Hash & 0xf]; + Hash >>= 4; + P++; + } + + *P = 0; +} + +std::string hash(const std::string &Str) { + static char HashStr[41]; + unsigned long H = djbHash(Str.c_str()); + hashToString(H, HashStr); + return std::string(HashStr); +} + +std::string toString(int N) { + return stringFormat("%d", N); +} diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.h b/build/clang-plugin/mozsearch-plugin/StringOperations.h new file mode 100644 index 0000000000..4aa5b31962 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/StringOperations.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef StringOperations_h +#define StringOperations_h + +#include <memory> +#include <string> +#include <string.h> + +std::string hash(const std::string &Str); + +template <typename... Args> +inline std::string stringFormat(const std::string &Format, Args... ArgList) { + size_t Len = snprintf(nullptr, 0, Format.c_str(), ArgList...); + std::unique_ptr<char[]> Buf(new char[Len + 1]); + snprintf(Buf.get(), Len + 1, Format.c_str(), ArgList...); + return std::string(Buf.get(), Buf.get() + Len); +} + +std::string toString(int N); + +#endif |