summary | refs | log | tree | commit | diff | stats
path: root/build/clang-plugin/mozsearch-plugin
diff options
context:
space:
mode:
Diffstat (limited to 'build/clang-plugin/mozsearch-plugin')
-rw-r--r-- build/clang-plugin/mozsearch-plugin/FileOperations.cpp 140
-rw-r--r-- build/clang-plugin/mozsearch-plugin/FileOperations.h 70
-rw-r--r-- build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp 2200
-rw-r--r-- build/clang-plugin/mozsearch-plugin/README 12
-rw-r--r-- build/clang-plugin/mozsearch-plugin/StringOperations.cpp 42
-rw-r--r-- build/clang-plugin/mozsearch-plugin/StringOperations.h 25
6 files changed, 2489 insertions, 0 deletions
diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.cpp b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp
new file mode 100644
index 0000000000..9307f4989d
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp
@@ -0,0 +1,140 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "FileOperations.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <direct.h>
+#include <io.h>
+#include <windows.h>
+#include "StringOperations.h"
+#else
+#include <sys/file.h>
+#include <sys/time.h>
+#include <unistd.h>
+#endif
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// Create every directory named by a prefix of |Path| that ends right before a
+// path separator. The final element of the path (whatever follows the last
+// separator) is not created. If a directory cannot be created for any reason
+// other than already existing, an error is printed and the process exits.
+void ensurePath(std::string Path) {
+  // Begin searching after a leading separator so the root is never a prefix.
+  size_t Cursor = (Path[0] == PATHSEP_CHAR) ? 1 : 0;
+
+  for (;;) {
+    Cursor = Path.find(PATHSEP_CHAR, Cursor);
+    if (Cursor == std::string::npos) {
+      return;
+    }
+
+    const std::string Prefix = Path.substr(0, Cursor);
+    // Resume the next search just past this separator.
+    ++Cursor;
+    if (Prefix.empty()) {
+      continue;
+    }
+
+#if defined(_WIN32) || defined(_WIN64)
+    const int Rv = _mkdir(Prefix.c_str());
+#else
+    const int Rv = mkdir(Prefix.c_str(), 0775);
+#endif
+    if (Rv == -1 && errno != EEXIST) {
+      perror("mkdir failed");
+      exit(1);
+    }
+  }
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+// Windows: take a named mutex whose name is derived from hash(SrcFile),
+// blocking until it is owned. DstFile is remembered as the analysis file name.
+// On any failure Handle ends up NULL, so success() reports the problem and no
+// mutex that we do not own is left behind.
+AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) {
+  this->Filename = DstFile;
+  std::string Hash = hash(SrcFile);
+  std::string MutexName = std::string("Local\\searchfox-") + Hash;
+  std::wstring WideMutexName;
+  WideMutexName.assign(MutexName.begin(), MutexName.end());
+  // Use the wide-character entry point explicitly: the generic CreateMutex
+  // macro only accepts a wchar_t name when UNICODE is defined.
+  Handle = CreateMutexW(nullptr, false, WideMutexName.c_str());
+  if (Handle == NULL) {
+    return;
+  }
+
+  // WAIT_ABANDONED also grants ownership (the previous owner went away without
+  // releasing), so only other results are treated as failure.
+  const DWORD WaitResult = WaitForSingleObject(Handle, INFINITE);
+  if (WaitResult != WAIT_OBJECT_0 && WaitResult != WAIT_ABANDONED) {
+    CloseHandle(Handle);
+    Handle = NULL;
+  }
+}
+
+// Windows: release and close the mutex. Nothing is done when no handle was
+// obtained, so the Win32 calls are never made on a NULL handle.
+AutoLockFile::~AutoLockFile() {
+  if (Handle == NULL) {
+    return;
+  }
+  ReleaseMutex(Handle);
+  CloseHandle(Handle);
+}
+
+// Windows: true when the object holds a mutex handle.
+bool AutoLockFile::success() {
+  const bool HaveHandle = (Handle != NULL);
+  return HaveHandle;
+}
+
+// Windows: open "<Filename>.tmp" for binary appending and return it as a
+// FILE*, or nullptr on failure. The file is truncated first so data left by a
+// run that exited before moveTmp() cannot leak into the new output.
+FILE *AutoLockFile::openTmp() {
+  const std::string TmpName = Filename + ".tmp";
+  int TmpDescriptor = _open(TmpName.c_str(), _O_WRONLY | _O_APPEND | _O_CREAT | _O_TRUNC | _O_BINARY, 0666);
+  if (TmpDescriptor == -1) {
+    return nullptr;
+  }
+  FILE *Fp = _fdopen(TmpDescriptor, "ab");
+  if (!Fp) {
+    // Without a FILE* nobody else can close the descriptor.
+    _close(TmpDescriptor);
+  }
+  return Fp;
+}
+
+// Windows: delete the current analysis file (a missing file is fine) and then
+// rename "<Filename>.tmp" over it. Returns false if either step fails.
+bool AutoLockFile::moveTmp() {
+  const bool Removed = (_unlink(Filename.c_str()) != -1);
+  if (!Removed && errno != ENOENT) {
+    return false;
+  }
+  const std::string TmpName = Filename + ".tmp";
+  return rename(TmpName.c_str(), Filename.c_str()) == 0;
+}
+
+// Windows: resolve Filename with _fullpath. Returns the empty string when the
+// path cannot be resolved.
+std::string getAbsolutePath(const std::string &Filename) {
+  char Resolved[_MAX_PATH];
+  const char *Ok = _fullpath(Resolved, Filename.c_str(), _MAX_PATH);
+  return Ok ? std::string(Resolved) : std::string("");
+}
+#else
+// POSIX: open SrcFile read-only and take an exclusive flock() on it, blocking
+// until the lock is held. DstFile is remembered as the analysis file name.
+// On failure FileDescriptor is left at -1 so success() returns false.
+AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) {
+  this->Filename = DstFile;
+  FileDescriptor = open(SrcFile.c_str(), O_RDONLY);
+  if (FileDescriptor == -1) {
+    return;
+  }
+
+  // Retry only when the call was interrupted by a signal. Any other error is
+  // permanent, and retrying it would spin forever.
+  while (flock(FileDescriptor, LOCK_EX) != 0) {
+    if (errno == EINTR) {
+      continue;
+    }
+    close(FileDescriptor);
+    FileDescriptor = -1;
+    return;
+  }
+}
+
+// POSIX: close the source-file descriptor if one was opened. Skips the call
+// entirely when construction failed, instead of calling close(-1).
+AutoLockFile::~AutoLockFile() {
+  if (FileDescriptor != -1) {
+    close(FileDescriptor);
+  }
+}
+
+// POSIX: true when the object holds an open descriptor.
+bool AutoLockFile::success() {
+  const bool HaveDescriptor = (FileDescriptor != -1);
+  return HaveDescriptor;
+}
+
+// POSIX: open "<Filename>.tmp" for appending and return it as a FILE*, or
+// nullptr on failure. The file is truncated first so data left by a run that
+// exited before moveTmp() cannot leak into the new output.
+FILE* AutoLockFile::openTmp() {
+  const std::string TmpName = Filename + ".tmp";
+  int TmpDescriptor = open(TmpName.c_str(), O_WRONLY | O_APPEND | O_CREAT | O_TRUNC, 0666);
+  if (TmpDescriptor == -1) {
+    return nullptr;
+  }
+  FILE *Fp = fdopen(TmpDescriptor, "ab");
+  if (!Fp) {
+    // Without a FILE* nobody else can close the descriptor.
+    close(TmpDescriptor);
+  }
+  return Fp;
+}
+
+// POSIX: move "<Filename>.tmp" into place as the analysis file. Returns false
+// on error. rename() replaces an existing destination atomically, so the old
+// file is not unlinked first; that way an interruption can never leave the
+// analysis file missing.
+bool AutoLockFile::moveTmp() {
+  const std::string TmpName = Filename + ".tmp";
+  return rename(TmpName.c_str(), Filename.c_str()) == 0;
+}
+
+// POSIX: resolve Filename with realpath(). Returns the empty string when the
+// path cannot be resolved. realpath() allocates the result itself, so the
+// resolved path is not limited by a fixed-size local buffer.
+std::string getAbsolutePath(const std::string &Filename) {
+  char *Resolved = realpath(Filename.c_str(), nullptr);
+  if (!Resolved) {
+    return std::string("");
+  }
+  std::string Result(Resolved);
+  free(Resolved);
+  return Result;
+}
+#endif
diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.h b/build/clang-plugin/mozsearch-plugin/FileOperations.h
new file mode 100644
index 0000000000..90764484da
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/FileOperations.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FileOperations_h
+#define FileOperations_h
+
+#include <stdio.h>
+#include <string>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#define PATHSEP_CHAR '\\'
+#define PATHSEP_STRING "\\"
+#else
+#define PATHSEP_CHAR '/'
+#define PATHSEP_STRING "/"
+#endif
+
+// Make sure that all directories on path exist, excluding the final element of
+// the path.
+void ensurePath(std::string Path);
+
+std::string getAbsolutePath(const std::string &Filename);
+
+// Used to synchronize access when writing to an analysis file, so that
+// concurrently running clang instances don't clobber each other's data.
+// On Windows, we use a named mutex. On POSIX platforms, we use flock on the
+// source files. flock is advisory locking, and doesn't interfere with clang's
+// own opening of the source files (i.e. to interfere, clang would have to be
+// using flock itself, which it does not).
+struct AutoLockFile {
+  // Absolute path to the analysis file
+  std::string Filename;
+
+#if defined(_WIN32) || defined(_WIN64)
+  // Handle for the named Mutex
+  HANDLE Handle = NULL;
+#else
+  // fd for the *source* file that corresponds to the analysis file. We use
+  // the source file because it doesn't change while the analysis file gets
+  // repeatedly replaced by a new version written to a separate tmp file.
+  // This fd is used when using flock to synchronize access.
+  int FileDescriptor = -1;
+#endif
+
+  // SrcFile should be the absolute path to the source code file, and DstFile
+  // the absolute path to the corresponding analysis file. This constructor
+  // will block until exclusive access has been obtained.
+  AutoLockFile(const std::string &SrcFile, const std::string &DstFile);
+  ~AutoLockFile();
+
+  // The destructor releases the handle/descriptor, so a copy would release it
+  // a second time. Copying is therefore disabled.
+  AutoLockFile(const AutoLockFile &) = delete;
+  AutoLockFile &operator=(const AutoLockFile &) = delete;
+
+  // Check after constructing to ensure the lock was properly set up.
+  bool success();
+
+  // There used to be an `openFile` method here but we switched to directly
+  // using a std::ifstream for the input file in order to take advantage of its
+  // support for variable length lines (as opposed to fgets which takes a fixed
+  // size buffer).
+
+  // Open a new tmp file for writing the new analysis data to. Caller is
+  // responsible for fclose'ing it.
+  FILE *openTmp();
+  // Replace the existing analysis file with the new "tmp" one that has the new
+  // data. Returns false on error.
+  bool moveTmp();
+};
+
+#endif
diff --git a/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp
new file mode 100644
index 0000000000..904897df6b
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp
@@ -0,0 +1,2200 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/Mangle.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Version.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendPluginRegistry.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <unordered_set>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "FileOperations.h"
+#include "StringOperations.h"
+
+#if CLANG_VERSION_MAJOR < 8
+// Starting with Clang 8.0 some basic functions have been renamed
+#define getBeginLoc getLocStart
+#define getEndLoc getLocEnd
+#endif
+// We want std::make_unique, but that's only available in c++14. In versions
+// prior to that, we need to fall back to llvm's make_unique. It's also the
+// case that we expect clang 10 to build with c++14 and clang 9 and earlier to
+// build with c++11, at least as suggested by the llvm-config --cxxflags on
+// non-windows platforms. mozilla-central seems to build with -std=c++17 on
+// windows so we need to make this decision based on __cplusplus instead of
+// the CLANG_VERSION_MAJOR.
+#if __cplusplus < 201402L
+using llvm::make_unique;
+#else
+using std::make_unique;
+#endif
+
+using namespace clang;
+
+const std::string GENERATED("__GENERATED__" PATHSEP_STRING);  // Replaces the Objdir prefix when a path is relativized.
+
+// Absolute path to directory containing source code (used without a trailing separator).
+std::string Srcdir;
+
+// Absolute path to objdir (including generated code); presumably ends with a separator - TODO confirm.
+std::string Objdir;
+
+// Absolute path where analysis JSON output will be stored.
+std::string Outdir;
+
+enum class FileType {
+ // The file was in neither the source tree nor the objdir. It might be a
+ // system include, for example.
+ Unknown,
+ // A file from the source tree.
+ Source,
+ // A file from the objdir.
+ Generated,
+};
+
+// Takes an absolute path to a file, and returns the type of file it is. If
+// it's a Source or Generated file, the provided inout path argument is modified
+// in-place so that it is relative to the source dir or objdir, respectively
+// (generated paths get the GENERATED prefix in place of Objdir). Paths of
+// Unknown type are left untouched.
+FileType relativizePath(std::string& path) {
+  if (path.compare(0, Objdir.length(), Objdir) == 0) {
+    path.replace(0, Objdir.length(), GENERATED);
+    return FileType::Generated;
+  }
+  // Empty filenames can get turned into Srcdir when they are resolved as
+  // absolute paths, so we should exclude files that are exactly equal to
+  // Srcdir or anything outside Srcdir.
+  if (path.length() > Srcdir.length() &&
+      path.compare(0, Srcdir.length(), Srcdir) == 0) {
+    // The character right after the prefix must be a path separator.
+    // Otherwise a sibling such as "<Srcdir>-other/x" would be mistaken for a
+    // file inside Srcdir and lose the first character of its name below.
+    const char Next = path[Srcdir.length()];
+    if (Next == PATHSEP_CHAR || Next == '/') {
+      // Remove the separator that follows Srcdir as well.
+      path.erase(0, Srcdir.length() + 1);
+      return FileType::Source;
+    }
+  }
+  return FileType::Unknown;
+}
+
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <sys/time.h>
+
+// Current wall-clock time from gettimeofday, in seconds with a fractional
+// microsecond part.
+static double time() {
+  struct timeval Now;
+  gettimeofday(&Now, nullptr);
+  const double WholeSeconds = double(Now.tv_sec);
+  const double Fraction = double(Now.tv_usec) / 1000000.;
+  return WholeSeconds + Fraction;
+}
+#endif
+
+// Return true if |Input| consists only of ASCII letters, digits and
+// underscores (the empty string also passes). We don't want to generate
+// analysis information for operators, string literals, etc. by accident since
+// it trips up consumers of the data.
+static bool isValidIdentifier(std::string Input) {
+  for (char C : Input) {
+    // The <ctype> functions require a value representable as unsigned char;
+    // passing a negative plain char is undefined behaviour.
+    const unsigned char U = static_cast<unsigned char>(C);
+    if (!(isalpha(U) || isdigit(U) || C == '_')) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Debug helper: prints "<label>" when constructed and "</label>" when
+// destroyed, bracketing the scope it lives in on stdout.
+struct RAIITracer {
+  const char* mLog;
+
+  RAIITracer(const char *log) : mLog(log) { printf("<%s>\n", mLog); }
+
+  ~RAIITracer() { printf("</%s>\n", mLog); }
+};
+
+#define TRACEFUNC RAIITracer tracer(__FUNCTION__);
+
+class IndexConsumer;
+
+// One FileInfo is tracked per file seen by the analysis. It records whether
+// the file is "interesting" (in the source dir or the objdir), whether it is
+// generated (in the objdir), its relativized name, and the analysis output
+// collected for it.
+struct FileInfo {
+  FileInfo(std::string &Rname) : Realname(Rname) {
+    // relativizePath rewrites Realname in place for Source/Generated files.
+    const FileType Kind = relativizePath(Realname);
+    Generated = (Kind == FileType::Generated);
+    Interesting = Generated || (Kind == FileType::Source);
+  }
+
+  std::string Realname;
+  std::vector<std::string> Output;
+  bool Interesting;
+  bool Generated;
+};
+
+class IndexConsumer;
+
+// PPCallbacks subclass that keeps a pointer to the IndexConsumer it was
+// created for. The callbacks are only declared here; their bodies are defined
+// out of line.
+class PreprocessorHook : public PPCallbacks {
+  IndexConsumer *Indexer;
+
+public:
+  PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
+
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override;
+
+  // The type of the File parameter depends on the clang version.
+  void InclusionDirective(SourceLocation HashLoc,
+                          const Token &IncludeTok,
+                          StringRef FileName,
+                          bool IsAngled,
+                          CharSourceRange FileNameRange,
+#if CLANG_VERSION_MAJOR >= 16
+                          OptionalFileEntryRef File,
+#elif CLANG_VERSION_MAJOR >= 15
+                          Optional<FileEntryRef> File,
+#else
+                          const FileEntry *File,
+#endif
+                          StringRef SearchPath,
+                          StringRef RelativePath,
+                          const Module *Imported,
+                          SrcMgr::CharacteristicKind FileType) override;
+
+  void MacroDefined(const Token &Tok, const MacroDirective *Md) override;
+
+  void MacroExpands(const Token &Tok, const MacroDefinition &Md,
+                    SourceRange Range, const MacroArgs *Ma) override;
+  void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
+                      const MacroDirective *Undef) override;
+  void Defined(const Token &Tok, const MacroDefinition &Md,
+               SourceRange Range) override;
+  void Ifdef(SourceLocation Loc, const Token &Tok,
+             const MacroDefinition &Md) override;
+  void Ifndef(SourceLocation Loc, const Token &Tok,
+              const MacroDefinition &Md) override;
+};
+
+class IndexConsumer : public ASTConsumer,
+ public RecursiveASTVisitor<IndexConsumer>,
+ public DiagnosticConsumer {
+private:
+ CompilerInstance &CI;
+ SourceManager &SM;
+ LangOptions &LO;
+ std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
+ MangleContext *CurMangleContext;
+ ASTContext *AstContext;
+
+ typedef RecursiveASTVisitor<IndexConsumer> Super;
+
+  // Scope guard that records which declaration the traversal is currently
+  // inside. Each guard links to the previously active one, installs itself as
+  // the consumer's CurDeclContext, and restores the previous guard when
+  // destroyed. VisitImplicit is inherited from the enclosing guard once set.
+  struct AutoSetContext {
+    AutoSetContext(IndexConsumer *Owner, NamedDecl *Context, bool ForceImplicit = false)
+        : Self(Owner), Prev(Owner->CurDeclContext), Decl(Context),
+          VisitImplicit(ForceImplicit || (Prev != nullptr && Prev->VisitImplicit)) {
+      Self->CurDeclContext = this;
+    }
+
+    ~AutoSetContext() { Self->CurDeclContext = Prev; }
+
+    IndexConsumer *Self;
+    AutoSetContext *Prev;
+    NamedDecl *Decl;
+    bool VisitImplicit;
+  };
+ AutoSetContext *CurDeclContext;
+
+  // Return the FileInfo for the file containing Loc, creating and caching it
+  // in FileMap the first time that file is seen.
+  FileInfo *getFileInfo(SourceLocation Loc) {
+    const FileID Id = SM.getFileID(Loc);
+
+    auto Found = FileMap.find(Id);
+    if (Found != FileMap.end()) {
+      return Found->second.get();
+    }
+
+    // First time we see this file: build its FileInfo ourselves.
+    const std::string Filename = std::string(SM.getFilename(Loc));
+    // If Loc is a macro id rather than a file id, Filename might be empty.
+    // Clang-internal locations like "<scratch>" can also yield an empty
+    // Filename. In those cases Absolute stays empty.
+    std::string Absolute;
+    if (!Filename.empty()) {
+      Absolute = getAbsolutePath(Filename);
+      if (Absolute.empty()) {
+        // Could not resolve the path; fall back to the name as reported.
+        Absolute = Filename;
+      }
+    }
+
+    std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute);
+    FileInfo *Raw = Info.get();
+    FileMap.insert(std::make_pair(Id, std::move(Info)));
+    return Raw;
+  }
+
+  // Helpers for processing declarations
+
+  // True when Loc is valid and lies in a file flagged as Interesting.
+  bool isInterestingLocation(SourceLocation Loc) {
+    return Loc.isValid() && getFileInfo(Loc)->Interesting;
+  }
+
+  // Convert location to "line:column" or "line:column-column" given length.
+  // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
+  // column is 0-based and unpadded. Returns "" if the line or column cannot be
+  // determined.
+  std::string locationToString(SourceLocation Loc, size_t Length = 0) {
+    std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc);
+
+    bool IsInvalid = false;
+    unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
+    if (IsInvalid) {
+      return "";
+    }
+    unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
+    if (IsInvalid) {
+      return "";
+    }
+
+    // The "%d" conversions expect int arguments. In particular
+    // Column - 1 + Length is a size_t, which must not be handed to "%d" as is.
+    const int Row = static_cast<int>(Line);
+    const int ColBegin = static_cast<int>(Column - 1);
+    if (Length) {
+      const int ColEnd = static_cast<int>(Column - 1 + Length);
+      return stringFormat("%05d:%d-%d", Row, ColBegin, ColEnd);
+    }
+    return stringFormat("%05d:%d", Row, ColBegin);
+  }
+
+  // Render the first and last line of Range as "line-line" (1-based).
+  // Returns "" when either line number cannot be determined.
+  std::string lineRangeToString(SourceRange Range) {
+    const std::pair<FileID, unsigned> Start = SM.getDecomposedLoc(Range.getBegin());
+    const std::pair<FileID, unsigned> Stop = SM.getDecomposedLoc(Range.getEnd());
+
+    bool Bad;
+    const unsigned FirstLine = SM.getLineNumber(Start.first, Start.second, &Bad);
+    if (Bad) {
+      return "";
+    }
+    const unsigned LastLine = SM.getLineNumber(Stop.first, Stop.second, &Bad);
+    if (Bad) {
+      return "";
+    }
+
+    return stringFormat("%d-%d", FirstLine, LastLine);
+  }
+
+  // Render Range as "line:column-line:column" with 1-based lines and 0-based
+  // columns. Returns "" when any of the four coordinates cannot be determined.
+  std::string fullRangeToString(SourceRange Range) {
+    const std::pair<FileID, unsigned> Start = SM.getDecomposedLoc(Range.getBegin());
+    const std::pair<FileID, unsigned> Stop = SM.getDecomposedLoc(Range.getEnd());
+
+    bool Bad;
+    const unsigned StartLine = SM.getLineNumber(Start.first, Start.second, &Bad);
+    if (Bad) {
+      return "";
+    }
+    const unsigned StartColumn = SM.getColumnNumber(Start.first, Start.second, &Bad);
+    if (Bad) {
+      return "";
+    }
+    const unsigned StopLine = SM.getLineNumber(Stop.first, Stop.second, &Bad);
+    if (Bad) {
+      return "";
+    }
+    const unsigned StopColumn = SM.getColumnNumber(Stop.first, Stop.second, &Bad);
+    if (Bad) {
+      return "";
+    }
+
+    return stringFormat("%d:%d-%d:%d", StartLine, StartColumn - 1, StopLine, StopColumn - 1);
+  }
+
+  // Returns the qualified name of `D` without considering template parameters
+  // (template arguments are only printed for explicit specializations).
+  // Declarations local to a function or method use clang's own qualified name.
+  std::string getQualifiedName(const NamedDecl *D) {
+    const DeclContext *Scope = D->getDeclContext();
+    if (Scope->isFunctionOrMethod()) {
+      return D->getQualifiedNameAsString();
+    }
+
+    // Gather the enclosing named contexts, innermost first.
+    std::vector<const DeclContext *> Enclosing;
+    for (; Scope && isa<NamedDecl>(Scope); Scope = Scope->getParent()) {
+      Enclosing.push_back(Scope);
+    }
+
+    std::string Qualified;
+
+    // Walk from the outermost context inwards, appending "name::" for each
+    // context that contributes to the qualified name.
+    for (auto It = Enclosing.rbegin(); It != Enclosing.rend(); ++It) {
+      const DeclContext *DC = *It;
+      if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
+        Qualified += Spec->getNameAsString();
+
+        if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
+          std::string ArgText;
+          llvm::raw_string_ostream ArgStream(ArgText);
+          printTemplateArgumentList(
+              ArgStream, Spec->getTemplateArgs().asArray(), PrintingPolicy(CI.getLangOpts()));
+          Qualified += ArgStream.str();
+        }
+      } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
+        // Anonymous and inline namespaces do not show up in the name.
+        if (Nd->isAnonymousNamespace() || Nd->isInline()) {
+          continue;
+        }
+        Qualified += Nd->getNameAsString();
+      } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
+        Qualified += Rd->getIdentifier() ? Rd->getNameAsString()
+                                         : std::string("(anonymous)");
+      } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
+        Qualified += Fd->getNameAsString();
+      } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
+        // C++ [dcl.enum]p10: Each enum-name and each unscoped
+        // enumerator is declared in the scope that immediately contains
+        // the enum-specifier. Each scoped enumerator is declared in the
+        // scope of the enumeration.
+        if (!(Ed->isScoped() || Ed->getIdentifier())) {
+          continue;
+        }
+        Qualified += Ed->getNameAsString();
+      } else {
+        Qualified += cast<NamedDecl>(DC)->getNameAsString();
+      }
+      Qualified += "::";
+    }
+
+    Qualified += D->getDeclName() ? D->getNameAsString()
+                                  : std::string("(anonymous)");
+    return Qualified;
+  }
+
+  // Build a hash identifying Loc from its file name and line:column. When the
+  // file name is empty and a non-empty Backup was supplied, Backup is returned
+  // unchanged instead.
+  std::string mangleLocation(SourceLocation Loc,
+                             std::string Backup = std::string()) {
+    FileInfo *Info = getFileInfo(Loc);
+    if (Info->Realname.empty() && !Backup.empty()) {
+      return Backup;
+    }
+
+    std::string Key = Info->Realname;
+    if (Info->Generated) {
+      // Since generated files may be different on different platforms,
+      // we need to include a platform-specific thing in the hash. Otherwise
+      // we can end up with hash collisions where different symbols from
+      // different platforms map to the same thing.
+      const char *Platform = getenv("MOZSEARCH_PLATFORM");
+      Key = std::string(Platform ? Platform : "") + std::string("@") + Key;
+    }
+    return hash(Key + std::string("@") + locationToString(Loc));
+  }
+
+  // True for the characters mangleFile leaves unencoded: ASCII letters,
+  // digits, '_' and '/'.
+  bool isAcceptableSymbolChar(char c) {
+    // The <ctype> functions require a value representable as unsigned char;
+    // passing a negative plain char is undefined behaviour.
+    const unsigned char u = static_cast<unsigned char>(c);
+    return isalpha(u) || isdigit(u) || c == '_' || c == '/';
+  }
+
+  // "Mangle" a file path into a symbol:
+  //  1. Most paths stay mostly human-readable.
+  //  2. Two different input paths never produce the same output.
+  //  3. The result doesn't cause problems with downstream consumers.
+  // Letters, digits, underscores and slashes are kept; every other byte
+  // becomes "@XX" (two uppercase hex digits). Because "@" itself is encoded,
+  // no "@" in the output comes straight from the input, which gives (2).
+  // (3) was found by trial-and-error; notably the dot (.) must be encoded or
+  // the symbol-search feature of mozsearch doesn't work correctly, as all dot
+  // characters in the symbol query get replaced by #.
+  std::string mangleFile(std::string Filename, FileType Type) {
+    static const char HexDigits[] = "0123456789ABCDEF";
+
+    std::string Encoded;
+    Encoded.reserve(Filename.length());
+    for (char Ch : Filename) {
+      if (isAcceptableSymbolChar(Ch)) {
+        Encoded.push_back(Ch);
+        continue;
+      }
+      const unsigned Byte = static_cast<unsigned char>(Ch);
+      Encoded.push_back('@');
+      Encoded.push_back(HexDigits[Byte >> 4]);
+      Encoded.push_back(HexDigits[Byte & 0xF]);
+    }
+
+    if (Type == FileType::Generated) {
+      // Since generated files may be different on different platforms,
+      // we need to include a platform-specific thing in the hash. Otherwise
+      // we can end up with hash collisions where different symbols from
+      // different platforms map to the same thing.
+      const char *Platform = getenv("MOZSEARCH_PLATFORM");
+      Encoded = std::string(Platform ? Platform : "") + std::string("@") + Encoded;
+    }
+    return Encoded;
+  }
+
+  // Return Name with every space replaced by an underscore.
+  std::string mangleQualifiedName(std::string Name) {
+    for (char &Ch : Name) {
+      if (Ch == ' ') {
+        Ch = '_';
+      }
+    }
+    return Name;
+  }
+
+ std::string getMangledName(clang::MangleContext *Ctx,  // Returns the symbol string for Decl; the scheme depends on the kind of declaration.
+ const clang::NamedDecl *Decl) {
+ if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {  // extern "C" functions: plain name.
+ return cast<FunctionDecl>(Decl)->getNameAsString();
+ }
+
+ if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {  // Functions and variables.
+ const DeclContext *DC = Decl->getDeclContext();
+ if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
+ isa<LinkageSpecDecl>(DC) ||
+ // isa<ExternCContextDecl>(DC) ||
+ isa<TagDecl>(DC)) {  // Declared at one of these scopes: use the name produced by the MangleContext.
+ llvm::SmallVector<char, 512> Output;
+ llvm::raw_svector_ostream Out(Output);
+#if CLANG_VERSION_MAJOR >= 11
+ // This code changed upstream in version 11:
+ // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47
+ GlobalDecl GD;
+ if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
+ GD = GlobalDecl(D, Ctor_Complete);  // Constructors/destructors are mangled as their "complete" variant.
+ } else if (const CXXDestructorDecl *D =
+ dyn_cast<CXXDestructorDecl>(Decl)) {
+ GD = GlobalDecl(D, Dtor_Complete);
+ } else {
+ GD = GlobalDecl(Decl);
+ }
+ Ctx->mangleName(GD, Out);
+#else
+ if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
+ Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
+ } else if (const CXXDestructorDecl *D =
+ dyn_cast<CXXDestructorDecl>(Decl)) {
+ Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
+ } else {
+ Ctx->mangleName(Decl, Out);
+ }
+#endif
+ return Out.str().str();
+ } else {  // Any other scope: "V_" + mangled location + "_" + hash of the name.
+ return std::string("V_") + mangleLocation(Decl->getLocation()) +
+ std::string("_") + hash(std::string(Decl->getName()));
+ }
+ } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) ||  // Types: "T_" prefix.
+ isa<ObjCInterfaceDecl>(Decl)) {
+ if (!Decl->getIdentifier()) {
+ // Anonymous: identify the type by its location instead of its name.
+ return std::string("T_") + mangleLocation(Decl->getLocation());
+ }
+
+ return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
+ } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {  // Namespaces: "NS_" prefix.
+ if (!Decl->getIdentifier()) {
+ // Anonymous: identify the namespace by its location instead of its name.
+ return std::string("NS_") + mangleLocation(Decl->getLocation());
+ }
+
+ return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
+ } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {  // ObjC ivars: "F_<" interface symbol ">_" name.
+ const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
+ return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
+ D2->getNameAsString();
+ } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {  // Fields: "F_<" record symbol ">_" name.
+ const RecordDecl *Record = D2->getParent();
+ return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
+ D2->getNameAsString();
+ } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {  // Enumerators: "E_<" parent symbol ">_" name.
+ const DeclContext *DC = Decl->getDeclContext();
+ if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
+ return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
+ D2->getNameAsString();
+ }
+ }
+
+ assert(false);  // No scheme matched Decl.
+ return std::string("");
+ }
+
+  // Debug aid: print "--> <file> <line:column>" for Loc on stdout.
+  void debugLocation(SourceLocation Loc) {
+    const std::string Where = locationToString(Loc);
+    const std::string File = std::string(SM.getFilename(Loc));
+    printf("--> %s %s\n", File.c_str(), Where.c_str());
+  }
+
+  // Debug aid: print a "Range" header followed by both endpoints of Range.
+  void debugRange(SourceRange Range) {
+    printf("Range\n");
+    const SourceLocation First = Range.getBegin();
+    debugLocation(First);
+    const SourceLocation Last = Range.getEnd();
+    debugLocation(Last);
+  }
+
+public:
+  // Bind to the compiler instance's source manager and language options, and
+  // register a PreprocessorHook pointing back at this consumer.
+  IndexConsumer(CompilerInstance &CI)
+      : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr),
+        AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
+    std::unique_ptr<PreprocessorHook> Hook = make_unique<PreprocessorHook>(this);
+    CI.getPreprocessor().addPPCallbacks(std::move(Hook));
+  }
+
+  // Create a new IndexConsumer for the same CompilerInstance. Diags is unused.
+  virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
+    IndexConsumer *Fresh = new IndexConsumer(CI);
+    return Fresh;
+  }
+
+#if !defined(_WIN32) && !defined(_WIN64)
+  // Scoped stopwatch: adds the time elapsed since construction to *Counter,
+  // either when stop() is called or on destruction, whichever comes first.
+  struct AutoTime {
+    double *Counter;
+    double Start;
+
+    AutoTime(double *Counter) : Counter(Counter), Start(time()) {}
+
+    // Account for the elapsed time now; Start == 0 tells the destructor that
+    // nothing is left to add.
+    void stop() {
+      const double Elapsed = time() - Start;
+      *Counter += Elapsed;
+      Start = 0;
+    }
+
+    ~AutoTime() {
+      if (Start != 0) {
+        *Counter += time() - Start;
+      }
+    }
+  };
+#endif
+
+  // Called with the AST context of the translation unit. Traverses the whole
+  // AST, then, for every interesting file, merges the collected output lines
+  // with the lines already in that file's analysis output: the merged result
+  // is written to a tmp file which is then moved into place, all while
+  // holding an AutoLockFile lock. Any I/O failure is fatal (exit(1)).
+  virtual void HandleTranslationUnit(ASTContext &Ctx) {
+    CurMangleContext =
+        clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
+
+    AstContext = &Ctx;
+    TraverseDecl(Ctx.getTranslationUnitDecl());
+
+    // Emit the JSON data for all files now.
+    std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
+    for (It = FileMap.begin(); It != FileMap.end(); It++) {
+      if (!It->second->Interesting) {
+        continue;
+      }
+
+      FileInfo &Info = *It->second;
+
+      std::string Filename = Outdir + Info.Realname;
+      std::string SrcFilename = Info.Generated
+        ? Objdir + Info.Realname.substr(GENERATED.length())
+        : Srcdir + PATHSEP_STRING + Info.Realname;
+
+      ensurePath(Filename);
+
+      // We lock the output file in case some other clang process is trying to
+      // write to it at the same time.
+      AutoLockFile Lock(SrcFilename, Filename);
+
+      if (!Lock.success()) {
+        fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
+        exit(1);
+      }
+
+      // Merge our results with the existing lines from the output file.
+      // This ensures that header files that are included multiple times
+      // in different ways are analyzed completely.
+      std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary);
+      FILE *OutFp = Lock.openTmp();
+      if (!OutFp) {
+        fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str());
+        exit(1);
+      }
+
+      // Sort our new results and get an iterator to them
+      std::sort(Info.Output.begin(), Info.Output.end());
+      std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin();
+      std::string LastNewWritten;
+
+      // Loop over the existing (sorted) lines in the analysis output file.
+      // (The good() check also handles the case where Fin did not exist when we
+      // went to open it.)
+      while(Fin.good()) {
+        std::string OldLine;
+        std::getline(Fin, OldLine);
+        // Skip blank lines.
+        if (OldLine.length() == 0) {
+          continue;
+        }
+        // We need to put the newlines back that getline() eats.
+        OldLine.push_back('\n');
+
+        // Write any results from Info.Output that are lexicographically
+        // smaller than OldLine (read from the existing file), but make sure
+        // to skip duplicates. Keep advancing NewLinesIter until we reach an
+        // entry that is lexicographically greater than OldLine.
+        for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
+          if (*NewLinesIter > OldLine) {
+            break;
+          }
+          if (*NewLinesIter == OldLine) {
+            // OldLine itself is written below.
+            continue;
+          }
+          if (*NewLinesIter == LastNewWritten) {
+            // dedupe the new entries being written
+            continue;
+          }
+          if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
+            fprintf(stderr, "Unable to write %zu bytes[1] to tmp output file for %s\n",
+                    NewLinesIter->length(), Filename.c_str());
+            exit(1);
+          }
+          LastNewWritten = *NewLinesIter;
+        }
+
+        // Write the entry read from the existing file.
+        if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
+          fprintf(stderr, "Unable to write %zu bytes[2] to tmp output file for %s\n",
+                  OldLine.length(), Filename.c_str());
+          exit(1);
+        }
+      }
+
+      // We finished reading from Fin
+      Fin.close();
+
+      // Finish iterating our new results, discarding duplicates
+      for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
+        if (*NewLinesIter == LastNewWritten) {
+          continue;
+        }
+        if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
+          fprintf(stderr, "Unable to write %zu bytes[3] to tmp output file for %s\n",
+                  NewLinesIter->length(), Filename.c_str());
+          exit(1);
+        }
+        LastNewWritten = *NewLinesIter;
+      }
+
+      // Done writing all the things, close it and replace the old output file
+      // with the new one. stdio buffers writes, so a failure to flush the last
+      // data only shows up in fclose's result; it must be checked before the
+      // tmp file is allowed to replace the real one.
+      if (fclose(OutFp) != 0) {
+        fprintf(stderr, "Unable to close tmp output file for %s (err %d)\n", Filename.c_str(), errno);
+        exit(1);
+      }
+      if (!Lock.moveTmp()) {
+        fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno);
+        exit(1);
+      }
+    }
+  }
+
+  // Unfortunately, we have to override all these methods in order to track the
+  // context we're inside.
+
+  // Traverse an enum with it recorded as the current declaration context.
+  bool TraverseEnumDecl(EnumDecl *D) {
+    AutoSetContext Scope(this, D);
+    const bool Continue = Super::TraverseEnumDecl(D);
+    return Continue;
+  }
+  // Traverse a record with it recorded as the current declaration context.
+  bool TraverseRecordDecl(RecordDecl *D) {
+    AutoSetContext Scope(this, D);
+    const bool Continue = Super::TraverseRecordDecl(D);
+    return Continue;
+  }
+  // Traverse a C++ record with it recorded as the current declaration context.
+  bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
+    AutoSetContext Scope(this, D);
+    const bool Continue = Super::TraverseCXXRecordDecl(D);
+    return Continue;
+  }
+  // Traverse a function with it recorded as the current declaration context.
+  bool TraverseFunctionDecl(FunctionDecl *D) {
+    AutoSetContext Scope(this, D);
+    // (See the larger AutoTemplateContext comment for more information.) If a
+    // method on a templated class is declared out-of-line, we need to analyze
+    // the definition inside the scope of the template or else we won't properly
+    // handle member access on the templated type.
+    if (TemplateStack) {
+      const FunctionDecl *Definition;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseFunctionDecl(D);
+  }
+  // Traverse a method with it recorded as the current declaration context.
+  bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
+    AutoSetContext Scope(this, D);
+    // See TraverseFunctionDecl: inside a template, also visit a separate
+    // definition of D.
+    if (TemplateStack) {
+      const FunctionDecl *Definition;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseCXXMethodDecl(D);
+  }
+  // Traverse a constructor with it recorded as the current declaration
+  // context; the context is created with VisitImplicit enabled.
+  bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
+    AutoSetContext Scope(this, D, /*VisitImplicit=*/true);
+    // See TraverseFunctionDecl: inside a template, also visit a separate
+    // definition of D.
+    if (TemplateStack) {
+      const FunctionDecl *Definition;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseCXXConstructorDecl(D);
+  }
+ // Walks a conversion-operator declaration with D installed as the enclosing
+ // context.
+ bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
+   AutoSetContext EnclosingDecl(this, D);
+
+   // Same out-of-line definition handling as TraverseFunctionDecl.
+   if (TemplateStack) {
+     const FunctionDecl *Definition = nullptr;
+     const bool HasSeparateDefinition =
+         D->isDefined(Definition) && Definition && Definition != D;
+     if (HasSeparateDefinition) {
+       TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+     }
+   }
+
+   return Super::TraverseCXXConversionDecl(D);
+ }
+ // Walks a destructor declaration with D installed as the enclosing context.
+ bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
+   AutoSetContext EnclosingDecl(this, D);
+
+   // Same out-of-line definition handling as TraverseFunctionDecl.
+   if (TemplateStack) {
+     const FunctionDecl *Definition = nullptr;
+     const bool HasSeparateDefinition =
+         D->isDefined(Definition) && Definition && Definition != D;
+     if (HasSeparateDefinition) {
+       TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+     }
+   }
+
+   return Super::TraverseCXXDestructorDecl(D);
+ }
+
+ // Used to keep track of the context in which a token appears.
+ struct Context {
+   // Ultimately this becomes the "context" JSON property.
+   std::string Name;
+
+   // Ultimately this becomes the "contextsym" JSON property.
+   std::string Symbol;
+
+   // Creates an empty context: both strings are empty, and visitIdentifier
+   // omits the corresponding JSON properties for empty strings.
+   Context() {}
+
+   // Both strings are taken by value and then moved into the members, so an
+   // rvalue argument is never copied and an lvalue argument is copied once.
+   Context(std::string Name, std::string Symbol)
+       : Name(std::move(Name)), Symbol(std::move(Symbol)) {}
+ };
+
+ // Builds the Context (qualified name plus mangled symbol) that describes D.
+ // When D is a function that is a template instantiation, the template
+ // pattern it was instantiated from is described instead.
+ Context translateContext(NamedDecl *D) {
+   NamedDecl *Described = D;
+   if (const FunctionDecl *Func = dyn_cast<FunctionDecl>(D)) {
+     if (Func->isTemplateInstantiation()) {
+       Described = Func->getTemplateInstantiationPattern();
+     }
+   }
+
+   return Context(Described->getQualifiedNameAsString(),
+                  getMangledName(CurMangleContext, Described));
+ }
+
+ // Returns the context for a token at Loc: the innermost declaration that is
+ // currently being traversed, or an empty Context when there is none.
+ Context getContext(SourceLocation Loc) {
+   // Inside a macro definition no context is returned at all; it would
+   // probably not be what the user expects.
+   const bool InMacroBody = SM.isMacroBodyExpansion(Loc);
+   if (InMacroBody || !CurDeclContext) {
+     return Context();
+   }
+
+   return translateContext(CurDeclContext->Decl);
+ }
+
+ // Similar to getContext(SourceLocation), but it skips the declaration passed
+ // in. This is useful if we want the context of a declaration that's already
+ // on the stack.
+ Context getContext(Decl *D) {
+   // Inside a macro definition no context is returned at all; it would
+   // probably not be what the user expects.
+   if (SM.isMacroBodyExpansion(D->getLocation())) {
+     return Context();
+   }
+
+   // Walk outwards from the innermost context and describe the first entry
+   // that is not D itself.
+   for (AutoSetContext *Frame = CurDeclContext; Frame; Frame = Frame->Prev) {
+     if (Frame->Decl == D) {
+       continue;
+     }
+     return translateContext(Frame->Decl);
+   }
+
+   // Every entry on the stack was D, or the stack was empty.
+   return Context();
+ }
+
+ // Analyzing template code is tricky. Suppose we have this code:
+ //
+ // template<class T>
+ // bool Foo(T* ptr) { return T::StaticMethod(ptr); }
+ //
+ // If we analyze the body of Foo without knowing the type T, then we will not
+ // be able to generate any information for StaticMethod. However, analyzing
+ // Foo for every possible instantiation is inefficient and it also generates
+ // too much data in some cases. For example, the following code would generate
+ // one definition of Baz for every instantiation, which is undesirable:
+ //
+ // template<class T>
+ // class Bar { struct Baz { ... }; };
+ //
+ // To solve this problem, we analyze templates only once. We do so in a
+ // GatherDependent mode where we look for "dependent scoped member
+ // expressions" (i.e., things like StaticMethod). We keep track of the
+ // locations of these expressions. If we find one or more of them, we analyze
+ // the template for each instantiation, in an AnalyzeDependent mode. This mode
+ // ignores all source locations except for the ones where we found dependent
+ // scoped member expressions before. For these locations, we generate a
+ // separate JSON result for each instantiation.
+ //
+ // We inherit our parent's mode if it exists. This is because if our
+ // parent is in analyze mode, it means we've already lived a full life in
+ // gather mode and we must not restart in gather mode or we'll cause the
+ // indexer to visit EVERY identifier, which is way too much data.
+ struct AutoTemplateContext {
+   // Pushes this frame on top of Self->TemplateStack. The starting mode is
+   // copied from the enclosing frame when there is one; otherwise the frame
+   // starts out in gather mode.
+   AutoTemplateContext(IndexConsumer *Self)
+       : Self(Self),
+         CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode
+                                     : Mode::GatherDependent),
+         Parent(Self->TemplateStack) {
+     Self->TemplateStack = this;
+   }
+
+   // Pops this frame again, making the enclosing frame current.
+   ~AutoTemplateContext() { Self->TemplateStack = Parent; }
+
+   // Templates are traversed in one of two modes:
+   enum class Mode {
+     // Gather mode does not traverse into specializations. It looks for
+     // locations where more information from template specializations would
+     // help.
+     GatherDependent,
+
+     // Analyze mode traverses into template specializations and records
+     // information about the token locations that were saved in gather mode.
+     AnalyzeDependent,
+   };
+
+   // Remembers Loc as the location of a dependent scoped member expression.
+   // The location is recorded on this frame and on each enclosing frame in
+   // turn, stopping at the first frame that is already in analyze mode.
+   void visitDependent(SourceLocation Loc) {
+     for (AutoTemplateContext *Frame = this; Frame; Frame = Frame->Parent) {
+       if (Frame->CurMode == Mode::AnalyzeDependent) {
+         return;
+       }
+       Frame->DependentLocations.insert(Loc.getRawEncoding());
+     }
+   }
+
+   // True while this frame is still in gather mode.
+   bool inGatherMode() {
+     return CurMode == Mode::GatherDependent;
+   }
+
+   // Do the extra AnalyzeDependent passes (one per instantiation) have to be
+   // performed? That is the case as soon as this frame or any enclosing frame
+   // has recorded at least one dependent location.
+   bool needsAnalysis() const {
+     for (const AutoTemplateContext *Frame = this; Frame;
+          Frame = Frame->Parent) {
+       if (!Frame->DependentLocations.empty()) {
+         return true;
+       }
+     }
+     return false;
+   }
+
+   // Moves this frame from gather mode to analyze mode.
+   void switchMode() { CurMode = Mode::AnalyzeDependent; }
+
+   // Should each template instantiation be analyzed separately? True when
+   // this frame or any enclosing frame is in analyze mode.
+   bool shouldVisitTemplateInstantiations() const {
+     for (const AutoTemplateContext *Frame = this; Frame;
+          Frame = Frame->Parent) {
+       if (Frame->CurMode == Mode::AnalyzeDependent) {
+         return true;
+       }
+     }
+     return false;
+   }
+
+   // For a given expression/statement location, should JSON data be emitted?
+   // Walking outwards, the answer is yes at the first frame that is in gather
+   // mode or that has Loc among its recorded dependent locations.
+   bool shouldVisit(SourceLocation Loc) {
+     const auto Encoded = Loc.getRawEncoding();
+     for (AutoTemplateContext *Frame = this; Frame; Frame = Frame->Parent) {
+       if (Frame->CurMode == Mode::GatherDependent) {
+         return true;
+       }
+       if (Frame->DependentLocations.count(Encoded) != 0) {
+         return true;
+       }
+     }
+     return false;
+   }
+
+ private:
+   IndexConsumer *Self;
+   Mode CurMode;
+   std::unordered_set<unsigned> DependentLocations;
+   AutoTemplateContext *Parent;
+ };
+
+ AutoTemplateContext *TemplateStack;
+
+ // Template instantiations are only visited when a template frame is active
+ // and that frame asks for it; outside of any template the answer is no.
+ bool shouldVisitTemplateInstantiations() const {
+   return TemplateStack != nullptr &&
+          TemplateStack->shouldVisitTemplateInstantiations();
+ }
+
+ // Implicit code is visited only while the current declaration context was
+ // created with its VisitImplicit flag set.
+ bool shouldVisitImplicitCode() const {
+   if (!CurDeclContext) {
+     return false;
+   }
+   return CurDeclContext->VisitImplicit;
+ }
+
+ // Traverses a class template. The template itself is always walked first;
+ // if dependent locations have been recorded, the frame is switched to
+ // analyze mode and every redeclaration of every specialization of the
+ // canonical template declaration is walked as well. Always returns true;
+ // the results of the nested traversals are not propagated.
+ bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
+   AutoTemplateContext TemplateFrame(this);
+   Super::TraverseClassTemplateDecl(D);
+
+   if (TemplateFrame.needsAnalysis()) {
+     TemplateFrame.switchMode();
+
+     // Specializations are only walked from the canonical declaration.
+     if (D == D->getCanonicalDecl()) {
+       for (auto *Specialization : D->specializations()) {
+         for (auto *Redecl : Specialization->redecls()) {
+           // Injected-class-names are not wanted in this traversal.
+           if (!cast<CXXRecordDecl>(Redecl)->isInjectedClassName()) {
+             TraverseDecl(Redecl);
+           }
+         }
+       }
+     }
+   }
+
+   return true;
+ }
+
+ // Traverses a function template. The template itself is only walked while
+ // the new frame is in gather mode; if dependent locations have been
+ // recorded, the frame is switched to analyze mode and every redeclaration of
+ // every specialization of the canonical template declaration is walked as
+ // well. Always returns true; the results of the nested traversals are not
+ // propagated.
+ bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
+   AutoTemplateContext TemplateFrame(this);
+   if (TemplateFrame.inGatherMode()) {
+     Super::TraverseFunctionTemplateDecl(D);
+   }
+
+   if (TemplateFrame.needsAnalysis()) {
+     TemplateFrame.switchMode();
+
+     // Specializations are only walked from the canonical declaration.
+     if (D == D->getCanonicalDecl()) {
+       for (auto *Specialization : D->specializations()) {
+         for (auto *Redecl : Specialization->redecls()) {
+           TraverseDecl(Redecl);
+         }
+       }
+     }
+   }
+
+   return true;
+ }
+
+ // Should JSON data be emitted for Loc? Outside of any template frame every
+ // location is visited; otherwise the innermost template frame decides.
+ bool shouldVisit(SourceLocation Loc) {
+   return !TemplateStack || TemplateStack->shouldVisit(Loc);
+ }
+
+ // Bit flags accepted by visitIdentifier through its Flags argument.
+ enum {
+   // Omit the identifier from cross-referencing across files. This is
+   // usually what is wanted for local variables.
+   NoCrossref = 0x1,
+   // The token carrying the analysis data is not an identifier, so the check
+   // that tries to ensure a sane identifier token is skipped.
+   NotIdentifierToken = 0x2,
+   // The end of the provided SourceRange is valid and should be respected.
+   // Without this flag, visitIdentifier uses only the start of the
+   // SourceRange and auto-detects the end from whatever token is found at the
+   // start.
+   LocRangeEndValid = 0x4
+ };
+
+ void emitStructuredInfo(SourceLocation Loc, const RecordDecl *decl) { // Emits one "structured" JSON line describing a record (size, supers, methods, fields) into the Output of getFileInfo(Loc).
+ std::string json_str;
+ llvm::raw_string_ostream ros(json_str);
+ llvm::json::OStream J(ros);
+ // Start the top-level object.
+ J.objectBegin();
+
+ unsigned StartOffset = SM.getFileOffset(Loc);
+ unsigned EndOffset =
+ StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+ J.attribute("loc", locationToString(Loc, EndOffset - StartOffset)); // The second argument is the lexer-measured length of the token at Loc.
+ J.attribute("structured", 1);
+ J.attribute("pretty", getQualifiedName(decl));
+ J.attribute("sym", getMangledName(CurMangleContext, decl));
+
+ J.attribute("kind", TypeWithKeyword::getTagTypeKindName(decl->getTagKind()));
+
+ const ASTContext &C = *AstContext;
+ const ASTRecordLayout &Layout = C.getASTRecordLayout(decl); // NOTE(review): the only caller in view (VisitNamedDecl) passes non-dependent definitions outside any template context - confirm before adding callers.
+
+ J.attribute("sizeBytes", Layout.getSize().getQuantity());
+
+ auto cxxDecl = dyn_cast<CXXRecordDecl>(decl);
+
+ if (cxxDecl) { // "supers" and "methods" are only emitted for C++ records.
+ J.attributeBegin("supers");
+ J.arrayBegin();
+ for (const CXXBaseSpecifier &Base : cxxDecl->bases()) {
+ const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+
+ J.objectBegin();
+
+ J.attribute("pretty", getQualifiedName(BaseDecl));
+ J.attribute("sym", getMangledName(CurMangleContext, BaseDecl));
+
+ J.attributeBegin("props");
+ J.arrayBegin();
+ if (Base.isVirtual()) {
+ J.value("virtual");
+ }
+ J.arrayEnd();
+ J.attributeEnd();
+
+ J.objectEnd();
+ }
+ J.arrayEnd();
+ J.attributeEnd();
+
+ J.attributeBegin("methods");
+ J.arrayBegin();
+ for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) {
+ J.objectBegin();
+
+ J.attribute("pretty", getQualifiedName(MethodDecl));
+ J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
+
+ // TODO: Better figure out what to do for non-isUserProvided methods
+ // which means there's potentially semantic data that doesn't correspond
+ // to a source location in the source. Should we be emitting
+ // structured info for those when we're processing the class here?
+
+ J.attributeBegin("props"); // One string per property that holds for the method; an empty array when none do.
+ J.arrayBegin();
+ if (MethodDecl->isStatic()) {
+ J.value("static");
+ }
+ if (MethodDecl->isInstance()) {
+ J.value("instance");
+ }
+ if (MethodDecl->isVirtual()) {
+ J.value("virtual");
+ }
+ if (MethodDecl->isUserProvided()) {
+ J.value("user");
+ }
+ if (MethodDecl->isDefaulted()) {
+ J.value("defaulted");
+ }
+ if (MethodDecl->isDeleted()) {
+ J.value("deleted");
+ }
+ if (MethodDecl->isConstexpr()) {
+ J.value("constexpr");
+ }
+ J.arrayEnd();
+ J.attributeEnd();
+
+ J.objectEnd();
+ }
+ J.arrayEnd();
+ J.attributeEnd();
+ }
+
+ J.attributeBegin("fields");
+ J.arrayBegin();
+ uint64_t iField = 0; // Index of the current field; advanced in lockstep with the field iterator and used to query the record layout.
+ for (RecordDecl::field_iterator It = decl->field_begin(),
+ End = decl->field_end(); It != End; ++It, ++iField) {
+ const FieldDecl &Field = **It;
+ uint64_t localOffsetBits = Layout.getFieldOffset(iField);
+ CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits);
+
+ J.objectBegin();
+ J.attribute("pretty", getQualifiedName(&Field));
+ J.attribute("sym", getMangledName(CurMangleContext, &Field));
+ QualType FieldType = Field.getType();
+ J.attribute("type", FieldType.getAsString());
+ QualType CanonicalFieldType = FieldType.getCanonicalType();
+ const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl();
+ if (tagDecl) { // "typesym" is only emitted when the canonical field type maps to a tag declaration.
+ J.attribute("typesym", getMangledName(CurMangleContext, tagDecl));
+ }
+ J.attribute("offsetBytes", localOffsetBytes.getQuantity());
+ if (Field.isBitField()) {
+ J.attributeBegin("bitPositions");
+ J.objectBegin();
+
+ J.attribute("begin", unsigned(localOffsetBits - C.toBits(localOffsetBytes))); // Bits left over after converting the field offset to whole chars and back to bits.
+ J.attribute("width", Field.getBitWidthValue(C));
+
+ J.objectEnd();
+ J.attributeEnd();
+ } else {
+ // Try and get the field as a record itself so we can know its size, but
+ // we don't actually want to recurse into it.
+ if (auto FieldRec = Field.getType()->getAs<RecordType>()) {
+ auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl());
+ J.attribute("sizeBytes", FieldLayout.getSize().getQuantity());
+ } else {
+ // We were unable to get it as a record, which suggests it's a normal
+ // type, in which case let's just ask for the type size. (Maybe this
+ // would also work for the above case too?)
+ uint64_t typeSizeBits = C.getTypeSize(Field.getType());
+ CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits);
+ J.attribute("sizeBytes", typeSizeBytes.getQuantity());
+ }
+ }
+ J.objectEnd();
+ }
+ J.arrayEnd();
+ J.attributeEnd();
+
+ // End the top-level object.
+ J.objectEnd();
+
+ FileInfo *F = getFileInfo(Loc);
+ // Terminate the record with a newline before it is appended to the file's output lines.
+ ros << '\n';
+ F->Output.push_back(std::move(ros.str()));
+ }
+
+ // Emits one "structured" JSON line describing a function or C++ method into
+ // the Output of getFileInfo(Loc). Methods additionally get a "parentsym"
+ // (when they have a parent) and the list of methods they override; both
+ // kinds get a "props" array with one string per property that holds.
+ void emitStructuredInfo(SourceLocation Loc, const FunctionDecl *decl) {
+   std::string Buffer;
+   llvm::raw_string_ostream Stream(Buffer);
+   llvm::json::OStream Json(Stream);
+
+   // Non-null only when decl is a C++ method.
+   const auto *Method = dyn_cast<CXXMethodDecl>(decl);
+
+   // The top-level object.
+   Json.object([&] {
+     const unsigned TokenStart = SM.getFileOffset(Loc);
+     const unsigned TokenEnd =
+         TokenStart + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+     Json.attribute("loc", locationToString(Loc, TokenEnd - TokenStart));
+     Json.attribute("structured", 1);
+     Json.attribute("pretty", getQualifiedName(decl));
+     Json.attribute("sym", getMangledName(CurMangleContext, decl));
+
+     if (!Method) {
+       Json.attribute("kind", "function");
+     } else {
+       Json.attribute("kind", "method");
+       if (auto Parent = Method->getParent()) {
+         Json.attribute("parentsym", getMangledName(CurMangleContext, Parent));
+       }
+
+       Json.attributeArray("overrides", [&] {
+         for (const CXXMethodDecl *Overridden : Method->overridden_methods()) {
+           // TODO: Make sure template traversals are handled appropriately.
+           // The (now removed) findOverriddenMethods used to replace a
+           // template instantiation with its instantiation pattern here.
+           // The pre-emptive dereferencing/avoidance of templates elsewhere
+           // may already protect against this, but it needs more
+           // investigation.
+           Json.object([&] {
+             Json.attribute("pretty", getQualifiedName(Overridden));
+             Json.attribute("sym", getMangledName(CurMangleContext, Overridden));
+           });
+         }
+       });
+     }
+
+     // ## Props
+     Json.attributeArray("props", [&] {
+       // The first few properties only exist on methods, but all properties
+       // are wanted in the same array, so those come first and are
+       // conditional on having a method.
+       if (Method) {
+         if (Method->isStatic()) {
+           Json.value("static");
+         }
+         if (Method->isInstance()) {
+           Json.value("instance");
+         }
+         if (Method->isVirtual()) {
+           Json.value("virtual");
+         }
+         if (Method->isUserProvided()) {
+           Json.value("user");
+         }
+       }
+       if (decl->isDefaulted()) {
+         Json.value("defaulted");
+       }
+       if (decl->isDeleted()) {
+         Json.value("deleted");
+       }
+       if (decl->isConstexpr()) {
+         Json.value("constexpr");
+       }
+     });
+   });
+
+   // Records are newline-terminated.
+   FileInfo *F = getFileInfo(Loc);
+   Stream << '\n';
+   F->Output.push_back(std::move(Stream.str()));
+ }
+
+ /**
+ * Emit structured info for a field. Right now the intent is for this to just
+ * be a pointer to its parent's structured info with this method entirely
+ * avoiding getting the ASTRecordLayout.
+ *
+ * TODO: Give more thought on where to locate the canonical info on fields and
+ * how to normalize their exposure over the web. We could relink the info
+ * both at cross-reference time and web-server lookup time. This is also
+ * called out in `analysis.md`.
+ */
+ void emitStructuredInfo(SourceLocation Loc, const FieldDecl *decl) {
+   // XXX the call to decl->getParent() below would assert for ObjCIvarDecl
+   // instances because their DeclContext is not a RecordDecl. So just bail
+   // for now.
+   // TODO: better support ObjC.
+   if (isa<ObjCIvarDecl>(decl)) {
+     return;
+   }
+
+   std::string Buffer;
+   llvm::raw_string_ostream Stream(Buffer);
+   llvm::json::OStream Json(Stream);
+
+   // The top-level object: identifies the field and points at its parent.
+   Json.object([&] {
+     const unsigned TokenStart = SM.getFileOffset(Loc);
+     const unsigned TokenEnd =
+         TokenStart + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+     Json.attribute("loc", locationToString(Loc, TokenEnd - TokenStart));
+     Json.attribute("structured", 1);
+     Json.attribute("pretty", getQualifiedName(decl));
+     Json.attribute("sym", getMangledName(CurMangleContext, decl));
+     Json.attribute("kind", "field");
+
+     if (auto Parent = decl->getParent()) {
+       Json.attribute("parentsym", getMangledName(CurMangleContext, Parent));
+     }
+   });
+
+   // Records are newline-terminated.
+   FileInfo *F = getFileInfo(Loc);
+   Stream << '\n';
+   F->Output.push_back(std::move(Stream.str()));
+ }
+
+ // XXX Type annotating.
+ // QualType is the type class. It has helpers like TagDecl via getAsTagDecl.
+ // ValueDecl exposes a getType() method.
+ //
+ // Arguably it makes sense to only expose types that Searchfox has definitions
+ // for as first-class. Probably the way to go is like context/contextsym.
+ // We expose a "type" which is just a human-readable string which has no
+ // semantic purposes and is just a display string, plus then a "typesym" which
+ // we expose if we were able to map the type.
+ //
+ // Other meta-info: field offsets. Ancestor types.
+
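+ // This is the function that emits the "target" and "source" analysis JSON
+ // records (the emitStructuredInfo overloads above emit the "structured"
+ // records). It should be called for each identifier that corresponds to a
+ // symbol.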
+ void visitIdentifier(const char *Kind, const char *SyntaxKind,
+                      llvm::StringRef QualName, SourceRange LocRange,
+                      std::string Symbol,
+                      QualType MaybeType = QualType(),
+                      Context TokenContext = Context(), int Flags = 0,
+                      SourceRange PeekRange = SourceRange(),
+                      SourceRange NestingRange = SourceRange()) {
+   // Emits up to two newline-terminated JSON records for the token at
+   // LocRange.getBegin() into the Output of getFileInfo(Loc):
+   //  - a "target" record, unless Flags contains NoCrossref;
+   //  - a "source" record, always.
+   //
+   // Kind / SyntaxKind: combined as "<Kind>,<SyntaxKind>" for the "syntax"
+   //   property; Kind is also the "kind" of the target record and SyntaxKind
+   //   prefixes the "pretty" string of the source record.
+   // QualName: the pretty name. It is a StringRef and is therefore always
+   //   consumed together with its length, never as a C string.
+   // LocRange: the token location; its end is only honoured when Flags
+   //   contains LocRangeEndValid.
+   // Symbol: emitted as "sym" in both records.
+   // MaybeType: when non-null, adds "type" (and "typesym" if the canonical
+   //   type maps to a tag declaration) to the source record.
+   // TokenContext: adds "context"/"contextsym" to the target record when the
+   //   respective string is non-empty.
+   // PeekRange / NestingRange: when valid and convertible to a non-empty
+   //   string, added as "peekRange" (target) / "nestingRange" (source).
+   SourceLocation Loc = LocRange.getBegin();
+   if (!shouldVisit(Loc)) {
+     return;
+   }
+
+   // Find the file positions corresponding to the token.
+   unsigned StartOffset = SM.getFileOffset(Loc);
+   unsigned EndOffset = (Flags & LocRangeEndValid)
+       ? SM.getFileOffset(LocRange.getEnd())
+       : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+
+   // Both records carry the same "loc" string, so it is computed once.
+   const std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
+
+   if (!(Flags & NotIdentifierToken)) {
+     // Get the token's characters so we can make sure it's a valid token.
+     const char *StartChars = SM.getCharacterData(Loc);
+     std::string Text(StartChars, EndOffset - StartOffset);
+     if (!isValidIdentifier(Text)) {
+       return;
+     }
+   }
+
+   FileInfo *F = getFileInfo(Loc);
+
+   if (!(Flags & NoCrossref)) {
+     std::string json_str;
+     llvm::raw_string_ostream ros(json_str);
+     llvm::json::OStream J(ros);
+     // Start the top-level object.
+     J.objectBegin();
+
+     J.attribute("loc", LocStr);
+     J.attribute("target", 1);
+     J.attribute("kind", Kind);
+     // Pass the StringRef itself: StringRef::data() is not guaranteed to be
+     // NUL-terminated, so it must not be treated as a C string.
+     J.attribute("pretty", QualName);
+     J.attribute("sym", Symbol);
+     if (!TokenContext.Name.empty()) {
+       J.attribute("context", TokenContext.Name);
+     }
+     if (!TokenContext.Symbol.empty()) {
+       J.attribute("contextsym", TokenContext.Symbol);
+     }
+     if (PeekRange.isValid()) {
+       std::string PeekRangeStr = lineRangeToString(PeekRange);
+       if (!PeekRangeStr.empty()) {
+         J.attribute("peekRange", PeekRangeStr);
+       }
+     }
+
+     // End the top-level object.
+     J.objectEnd();
+     // we want a newline.
+     ros << '\n';
+     F->Output.push_back(std::move(ros.str()));
+   }
+
+   // Generate a single "source":1 for all the symbols. If we search from here,
+   // we want to union the results for every symbol in `symbols`.
+   std::string json_str;
+   llvm::raw_string_ostream ros(json_str);
+   llvm::json::OStream J(ros);
+   // Start the top-level object.
+   J.objectBegin();
+
+   J.attribute("loc", LocStr);
+   J.attribute("source", 1);
+
+   if (NestingRange.isValid()) {
+     std::string NestingRangeStr = fullRangeToString(NestingRange);
+     if (!NestingRangeStr.empty()) {
+       J.attribute("nestingRange", NestingRangeStr);
+     }
+   }
+
+   std::string Syntax;
+   if (Flags & NoCrossref) {
+     J.attribute("syntax", "");
+   } else {
+     Syntax = Kind;
+     Syntax.push_back(',');
+     Syntax.append(SyntaxKind);
+     J.attribute("syntax", Syntax);
+   }
+
+   if (!MaybeType.isNull()) {
+     J.attribute("type", MaybeType.getAsString());
+     QualType canonical = MaybeType.getCanonicalType();
+     const TagDecl *decl = canonical->getAsTagDecl();
+     if (decl) {
+       std::string Mangled = getMangledName(CurMangleContext, decl);
+       J.attribute("typesym", Mangled);
+     }
+   }
+
+   // "<SyntaxKind> <QualName>"; the name is appended with an explicit length
+   // for the same reason as above.
+   std::string Pretty(SyntaxKind);
+   Pretty.push_back(' ');
+   Pretty.append(QualName.data(), QualName.size());
+   J.attribute("pretty", Pretty);
+
+   J.attribute("sym", Symbol);
+
+   if (Flags & NoCrossref) {
+     J.attribute("no_crossref", 1);
+   }
+
+   // End the top-level object.
+   J.objectEnd();
+
+   // we want a newline.
+   ros << '\n';
+   F->Output.push_back(std::move(ros.str()));
+ }
+
+ // Replaces *Loc, in place, with its spelling location.
+ void normalizeLocation(SourceLocation *Loc) {
+   const SourceLocation Spelling = SM.getSpellingLoc(*Loc);
+   *Loc = Spelling;
+ }
+
+ // For cases where the left-brace is not directly accessible from the AST,
+ // helper to use the lexer to find the brace. Make sure you're picking the
+ // start location appropriately!
+ SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
+   // Whitespace after the brace is not skipped.
+   constexpr bool SkipTrailingWhitespaceAndNewLine = false;
+   return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO,
+                                        SkipTrailingWhitespaceAndNewLine);
+ }
+
+ // If the provided statement is compound, return its range.
+ SourceRange getCompoundStmtRange(Stmt* D) {
+   // A null statement, or one that is not a CompoundStmt, yields an invalid
+   // (default-constructed) range.
+   if (D) {
+     if (CompoundStmt *Compound = dyn_cast<CompoundStmt>(D)) {
+       return Compound->getSourceRange();
+     }
+   }
+   return SourceRange();
+ }
+
+ // Computes the peek range for a function: from the start of the declaration
+ // up to its name or, when parameters are available in the same file, up to
+ // the end of the last such parameter.
+ SourceRange getFunctionPeekRange(FunctionDecl* D) {
+   // The range always begins at the start of the function decl, which may
+   // include a return type on a separate line.
+   const SourceLocation PeekBegin = D->getBeginLoc();
+
+   // Unless extended below, the range ends at the function's name.
+   SourceLocation PeekEnd = D->getLocation();
+   const FileID NameFile = SM.getDecomposedLoc(PeekEnd).first;
+
+   for (ParmVarDecl *Param : D->parameters()) {
+     // A parameter located in a different file than the name (macros or
+     // something similar may be involved) does not extend the range.
+     const FileID ParamFile = SM.getDecomposedLoc(Param->getLocation()).first;
+     if (ParamFile != NameFile) {
+       continue;
+     }
+
+     // Parameters are assumed to be in order, so the last match wins.
+     PeekEnd = Param->getEndLoc();
+   }
+
+   return SourceRange(PeekBegin, PeekEnd);
+ }
+
+ // Computes the peek range for a tag declaration: from the start of the
+ // declaration up to its name or, for C++ records with base specifiers in the
+ // same file, up to the end of the last such base specifier.
+ SourceRange getTagPeekRange(TagDecl* D) {
+   const SourceLocation PeekBegin = D->getBeginLoc();
+
+   // Unless extended below, the range ends at the tag's name.
+   SourceLocation PeekEnd = D->getLocation();
+   const FileID NameFile = SM.getDecomposedLoc(PeekEnd).first;
+
+   if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(D)) {
+     for (CXXBaseSpecifier &Base : Record->bases()) {
+       // A base specifier ending in a different file than the name (macros
+       // or something similar may be involved) does not extend the range.
+       const FileID BaseFile = SM.getDecomposedLoc(Base.getEndLoc()).first;
+       if (BaseFile != NameFile) {
+         continue;
+       }
+
+       // Base specifiers are assumed to be in order, so the last match wins.
+       PeekEnd = Base.getEndLoc();
+     }
+   }
+
+   return SourceRange(PeekBegin, PeekEnd);
+ }
+
+ // Returns the source range of the raw comment that the AST context
+ // associates with D, or an invalid (default-constructed) range when there is
+ // no such comment.
+ SourceRange getCommentRange(NamedDecl* D) {
+   const RawComment *Comment = AstContext->getRawCommentForDeclNoCache(D);
+   return Comment ? Comment->getSourceRange() : SourceRange();
+ }
+
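+ // Unions two ranges based on which one comes first. If either range is
+ // invalid, the other one is returned. As a sanity check, if the end of the
+ // first range and the start of the second are in different files, the first
+ // range is returned unchanged.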
+ SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
+   // With only one usable range there is nothing to combine.
+   if (Range1.isInvalid()) {
+     return Range2;
+   }
+   if (Range2.isInvalid()) {
+     return Range1;
+   }
+
+   // Something weird is probably happening with the preprocessor if the two
+   // ranges do not meet in the same file. Just return the first range then.
+   const FileID FirstEndFile = SM.getDecomposedLoc(Range1.getEnd()).first;
+   const FileID SecondBeginFile = SM.getDecomposedLoc(Range2.getBegin()).first;
+   if (FirstEndFile != SecondBeginFile) {
+     return Range1;
+   }
+
+   // Decide which range leads by comparing the start offset of the first
+   // range with the end offset of the second.
+   const unsigned FirstBeginOffset =
+       SM.getDecomposedLoc(Range1.getBegin()).second;
+   const unsigned SecondEndOffset = SM.getDecomposedLoc(Range2.getEnd()).second;
+   const bool FirstLeads = FirstBeginOffset <= SecondEndOffset;
+
+   return FirstLeads ? SourceRange(Range1.getBegin(), Range2.getEnd())
+                     : SourceRange(Range2.getBegin(), Range1.getEnd());
+ }
+
+ // Given a location and a range, returns the range if:
+ // - The location and the range live in the same file.
+ // - The range is well ordered and non-empty (end is strictly after begin).
+ // Returns an empty range otherwise.
+ SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
+   const auto Anchor = SM.getDecomposedLoc(Loc);
+   const auto RangeBegin = SM.getDecomposedLoc(Range.getBegin());
+   const auto RangeEnd = SM.getDecomposedLoc(Range.getEnd());
+
+   // Both ends of the range have to live in the same file as Loc.
+   const bool SameFile =
+       RangeBegin.first == Anchor.first && RangeEnd.first == Anchor.first;
+   if (!SameFile) {
+     return SourceRange();
+   }
+
+   // The end offset has to be strictly greater than the begin offset; a
+   // range whose two offsets are equal is rejected as well.
+   const bool StrictlyOrdered = RangeBegin.second < RangeEnd.second;
+   return StrictlyOrdered ? Range : SourceRange();
+ }
+
+ bool VisitNamedDecl(NamedDecl *D) { // Emits identifier records (via visitIdentifier) and, for some records/functions/fields, structured info for D; every path returns true.
+ SourceLocation Loc = D->getLocation();
+
+ // If the token is from a macro expansion and the expansion location
+ // is interesting, use that instead as it tends to be more useful.
+ SourceLocation expandedLoc = Loc; // Keeps the unadjusted location; it is consulted again by the destructor handling below.
+ if (SM.isMacroBodyExpansion(Loc)) {
+ Loc = SM.getFileLoc(Loc);
+ }
+
+ normalizeLocation(&Loc);
+ if (!isInterestingLocation(Loc)) {
+ return true;
+ }
+
+ if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
+ // Unnamed parameter in function proto.
+ return true;
+ }
+
+ int Flags = 0;
+ const char *Kind = "def";
+ const char *PrettyKind = "?";
+ bool wasTemplate = false;
+ SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc()); // Default peek range: the whole declaration; several branches below replace it.
+ // The nesting range identifies the left brace and right brace, which
+ // heavily depends on the AST node type.
+ SourceRange NestingRange;
+ if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
+ if (D2->isTemplateInstantiation()) {
+ wasTemplate = true;
+ D = D2->getTemplateInstantiationPattern(); // From here on D is the pattern, while D2 still refers to the instantiation.
+ }
+ // We treat pure virtual declarations as definitions.
+ Kind = (D2->isThisDeclarationADefinition() || D2->isPure()) ? "def" : "decl";
+ PrettyKind = "function";
+ PeekRange = getFunctionPeekRange(D2);
+
+ // Only emit the nesting range if:
+ // - This is a definition AND
+ // - This isn't a template instantiation. Function templates'
+ // instantiations can end up as a definition with a Loc at their point
+ // of declaration but with the CompoundStmt of the template's
+ // point of definition. This really messes up the nesting range logic.
+ // At the time of writing this, the test repo's `big_header.h`'s
+ // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
+ // instantiated by `big_cpp.cpp` triggers this phenomenon.
+ //
+ // Note: As covered elsewhere, template processing is tricky and it's
+ // conceivable that we may change traversal patterns in the future,
+ // mooting this guard.
+ if (D2->isThisDeclarationADefinition() &&
+ !D2->isTemplateInstantiation()) {
+ // The CompoundStmt range is the brace range.
+ NestingRange = getCompoundStmtRange(D2->getBody());
+ }
+ } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
+ Kind = D2->isThisDeclarationADefinition() ? "def" : "forward";
+ PrettyKind = "type";
+
+ if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
+ PeekRange = getTagPeekRange(D2);
+ NestingRange = D2->getBraceRange();
+ } else {
+ PeekRange = SourceRange();
+ }
+ } else if (isa<TypedefNameDecl>(D)) {
+ Kind = "def";
+ PrettyKind = "type";
+ PeekRange = SourceRange(Loc, Loc); // NOTE(review): validateRange() below rejects ranges whose begin offset >= end offset, so this only survives when combineRanges() extends it with a comment range - confirm intent.
+ } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
+ if (D2->isLocalVarDeclOrParm()) {
+ Flags = NoCrossref;
+ }
+
+ Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
+ ? "decl"
+ : "def";
+ PrettyKind = "variable";
+ } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
+ Kind = "def";
+ PrettyKind = "namespace";
+ PeekRange = SourceRange(Loc, Loc);
+ NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
+ if (D2) {
+ // There's no exposure of the left brace so we have to find it.
+ NestingRange = SourceRange(
+ findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc),
+ D2->getRBraceLoc());
+ }
+ } else if (isa<FieldDecl>(D)) {
+ Kind = "def";
+ PrettyKind = "field";
+ } else if (isa<EnumConstantDecl>(D)) {
+ Kind = "def";
+ PrettyKind = "enum constant";
+ } else {
+ return true;
+ }
+
+ QualType qtype = QualType();
+ if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) {
+ qtype = D2->getType();
+ }
+
+ SourceRange CommentRange = getCommentRange(D);
+ PeekRange = combineRanges(PeekRange, CommentRange);
+ PeekRange = validateRange(Loc, PeekRange);
+ NestingRange = validateRange(Loc, NestingRange);
+
+ std::string Symbol = getMangledName(CurMangleContext, D);
+
+ // In the case of destructors, Loc might point to the ~ character. In that
+ // case we want to skip to the name of the class. However, Loc might also
+ // point to other places that generate destructors, such as the use site of
+ // a macro that expands to generate a destructor, or a lambda (apparently
+ // clang 8 creates a destructor declaration for at least some lambdas). In
+ // the former case we'll use the macro use site as the location, and in the
+ // latter we'll just drop the declaration.
+ if (isa<CXXDestructorDecl>(D)) {
+ PrettyKind = "destructor";
+ const char *P = SM.getCharacterData(Loc);
+ if (*P == '~') {
+ // Advance Loc to the class name
+ P++;
+
+ unsigned Skipped = 1; // Counts the '~' itself plus any whitespace skipped after it.
+ while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
+ P++;
+ Skipped++;
+ }
+
+ Loc = Loc.getLocWithOffset(Skipped);
+ } else {
+ // See if the destructor is coming from a macro expansion
+ P = SM.getCharacterData(expandedLoc);
+ if (*P != '~') {
+ // It's not
+ return true;
+ }
+ // It is, so just use Loc as-is
+ }
+ }
+
+ visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbol,
+ qtype,
+ getContext(D), Flags, PeekRange, NestingRange);
+
+ // In-progress structured info emission.
+ if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) {
+ if (D2->isThisDeclarationADefinition() &&
+ // XXX getASTRecordLayout doesn't work for dependent types, so we
+ // avoid calling into emitStructuredInfo for now if there's a
+ // dependent type or if we're in any kind of template context. This
+ // should be re-evaluated once this is working for normal classes and
+ // we can better evaluate what is useful.
+ !D2->isDependentType() &&
+ !TemplateStack) {
+ emitStructuredInfo(Loc, D2);
+ }
+ }
+ if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
+ if ((D2->isThisDeclarationADefinition() || D2->isPure()) &&
+ // a clause at the top should have generalized and set wasTemplate so
+ // it shouldn't be the case that isTemplateInstantiation() is true.
+ !D2->isTemplateInstantiation() &&
+ !wasTemplate &&
+ !D2->isFunctionTemplateSpecialization() &&
+ !TemplateStack) {
+ emitStructuredInfo(Loc, D2);
+ }
+ }
+ if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) {
+ if (!D2->isTemplated() &&
+ !TemplateStack) {
+ emitStructuredInfo(Loc, D2);
+ }
+ }
+
+ return true;
+ }
+
+ // Records a "use" of the constructor invoked by a construct expression, at
+ // the (normalized) start of the expression. A constructor that is a template
+ // instantiation is reported as its template pattern. Always returns true.
+ bool VisitCXXConstructExpr(CXXConstructExpr *E) {
+   SourceLocation UseLoc = E->getBeginLoc();
+   normalizeLocation(&UseLoc);
+
+   if (isInterestingLocation(UseLoc)) {
+     FunctionDecl *Target = E->getConstructor();
+     if (Target->isTemplateInstantiation()) {
+       Target = Target->getTemplateInstantiationPattern();
+     }
+     std::string Symbol = getMangledName(CurMangleContext, Target);
+
+     // FIXME: Need to do something different for list initialization.
+
+     visitIdentifier("use", "constructor", getQualifiedName(Target), UseLoc,
+                     Symbol, QualType(), getContext(UseLoc));
+   }
+
+   return true;
+ }
+
+ // Records a "use" of the called function for overloaded-operator calls and
+ // member calls. Calls whose callee expression is anything else (including a
+ // plain DeclRefExpr, which is handled in VisitDeclRefExpr) emit nothing
+ // here. Always returns true.
+ bool VisitCallExpr(CallExpr *E) {
+   Decl *Callee = E->getCalleeDecl();
+   if (!Callee || !isa<FunctionDecl>(Callee)) {
+     return true;
+   }
+
+   // A callee that is a template instantiation is reported as its pattern.
+   const FunctionDecl *CalleeFunc = cast<FunctionDecl>(Callee);
+   const NamedDecl *Target = CalleeFunc;
+   if (CalleeFunc->isTemplateInstantiation()) {
+     Target = CalleeFunc->getTemplateInstantiationPattern();
+   }
+
+   std::string Symbol = getMangledName(CurMangleContext, Target);
+
+   SourceLocation UseLoc;
+   int Flags = 0;
+   Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
+
+   if (CXXOperatorCallExpr *OperatorCall = dyn_cast<CXXOperatorCallExpr>(E)) {
+     // Just take the first token; an operator is not an identifier.
+     UseLoc = OperatorCall->getOperatorLoc();
+     Flags |= NotIdentifierToken;
+   } else if (MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr)) {
+     UseLoc = Member->getMemberLoc();
+   } else {
+     // DeclRefExpr callees are handled in VisitDeclRefExpr; every other kind
+     // of callee expression is not recorded.
+     return true;
+   }
+
+   normalizeLocation(&UseLoc);
+   if (!isInterestingLocation(UseLoc)) {
+     return true;
+   }
+
+   visitIdentifier("use", "function", getQualifiedName(Target), UseLoc, Symbol,
+                   E->getCallReturnType(*AstContext), getContext(UseLoc), Flags);
+
+   return true;
+ }
+
+ // Records a "use" of the tag declaration named by a tag type reference.
+ bool VisitTagTypeLoc(TagTypeLoc L) {
+   SourceLocation UseLoc = L.getBeginLoc();
+   normalizeLocation(&UseLoc);
+
+   if (isInterestingLocation(UseLoc)) {
+     TagDecl *Tag = L.getDecl();
+     std::string Symbol = getMangledName(CurMangleContext, Tag);
+     visitIdentifier("use", "type", getQualifiedName(Tag), UseLoc, Symbol,
+                     L.getType(), getContext(UseLoc));
+   }
+
+   return true;
+ }
+
+ // Records a "use" of the typedef-name declaration named by a typedef type
+ // reference.
+ bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
+   SourceLocation UseLoc = L.getBeginLoc();
+   normalizeLocation(&UseLoc);
+
+   if (isInterestingLocation(UseLoc)) {
+     NamedDecl *Alias = L.getTypedefNameDecl();
+     std::string Symbol = getMangledName(CurMangleContext, Alias);
+     visitIdentifier("use", "type", getQualifiedName(Alias), UseLoc, Symbol,
+                     L.getType(), getContext(UseLoc));
+   }
+
+   return true;
+ }
+
+ // Records a "use" of the declaration behind an injected-class-name type
+ // reference.
+ bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
+   SourceLocation UseLoc = L.getBeginLoc();
+   normalizeLocation(&UseLoc);
+
+   if (isInterestingLocation(UseLoc)) {
+     NamedDecl *Record = L.getDecl();
+     std::string Symbol = getMangledName(CurMangleContext, Record);
+     visitIdentifier("use", "type", getQualifiedName(Record), UseLoc, Symbol,
+                     L.getType(), getContext(UseLoc));
+   }
+
+   return true;
+ }
+
+ // Records a "use" of the class or alias declaration behind a template
+ // specialization type reference (the templated declaration, not a
+ // particular specialization). Other kinds of template are ignored.
+ bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
+   SourceLocation Loc = L.getBeginLoc();
+   normalizeLocation(&Loc);
+   if (!isInterestingLocation(Loc)) {
+     return true;
+   }
+
+   TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
+   if (!Td) {
+     // getAsTemplateDecl() yields null when the template name has no single
+     // underlying declaration; dyn_cast<> must not be handed a null
+     // pointer, so there is nothing to record.
+     return true;
+   }
+
+   // Both supported template kinds are emitted the same way; only the
+   // lookup of the templated declaration differs.
+   NamedDecl *Templated = nullptr;
+   if (ClassTemplateDecl *ClassTd = dyn_cast<ClassTemplateDecl>(Td)) {
+     Templated = ClassTd->getTemplatedDecl();
+   } else if (TypeAliasTemplateDecl *AliasTd =
+                  dyn_cast<TypeAliasTemplateDecl>(Td)) {
+     Templated = AliasTd->getTemplatedDecl();
+   }
+   if (!Templated) {
+     return true;
+   }
+
+   std::string Mangled = getMangledName(CurMangleContext, Templated);
+   visitIdentifier("use", "type", getQualifiedName(Templated), Loc, Mangled,
+                   QualType(), getContext(Loc));
+
+   return true;
+ }
+
+ // Records a "use" for references to variables, functions and enum
+ // constants. References to any other kind of declaration are ignored.
+ bool VisitDeclRefExpr(DeclRefExpr *E) {
+   SourceLocation UseLoc = E->getExprLoc();
+   normalizeLocation(&UseLoc);
+   if (!isInterestingLocation(UseLoc)) {
+     return true;
+   }
+
+   // For a qualified reference, anchor the use at the name itself.
+   if (E->hasQualifier()) {
+     UseLoc = E->getNameInfo().getLoc();
+     normalizeLocation(&UseLoc);
+   }
+
+   NamedDecl *Target = E->getDecl();
+
+   if (const VarDecl *Var = dyn_cast<VarDecl>(Target)) {
+     // Locals and parameters are flagged NoCrossref.
+     int Flags = 0;
+     if (Var->isLocalVarDeclOrParm()) {
+       Flags = NoCrossref;
+     }
+     std::string Symbol = getMangledName(CurMangleContext, Target);
+     visitIdentifier("use", "variable", getQualifiedName(Target), UseLoc,
+                     Symbol, Var->getType(), getContext(UseLoc), Flags);
+     return true;
+   }
+
+   if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(Target)) {
+     // Attribute the use to the template pattern rather than the
+     // instantiation.
+     if (Fn->isTemplateInstantiation()) {
+       Target = Fn->getTemplateInstantiationPattern();
+     }
+
+     std::string Symbol = getMangledName(CurMangleContext, Target);
+     visitIdentifier("use", "function", getQualifiedName(Target), UseLoc,
+                     Symbol, E->getType(), getContext(UseLoc));
+     return true;
+   }
+
+   if (isa<EnumConstantDecl>(Target)) {
+     std::string Symbol = getMangledName(CurMangleContext, Target);
+     visitIdentifier("use", "enum", getQualifiedName(Target), UseLoc, Symbol,
+                     E->getType(), getContext(UseLoc));
+   }
+
+   return true;
+ }
+
+ // Records a "use" of each field that is explicitly initialized in the
+ // constructor's written member-initializer list.
+ bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
+   if (!isInterestingLocation(D->getLocation())) {
+     return true;
+   }
+
+   CXXConstructorDecl::init_const_iterator Cur = D->init_begin();
+   for (; Cur != D->init_end(); ++Cur) {
+     const CXXCtorInitializer *Init = *Cur;
+
+     // Only initializers that name a member and were actually written in
+     // the source are of interest.
+     if (Init->getMember() && Init->isWritten()) {
+       SourceLocation UseLoc = Init->getMemberLocation();
+       normalizeLocation(&UseLoc);
+
+       if (isInterestingLocation(UseLoc)) {
+         FieldDecl *Field = Init->getMember();
+         std::string Symbol = getMangledName(CurMangleContext, Field);
+         // The context is that of the constructor, not of the location.
+         visitIdentifier("use", "field", getQualifiedName(Field), UseLoc,
+                         Symbol, Field->getType(), getContext(D));
+       }
+     }
+   }
+
+   return true;
+ }
+
+ // Records a "use" of a field accessed through a member expression. Member
+ // expressions that name anything other than a field are ignored here.
+ bool VisitMemberExpr(MemberExpr *E) {
+   SourceLocation UseLoc = E->getExprLoc();
+   normalizeLocation(&UseLoc);
+
+   if (isInterestingLocation(UseLoc)) {
+     ValueDecl *Member = E->getMemberDecl();
+     FieldDecl *Field = dyn_cast<FieldDecl>(Member);
+     if (Field) {
+       std::string Symbol = getMangledName(CurMangleContext, Field);
+       visitIdentifier("use", "field", getQualifiedName(Field), UseLoc,
+                       Symbol, Field->getType(), getContext(UseLoc));
+     }
+   }
+
+   return true;
+ }
+
+ // Reports the location of a dependent member access to the current
+ // template stack entry, when there is one.
+ bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
+   SourceLocation MemberLoc = E->getMemberLoc();
+   normalizeLocation(&MemberLoc);
+
+   if (isInterestingLocation(MemberLoc) && TemplateStack) {
+     TemplateStack->visitDependent(MemberLoc);
+   }
+
+   return true;
+ }
+
+ // Emits a "def" record for the file that contains Loc, provided that the
+ // file is marked as interesting.
+ void enterSourceFile(SourceLocation Loc) {
+   normalizeLocation(&Loc);
+
+   FileInfo *Entered = getFileInfo(Loc);
+   if (Entered->Interesting) {
+     FileType Kind = FileType::Source;
+     if (Entered->Generated) {
+       Kind = FileType::Generated;
+     }
+     std::string FileSymbol =
+         std::string("FILE_") + mangleFile(Entered->Realname, Kind);
+
+     // The record uses an explicit zero-length range at Loc. Without the
+     // LocRangeEndValid flag, visitIdentifier would extend the range over
+     // the entire first token, which could be e.g. a long multiline
+     // comment.
+     visitIdentifier("def", "file", Entered->Realname, SourceRange(Loc),
+                     FileSymbol, QualType(), Context(),
+                     NotIdentifierToken | LocRangeEndValid);
+   }
+ }
+
+ // Emits a "use" record for the file named by an #include directive.
+ //
+ // FileNameRange is the source range of the file name in the directive and
+ // File is the entry the directive resolved to. A null File (the include
+ // could not be resolved) and files that relativizePath classifies as
+ // FileType::Unknown are skipped.
+ void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) {
+   // The preprocessor callback for older clang versions forwards its raw
+   // pointer unchecked, so guard against an unresolved include here.
+   if (!File) {
+     return;
+   }
+
+   std::string includedFile(File->tryGetRealPathName());
+   // relativizePath receives includedFile by name and returns the kind of
+   // file it refers to.
+   FileType type = relativizePath(includedFile);
+   if (type == FileType::Unknown) {
+     return;
+   }
+   std::string symbol =
+       std::string("FILE_") + mangleFile(includedFile, type);
+
+   // The range of the file name is already exact, hence LocRangeEndValid.
+   visitIdentifier("use", "file", includedFile, FileNameRange, symbol,
+                   QualType(), Context(),
+                   NotIdentifierToken | LocRangeEndValid);
+ }
+
+ // Emits a "def" record for a non-builtin macro definition. The symbol is
+ // "M_" followed by the mangled definition location and macro name.
+ void macroDefined(const Token &Tok, const MacroDirective *Macro) {
+   const bool IsBuiltin = Macro->getMacroInfo()->isBuiltinMacro();
+   if (IsBuiltin) {
+     return;
+   }
+
+   SourceLocation DefLoc = Tok.getLocation();
+   normalizeLocation(&DefLoc);
+   if (!isInterestingLocation(DefLoc)) {
+     return;
+   }
+
+   IdentifierInfo *Name = Tok.getIdentifierInfo();
+   if (!Name) {
+     return;
+   }
+
+   std::string Symbol =
+       std::string("M_") + mangleLocation(DefLoc, std::string(Name->getName()));
+   visitIdentifier("def", "macro", Name->getName(), DefLoc, Symbol);
+ }
+
+ // Emits a "use" record for a reference to a non-builtin macro. The symbol
+ // is built from the macro's definition location so that it matches the
+ // one produced by macroDefined. A null Macro is ignored.
+ void macroUsed(const Token &Tok, const MacroInfo *Macro) {
+   if (!Macro || Macro->isBuiltinMacro()) {
+     return;
+   }
+
+   SourceLocation UseLoc = Tok.getLocation();
+   normalizeLocation(&UseLoc);
+   if (!isInterestingLocation(UseLoc)) {
+     return;
+   }
+
+   IdentifierInfo *Name = Tok.getIdentifierInfo();
+   if (!Name) {
+     return;
+   }
+
+   std::string Symbol =
+       std::string("M_") +
+       mangleLocation(Macro->getDefinitionLoc(), std::string(Name->getName()));
+   visitIdentifier("use", "macro", Name->getName(), UseLoc, Symbol);
+ }
+};
+
+// Preprocessor callback for file changes. Only entering a file is forwarded
+// to the indexer:
+//  - RenameFile and SystemHeaderPragma are ignored, since we want the
+//    actual on-disk filenames;
+//  - ExitFile is ignored because we don't care about exiting files.
+void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                                   SrcMgr::CharacteristicKind FileType,
+                                   FileID PrevFID = FileID()) {
+  if (Reason == PPCallbacks::EnterFile) {
+    Indexer->enterSourceFile(Loc);
+  }
+}
+
+// Preprocessor callback for #include directives. Forwards the range of the
+// file name and the included file's entry to the indexer. The type of the
+// File parameter depends on the clang version being built against.
+void PreprocessorHook::InclusionDirective(SourceLocation HashLoc,
+                                          const Token &IncludeTok,
+                                          StringRef FileName,
+                                          bool IsAngled,
+                                          CharSourceRange FileNameRange,
+#if CLANG_VERSION_MAJOR >= 16
+                                          OptionalFileEntryRef File,
+#elif CLANG_VERSION_MAJOR >= 15
+                                          Optional<FileEntryRef> File,
+#else
+                                          const FileEntry *File,
+#endif
+                                          StringRef SearchPath,
+                                          StringRef RelativePath,
+                                          const Module *Imported,
+                                          SrcMgr::CharacteristicKind FileType) {
+#if CLANG_VERSION_MAJOR >= 15
+  // With the optional-reference forms, an absent file is simply skipped.
+  if (!File) {
+    return;
+  }
+  const FileEntry *Entry = &File->getFileEntry();
+#else
+  const FileEntry *Entry = File;
+#endif
+  Indexer->inclusionDirective(FileNameRange.getAsRange(), Entry);
+}
+
+// Preprocessor callback for a macro definition; forwarded unchanged to the
+// indexer.
+void PreprocessorHook::MacroDefined(const Token &Tok,
+                                    const MacroDirective *Md) {
+  const MacroDirective *Directive = Md;
+  Indexer->macroDefined(Tok, Directive);
+}
+
+// Preprocessor callback for a macro expansion; reported to the indexer as a
+// use of the macro.
+void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
+                                    SourceRange Range, const MacroArgs *Ma) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for #undef; reported to the indexer as a use of the
+// macro.
+void PreprocessorHook::MacroUndefined(const Token &Tok,
+                                      const MacroDefinition &Md,
+                                      const MacroDirective *Undef) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for a defined(...) check; reported to the indexer
+// as a use of the macro.
+void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
+                               SourceRange Range) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for #ifdef; reported to the indexer as a use of the
+// macro.
+void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
+                             const MacroDefinition &Md) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for #ifndef; reported to the indexer as a use of
+// the macro.
+void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
+                              const MacroDefinition &Md) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Frontend plugin action for the mozsearch indexer. It validates the three
+// plugin arguments (source, output and object directories), stores them in
+// Srcdir, Outdir and Objdir, and creates the IndexConsumer that does the
+// indexing.
+class IndexAction : public PluginASTAction {
+protected:
+  // Creates the AST consumer for one compiler invocation.
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 llvm::StringRef F) override {
+    return make_unique<IndexConsumer>(CI);
+  }
+
+  // Parses the plugin arguments: Args[0] is the source directory, Args[1]
+  // the output directory and Args[2] the object directory. Reports an error
+  // diagnostic and returns false if the argument count is wrong or if any
+  // directory cannot be resolved to an absolute path.
+  bool ParseArgs(const CompilerInstance &CI,
+                 const std::vector<std::string> &Args) override {
+    if (Args.size() != 3) {
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "Need arguments for the source, output, and object directories");
+      D.Report(DiagID);
+      return false;
+    }
+
+    // Load our directories
+    Srcdir = getAbsolutePath(Args[0]);
+    if (Srcdir.empty()) {
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(
+          DiagnosticsEngine::Error, "Source directory '%0' does not exist");
+      D.Report(DiagID) << Args[0];
+      return false;
+    }
+
+    // The output directory is created on demand. The trailing separator
+    // makes ensurePath treat the last path component as a directory to
+    // create, since it excludes the final element of the path.
+    ensurePath(Args[1] + PATHSEP_STRING);
+    Outdir = getAbsolutePath(Args[1]);
+    if (Outdir.empty()) {
+      // Without this check a failed lookup would leave Outdir as just the
+      // path separator once the separator is appended below.
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "Output directory '%0' could not be created or resolved");
+      D.Report(DiagID) << Args[1];
+      return false;
+    }
+    Outdir += PATHSEP_STRING;
+
+    Objdir = getAbsolutePath(Args[2]);
+    if (Objdir.empty()) {
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
+                                          "Objdir '%0' does not exist");
+      D.Report(DiagID) << Args[2];
+      return false;
+    }
+    Objdir += PATHSEP_STRING;
+
+    printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
+           Objdir.c_str());
+
+    return true;
+  }
+
+  // Writes the (placeholder) help text for the plugin to Ros.
+  void printHelp(llvm::raw_ostream &Ros) {
+    Ros << "Help for mozsearch plugin goes here\n";
+  }
+};
+
+// Registers the action with clang's plugin registry under the name
+// "mozsearch-index".
+static FrontendPluginRegistry::Add<IndexAction>
+    Y("mozsearch-index", "create the mozsearch index database");
diff --git a/build/clang-plugin/mozsearch-plugin/README b/build/clang-plugin/mozsearch-plugin/README
new file mode 100644
index 0000000000..d948e9aca3
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/README
@@ -0,0 +1,12 @@
+This clang plugin code generates a JSON file for each compiler input
+file. The JSON file contains information about the C++ symbols that
+are referenced by the input file. The data is eventually consumed by
+Searchfox. See https://github.com/mozsearch/mozsearch for more
+information.
+
+This plugin is enabled with the --enable-clang-plugin and
+--enable-mozsearch-plugin mozconfig options. The output of the plugin
+is stored in $OBJDIR/mozsearch_index.
+
+This code is not a checker, unlike other parts of the Mozilla clang
+plugin. It cannot be used with clang-tidy.
diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.cpp b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp
new file mode 100644
index 0000000000..a2e60e42c6
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "StringOperations.h"
+
+// Computes the djb2 hash of the NUL-terminated string Str: the accumulator
+// starts at 5381 and each character updates it as Acc * 33 + c.
+static unsigned long djbHash(const char *Str) {
+  unsigned long Acc = 5381;
+
+  while (*Str != '\0') {
+    Acc = Acc * 33 + *Str;
+    ++Str;
+  }
+
+  return Acc;
+}
+
+// Writes the hexadecimal digits of Hash into Buffer as a NUL-terminated
+// string, least significant nibble first. The text is therefore the
+// reverse of the usual hex rendering, and a Hash of zero produces an empty
+// string. Buffer must have room for two characters per byte of Hash plus
+// the terminator.
+static void hashToString(unsigned long Hash, char *Buffer) {
+  static const char Digits[] = "0123456789abcdef";
+  unsigned Len = 0;
+
+  for (; Hash != 0; Hash >>= 4) {
+    Buffer[Len++] = Digits[Hash & 0xf];
+  }
+
+  Buffer[Len] = '\0';
+}
+
+// Returns a short textual digest of Str: the djb2 hash of its characters
+// (up to the first NUL, since the C string is hashed) rendered by
+// hashToString.
+std::string hash(const std::string &Str) {
+  // Two hex digits per byte of the hash value plus the terminating NUL. A
+  // plain automatic buffer keeps the function reentrant; no state needs to
+  // survive between calls.
+  char HashStr[sizeof(unsigned long) * 2 + 1];
+  hashToString(djbHash(Str.c_str()), HashStr);
+  return std::string(HashStr);
+}
+
+// Returns the decimal representation of N (the same text "%d" would
+// produce), using the standard library instead of a printf round-trip.
+std::string toString(int N) {
+  return std::to_string(N);
+}
diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.h b/build/clang-plugin/mozsearch-plugin/StringOperations.h
new file mode 100644
index 0000000000..4aa5b31962
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/StringOperations.h
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef StringOperations_h
+#define StringOperations_h
+
+#include <memory>
+#include <stdio.h>
+#include <string>
+#include <string.h>
+
+std::string hash(const std::string &Str);
+
+// Formats ArgList according to the printf-style Format and returns the
+// result as a std::string. The arguments must match the conversions in
+// Format exactly as for snprintf. Returns an empty string if snprintf
+// reports an error.
+template <typename... Args>
+inline std::string stringFormat(const std::string &Format, Args... ArgList) {
+  // First pass: measure. snprintf returns the number of characters needed
+  // (excluding the terminator), or a negative value on error.
+  const int Needed = snprintf(nullptr, 0, Format.c_str(), ArgList...);
+  if (Needed <= 0) {
+    // A negative value must not be converted to size_t: it would become an
+    // enormous allocation size. Zero simply means empty output.
+    return std::string();
+  }
+
+  // Second pass: format into a buffer of exactly the right size.
+  const size_t Len = static_cast<size_t>(Needed);
+  std::unique_ptr<char[]> Buf(new char[Len + 1]);
+  snprintf(Buf.get(), Len + 1, Format.c_str(), ArgList...);
+  return std::string(Buf.get(), Buf.get() + Len);
+}
+
+std::string toString(int N);
+
+#endif