6 files changed, 2489 insertions, 0 deletions
diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.cpp b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp
new file mode 100644
index 0000000000..9307f4989d
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp
@@ -0,0 +1,140 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "FileOperations.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <direct.h>
+#include <io.h>
+#include <windows.h>
+#include "StringOperations.h"
+#else
+#include <sys/file.h>
+#include <sys/time.h>
+#include <unistd.h>
+#endif
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// Create every directory that appears as a parent component of Path. The
+// final element of the path is never created. A mkdir failure other than
+// "already exists" is fatal: the error is printed and the process exits.
+void ensurePath(std::string Path) {
+  // Skip a leading separator so the empty root component is never mkdir'ed.
+  size_t Cursor = (Path[0] == PATHSEP_CHAR) ? 1 : 0;
+  for (;;) {
+    Cursor = Path.find(PATHSEP_CHAR, Cursor);
+    if (Cursor == std::string::npos) {
+      return;
+    }
+    // Everything to the left of this separator names a directory.
+    const std::string Dir = Path.substr(0, Cursor);
+#if defined(_WIN32) || defined(_WIN64)
+    const int Rv = Dir.empty() ? 0 : _mkdir(Dir.c_str());
+#else
+    const int Rv = Dir.empty() ? 0 : mkdir(Dir.c_str(), 0775);
+#endif
+    if (Rv == -1 && errno != EEXIST) {
+      perror("mkdir failed");
+      exit(1);
+    }
+    ++Cursor;  // resume the search just past this separator
+  }
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) {
+  this->Filename = DstFile;
+  std::string MutexName = std::string("Local\\searchfox-") + hash(SrcFile);
+  std::wstring WideMutexName(MutexName.begin(), MutexName.end());
+  Handle = CreateMutex(nullptr, false, WideMutexName.c_str());
+  if (Handle == NULL) {
+    return;
+  }
+  DWORD Rv = WaitForSingleObject(Handle, INFINITE);  // WAIT_ABANDONED also grants ownership
+  if (Rv != WAIT_OBJECT_0 && Rv != WAIT_ABANDONED) {
+    // The mutex is NOT held: drop the handle so that success() reports failure.
+    CloseHandle(Handle);
+    Handle = NULL;
+  }
+}
+
+AutoLockFile::~AutoLockFile() {
+  // Nothing to release or close when the constructor never obtained a handle.
+  if (Handle != NULL) { ReleaseMutex(Handle); CloseHandle(Handle); }
+}
+
+bool AutoLockFile::success() {  // true when a mutex handle is held
+  return !(Handle == NULL);
+}
+
+FILE *AutoLockFile::openTmp() {  // nullptr on failure; truncates so a stale .tmp left by a crashed run is discarded
+  int TmpDescriptor = _open((Filename + ".tmp").c_str(), _O_WRONLY | _O_TRUNC | _O_CREAT | _O_BINARY, 0666);
+  return TmpDescriptor == -1 ? nullptr : _fdopen(TmpDescriptor, "wb");
+}
+
+bool AutoLockFile::moveTmp() {
+  // Drop the previous analysis file; a missing file is not an error.
+  if (_unlink(Filename.c_str()) == -1 && errno != ENOENT) {
+    return false;
+  }
+  const std::string TmpName = Filename + ".tmp";
+  return rename(TmpName.c_str(), Filename.c_str()) == 0;
+}
+
+std::string getAbsolutePath(const std::string &Filename) {
+  // Expand Filename with _fullpath(). A null result from that call is
+  // reported to the caller as an empty string.
+  char Resolved[_MAX_PATH];
+  const char *Ok = _fullpath(Resolved, Filename.c_str(), _MAX_PATH);
+  return Ok ? std::string(Resolved) : std::string("");
+}
+#else
+AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) {
+  this->Filename = DstFile;
+  FileDescriptor = open(SrcFile.c_str(), O_RDONLY);
+  // Block for the exclusive lock, retrying only when a signal interrupts the
+  // wait. Any other flock() error is permanent: retrying it forever would hang
+  // the compiler, so release the fd and let success() report the failure.
+  while (FileDescriptor != -1 && flock(FileDescriptor, LOCK_EX) != 0) {
+    if (errno == EINTR) {
+      continue;
+    }
+    close(FileDescriptor);
+    FileDescriptor = -1;
+  }
+}
+
+AutoLockFile::~AutoLockFile() { if (FileDescriptor != -1) close(FileDescriptor); }  // nothing to close if open() failed
+
+bool AutoLockFile::success() { return !(FileDescriptor == -1); }  // true when a valid descriptor is held
+
+FILE* AutoLockFile::openTmp() {  // nullptr on failure; truncates so a stale .tmp left by a crashed run is discarded
+  int TmpDescriptor = open((Filename + ".tmp").c_str(), O_WRONLY | O_TRUNC | O_CREAT, 0666);
+  return TmpDescriptor == -1 ? nullptr : fdopen(TmpDescriptor, "wb");
+}
+
+bool AutoLockFile::moveTmp() {
+  // Drop the previous analysis file; a missing file is not an error.
+  if (unlink(Filename.c_str()) == -1 && errno != ENOENT) {
+    return false;
+  }
+  const std::string TmpName = Filename + ".tmp";
+  return rename(TmpName.c_str(), Filename.c_str()) == 0;
+}
+
+std::string getAbsolutePath(const std::string &Filename) {
+  // Let realpath() allocate the result: a fixed char[4096] can overflow where PATH_MAX is larger.
+  char *Full = realpath(Filename.c_str(), nullptr);
+  std::string Result(Full ? Full : "");  // failure is reported as ""
+  free(Full);                            // free(nullptr) is a no-op
+  return Result;
+}
+#endif
diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.h b/build/clang-plugin/mozsearch-plugin/FileOperations.h
new file mode 100644
index 0000000000..90764484da
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/FileOperations.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FileOperations_h
+#define FileOperations_h
+
+#include <stdio.h>
+#include <string>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#define PATHSEP_CHAR '\\'
+#define PATHSEP_STRING "\\"
+#else
+#define PATHSEP_CHAR '/'
+#define PATHSEP_STRING "/"
+#endif
+
+// Make sure that all directories on path exist, excluding the final element of
+// the path.
+void ensurePath(std::string Path);
+
+std::string getAbsolutePath(const std::string &Filename);
+
+// Used to synchronize access when writing to an analysis file, so that
+// concurrently running clang instances don't clobber each other's data.
+// On Windows, we use a named mutex. On POSIX platforms, we use flock on the
+// source files. flock is advisory locking, and doesn't interfere with clang's
+// own opening of the source files (i.e. to interfere, clang would have to be
+// using flock itself, which it does not).
+struct AutoLockFile {
+  // Absolute path to the analysis file
+  std::string Filename;
+
+#if defined(_WIN32) || defined(_WIN64)
+  // Handle for the named Mutex
+  HANDLE Handle = NULL;
+#else
+  // fd for the *source* file that corresponds to the analysis file. We use
+  // the source file because it doesn't change while the analysis file gets
+  // repeatedly replaced by a new version written to a separate tmp file.
+  // This fd is used when using flock to synchronize access.
+  int FileDescriptor = -1;
+#endif
+
+  // SrcFile should be the absolute path to the source code file, and DstFile
+  // the absolute path to the corresponding analysis file. This constructor
+  // will block until exclusive access has been obtained.
+  AutoLockFile(const std::string &SrcFile, const std::string &DstFile);
+  ~AutoLockFile();  // gives up the mutex handle (Windows) or closes the fd (POSIX)
+
+  // Check after constructing to ensure the mutex (Windows) or fd (POSIX) was properly set up.
+  bool success();
+
+  // There used to be an `openFile` method here but we switched to directly
+  // using a std::ifstream for the input file in order to take advantage of its
+  // support for variable length lines (as opposed to fgets which takes a fixed
+  // size buffer).
+
+  // Open a new tmp file for writing the new analysis data to. Caller is
+  // responsible for fclose'ing it.
+  FILE *openTmp();
+  // Replace the existing analysis file with the new "tmp" one that has the new
+  // data. Returns false on error.
+  bool moveTmp();
+};
+
+#endif
diff --git a/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp
new file mode 100644
index 0000000000..904897df6b
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp
@@ -0,0 +1,2200 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/Mangle.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Version.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendPluginRegistry.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <unordered_set>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "FileOperations.h"
+#include "StringOperations.h"
+
+#if CLANG_VERSION_MAJOR < 8
+// Starting with Clang 8.0 some basic functions have been renamed
+#define getBeginLoc getLocStart
+#define getEndLoc getLocEnd
+#endif
+// We want std::make_unique, but that's only available in c++14.  In versions
+// prior to that, we need to fall back to llvm's make_unique.  It's also the
+// case that we expect clang 10 to build with c++14 and clang 9 and earlier to
+// build with c++11, at least as suggested by the llvm-config --cxxflags on
+// non-windows platforms.  mozilla-central seems to build with -std=c++17 on
+// windows so we need to make this decision based on __cplusplus instead of
+// the CLANG_VERSION_MAJOR.
+#if __cplusplus < 201402L
+using llvm::make_unique;
+#else
+using std::make_unique;
+#endif
+
+using namespace clang;
+
+const std::string GENERATED("__GENERATED__" PATHSEP_STRING);  // substituted for the Objdir prefix by relativizePath()
+
+// Absolute path to directory containing source code.
+std::string Srcdir;
+
+// Absolute path to objdir (including generated code).
+std::string Objdir;
+
+// Absolute path where analysis JSON output will be stored.
+std::string Outdir;
+
+enum class FileType {
+  // The file was in neither the source tree nor the objdir. It might be a
+  // system include, for example.
+  Unknown,
+  // A file from the source tree.
+  Source,
+  // A file from the objdir.
+  Generated,
+};
+
+// Takes an absolute path to a file, and returns the type of file it is. If
+// it's a Source or Generated file, the provided inout path argument is modified
+// in-place so that it is relative to the source dir or objdir, respectively.
+FileType relativizePath(std::string& path) {
+  if (path.compare(0, Objdir.length(), Objdir) == 0) {
+    path.replace(0, Objdir.length(), GENERATED);
+    return FileType::Generated;
+  }
+  // Empty filenames can get turned into Srcdir when resolved as absolute paths,
+  // so a path must extend beyond Srcdir. It must also continue with a separator,
+  // or a sibling such as "<Srcdir>-old/x" would be misfiled as a source file.
+  if (path.length() > Srcdir.length() && path[Srcdir.length()] == PATHSEP_CHAR &&
+      path.compare(0, Srcdir.length(), Srcdir) == 0) {
+    path.erase(0, Srcdir.length() + 1);  // also drops that separator
+    return FileType::Source;
+  }
+  return FileType::Unknown;
+}
+
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <sys/time.h>
+
+static double time() {  // current gettimeofday() reading, in seconds (fractional part from microseconds)
+  struct timeval Now;
+  gettimeofday(&Now, nullptr);
+  return double(Now.tv_sec) + double(Now.tv_usec) / 1000000.;
+}
+#endif
+
+// Return true if |Input| contains only alphanumerics and underscores, so that
+// operators, string literals, etc. never leak into the analysis output.
+static bool isValidIdentifier(std::string Input) {
+  for (char C : Input) {
+    // Cast first: <ctype.h> functions are undefined for negative char values.
+    if (!(isalnum(static_cast<unsigned char>(C)) || C == '_')) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Scope tracer: prints "<log>" to stdout when built and "</log>" when destroyed.
+struct RAIITracer {
+  const char* mLog;  // label echoed in both messages; only the pointer is stored
+
+  RAIITracer(const char *log) : mLog(log) {
+    fprintf(stdout, "<%s>\n", mLog);
+  }
+  ~RAIITracer() {
+    fprintf(stdout, "</%s>\n", mLog);
+  }
+};
+
+#define TRACEFUNC RAIITracer tracer(__FUNCTION__);
+
+class IndexConsumer;
+
+// For each C++ file seen by the analysis (.cpp or .h), we track a
+// FileInfo. This object tracks whether the file is "interesting" (i.e., whether
+// it's in the source dir or the objdir). We also store the analysis output
+// here.
+struct FileInfo {
+  // Rname is expected to be the file's absolute path (possibly empty). It is
+  // copied into Realname, which relativizePath() then rewrites in place when
+  // the file belongs to the source tree or the objdir.
+  FileInfo(std::string &Rname) : Realname(Rname) {
+    const FileType Kind = relativizePath(Realname);
+    // Source-tree and objdir files are the interesting ones; everything else
+    // (FileType::Unknown) is skipped when output is written.
+    Interesting = Kind != FileType::Unknown;
+    Generated = Kind == FileType::Generated;
+  }
+
+  // Relativized path for interesting files (objdir files carry the GENERATED
+  // prefix); otherwise whatever path was passed in.
+  std::string Realname;
+  // Analysis lines collected for this file.
+  std::vector<std::string> Output;
+  // True for files in the source tree or the objdir.
+  bool Interesting;
+  // True only for files in the objdir.
+  bool Generated;
+};
+
+class IndexConsumer;
+
+// Preprocessor callbacks installed by IndexConsumer's constructor. Only the
+// declarations appear in the class body; the handlers are defined elsewhere.
+class PreprocessorHook : public PPCallbacks {
+  IndexConsumer *Indexer;  // back-pointer to the consumer that created this hook
+public:
+  PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
+  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                           SrcMgr::CharacteristicKind FileType,
+                           FileID PrevFID) override;
+  // The type of the File parameter depends on the clang version being built against.
+  virtual void InclusionDirective(SourceLocation HashLoc,
+                                  const Token &IncludeTok,
+                                  StringRef FileName,
+                                  bool IsAngled,
+                                  CharSourceRange FileNameRange,
+#if CLANG_VERSION_MAJOR >= 16
+                                  OptionalFileEntryRef File,
+#elif CLANG_VERSION_MAJOR >= 15
+                                  Optional<FileEntryRef> File,
+#else
+                                  const FileEntry *File,
+#endif
+                                  StringRef SearchPath,
+                                  StringRef RelativePath,
+                                  const Module *Imported,
+                                  SrcMgr::CharacteristicKind FileType) override;
+
+  virtual void MacroDefined(const Token &Tok,
+                            const MacroDirective *Md) override;
+
+  virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md,
+                            SourceRange Range, const MacroArgs *Ma) override;
+  virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
+                              const MacroDirective *Undef) override;
+  virtual void Defined(const Token &Tok, const MacroDefinition &Md,
+                       SourceRange Range) override;
+  virtual void Ifdef(SourceLocation Loc, const Token &Tok,
+                     const MacroDefinition &Md) override;
+  virtual void Ifndef(SourceLocation Loc, const Token &Tok,
+                      const MacroDefinition &Md) override;
+};
+
+class IndexConsumer : public ASTConsumer,
+                      public RecursiveASTVisitor<IndexConsumer>,
+                      public DiagnosticConsumer {
+private:
+  CompilerInstance &CI;
+  SourceManager &SM;
+  LangOptions &LO;
+  std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
+  MangleContext *CurMangleContext;
+  ASTContext *AstContext;
+
+  typedef RecursiveASTVisitor<IndexConsumer> Super;
+
+  // Tracks the set of declarations that the current expression/statement is
+  // nested inside of.
+  struct AutoSetContext {
+    // Pushes this context; VisitImplicit is inherited from the enclosing context once set.
+    AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false)
+        : Self(Self), Prev(Self->CurDeclContext), Decl(Context),
+          VisitImplicit(VisitImplicit || (Prev && Prev->VisitImplicit)) {
+      Self->CurDeclContext = this;
+    }
+    // Pops this context, restoring whatever was current before it.
+    ~AutoSetContext() { Self->CurDeclContext = Prev; }
+    IndexConsumer *Self;    // consumer whose CurDeclContext is being managed
+    AutoSetContext *Prev;   // context that was current when this one was pushed
+    NamedDecl *Decl;        // declaration this context stands for
+    bool VisitImplicit;
+  };
+  AutoSetContext *CurDeclContext;
+
+  // Returns the FileInfo describing the file that contains Loc, creating and
+  // caching it in FileMap the first time that file is seen.
+  FileInfo *getFileInfo(SourceLocation Loc) {
+    const FileID Id = SM.getFileID(Loc);
+    auto Found = FileMap.find(Id);
+    if (Found != FileMap.end()) {
+      return Found->second.get();
+    }
+
+    // We haven't seen this file before, so build its FileInfo ourselves.
+    // If Loc is a macro id rather than a file id, Filename might be empty.
+    // Clang-internal file locations such as "<scratch>" can also come back with
+    // an empty Filename. In those cases Absolute is deliberately left empty.
+    std::string Absolute;
+    const std::string Filename = std::string(SM.getFilename(Loc));
+    if (!Filename.empty()) {
+      Absolute = getAbsolutePath(Filename);
+      if (Absolute.empty()) {
+        // Resolution failed; fall back to the name exactly as reported.
+        Absolute = Filename;
+      }
+    }
+    auto Inserted = FileMap.insert(std::make_pair(Id, make_unique<FileInfo>(Absolute)));
+    return Inserted.first->second.get();
+  }
+
+  // Helpers for processing declarations
+  // A location is interesting when it is valid and its FileInfo says so.
+  bool isInterestingLocation(SourceLocation Loc) {
+    if (Loc.isValid()) {
+      return getFileInfo(Loc)->Interesting;
+    }
+
+    return false;
+  }
+
+  // Convert location to "line:column" or "line:column-column" given length.
+  // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
+  // column is 0-based and unpadded. Returns "" if the location cannot be mapped.
+  std::string locationToString(SourceLocation Loc, size_t Length = 0) {
+    std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc);
+    bool IsInvalid;
+    unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
+    if (IsInvalid) {
+      return "";
+    }
+    unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
+    if (IsInvalid) {
+      return "";
+    }
+    if (Length) {
+      // Narrow the size_t sum before formatting: handing a 64-bit size_t to a
+      // "%d" conversion is undefined if stringFormat is printf-style (assumed).
+      unsigned EndColumn = static_cast<unsigned>(Column - 1 + Length);
+      return stringFormat("%05d:%d-%d", Line, Column - 1, EndColumn);
+    }
+    return stringFormat("%05d:%d", Line, Column - 1);
+  }
+
+  // Render Range as "firstLine-lastLine" (both 1-based). An empty string is
+  // returned when either endpoint's line number cannot be determined.
+  std::string lineRangeToString(SourceRange Range) {
+    const std::pair<FileID, unsigned> Start = SM.getDecomposedLoc(Range.getBegin());
+    const std::pair<FileID, unsigned> Stop = SM.getDecomposedLoc(Range.getEnd());
+
+    bool Failed;
+    const unsigned FirstLine = SM.getLineNumber(Start.first, Start.second, &Failed);
+    if (Failed) {
+      return std::string();
+    }
+    const unsigned LastLine = SM.getLineNumber(Stop.first, Stop.second, &Failed);
+    if (Failed) {
+      return std::string();
+    }
+
+    return stringFormat("%d-%d", FirstLine, LastLine);
+  }
+
+  // Render Range as "line:column-line:column", where lines are 1-based and
+  // columns are 0-based. Yields "" as soon as any coordinate is unavailable.
+  std::string fullRangeToString(SourceRange Range) {
+    const std::pair<FileID, unsigned> Start = SM.getDecomposedLoc(Range.getBegin());
+    const std::pair<FileID, unsigned> Stop = SM.getDecomposedLoc(Range.getEnd());
+
+    bool Failed;
+    const unsigned StartLine = SM.getLineNumber(Start.first, Start.second, &Failed);
+    if (Failed) {
+      return std::string();
+    }
+    const unsigned StartCol = SM.getColumnNumber(Start.first, Start.second, &Failed);
+    if (Failed) {
+      return std::string();
+    }
+    const unsigned StopLine = SM.getLineNumber(Stop.first, Stop.second, &Failed);
+    if (Failed) {
+      return std::string();
+    }
+    const unsigned StopCol = SM.getColumnNumber(Stop.first, Stop.second, &Failed);
+    if (Failed) {
+      return std::string();
+    }
+
+    return stringFormat("%d:%d-%d:%d", StartLine, StartCol - 1, StopLine, StopCol - 1);
+  }
+
+  // Returns the qualified name of `D`, built by joining the names of its
+  // enclosing named contexts with "::". Template arguments are only spelled
+  // out for explicit class template specializations.
+  std::string getQualifiedName(const NamedDecl *D) {
+    const DeclContext *Ctx = D->getDeclContext();
+    if (Ctx->isFunctionOrMethod()) {
+      // Declarations directly inside a function or method use clang's own printer.
+      return D->getQualifiedNameAsString();
+    }
+    std::vector<const DeclContext *> Contexts;
+    // Collect contexts, innermost first.
+    while (Ctx && isa<NamedDecl>(Ctx)) {
+      Contexts.push_back(Ctx);
+      Ctx = Ctx->getParent();
+    }
+    std::string Result;
+    // Flip to outermost-first so the name is assembled left to right.
+    std::reverse(Contexts.begin(), Contexts.end());
+
+    for (const DeclContext *DC : Contexts) {
+      if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
+        Result += Spec->getNameAsString();
+
+        if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
+          std::string Backing;
+          llvm::raw_string_ostream Stream(Backing);
+          const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
+          printTemplateArgumentList(
+              Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts()));
+          Result += Stream.str();
+        }
+      } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
+        if (Nd->isAnonymousNamespace() || Nd->isInline()) {
+          continue;  // contributes neither a name nor a "::" separator
+        }
+        Result += Nd->getNameAsString();
+      } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
+        if (!Rd->getIdentifier()) {
+          Result += "(anonymous)";
+        } else {
+          Result += Rd->getNameAsString();
+        }
+      } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
+        Result += Fd->getNameAsString();
+      } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
+        // C++ [dcl.enum]p10: Each enum-name and each unscoped
+        // enumerator is declared in the scope that immediately contains
+        // the enum-specifier. Each scoped enumerator is declared in the
+        // scope of the enumeration.
+        if (Ed->isScoped() || Ed->getIdentifier())
+          Result += Ed->getNameAsString();
+        else
+          continue;
+      } else {
+        Result += cast<NamedDecl>(DC)->getNameAsString();
+      }
+      Result += "::";
+    }
+
+    if (D->getDeclName())
+      Result += D->getNameAsString();
+    else
+      Result += "(anonymous)";
+
+    return Result;
+  }
+
+  // Hash of "<file>@<location>" identifying Loc. If the file has no name and
+  // a non-empty Backup was supplied, Backup is returned unchanged instead.
+  std::string mangleLocation(SourceLocation Loc,
+                             std::string Backup = std::string()) {
+    const FileInfo *Info = getFileInfo(Loc);
+    if (Info->Realname.empty() && !Backup.empty()) {
+      return Backup;
+    }
+    std::string Key;
+    if (Info->Generated) {
+      // Generated files can differ per platform, so salt the key with the
+      // platform name to keep symbols from different platforms apart.
+      const char *Platform = getenv("MOZSEARCH_PLATFORM");
+      Key = std::string(Platform ? Platform : "") + "@";
+    }
+    return hash(Key + Info->Realname + "@" + locationToString(Loc));
+  }
+
+  bool isAcceptableSymbolChar(char c) {  // cast first: ctype calls are undefined for negative char values
+    return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '/';
+  }
+
+  // "Mangle" a file path into a symbol-safe string, such that:
+  // 1. The majority of paths will still be mostly human-readable.
+  // 2. Two different unsanitized paths never sanitize to the same result.
+  // 3. The produced symbol doesn't cause problems with downstream consumers.
+  // To accomplish this, alphanumeric chars, underscores and slashes are kept
+  // and every other byte is replaced with an "@xx" hex encoding. Most path
+  // characters are letters and slashes which don't get encoded, which
+  // satisfies (1). Since "@" itself gets encoded, no "@" in the result can
+  // have been carried over from the input, which should satisfy (2). And (3)
+  // was done by trial-and-error. Note in particular the dot (.) character
+  // needs to be encoded, or the symbol-search feature of mozsearch doesn't
+  // work correctly, as all dot characters in the symbol query get replaced
+  // by #.
+  std::string mangleFile(std::string Filename, FileType Type) {
+    std::string Mangled;
+    for (const char c : Filename) {
+      if (isAcceptableSymbolChar(c)) {
+        // Kept verbatim.
+        Mangled.push_back(c);
+      } else {
+        // Room for '@', two hex digits and the terminating NUL.
+        char hex[4];
+        sprintf(hex, "@%02X", ((int)c) & 0xFF);
+        Mangled += hex;
+      }
+    }
+
+    if (Type != FileType::Generated) {
+      return Mangled;
+    }
+
+    // Generated files may differ between platforms, so the platform name is
+    // prepended; otherwise different symbols from different platforms could
+    // end up mapping to the same thing.
+    const char *Platform = getenv("MOZSEARCH_PLATFORM");
+    return std::string(Platform ? Platform : "") + std::string("@") + Mangled;
+  }
+
+  std::string mangleQualifiedName(std::string Name) {  // every space becomes an underscore
+    for (char &Ch : Name) if (Ch == ' ') Ch = '_';
+    return Name;
+  }
+
+  // Produces the symbol string for Decl. Functions and variables at file,
+  // namespace, linkage-spec or tag scope use the compiler's mangled name;
+  // everything else gets a kind prefix (V_, T_, NS_, F_, E_) plus a name or hash.
+  std::string getMangledName(clang::MangleContext *Ctx,
+                             const clang::NamedDecl *Decl) {
+    if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {
+      return cast<FunctionDecl>(Decl)->getNameAsString();  // extern "C": plain name
+    }
+    if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {
+      const DeclContext *DC = Decl->getDeclContext();
+      if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
+          isa<LinkageSpecDecl>(DC) ||
+          // isa<ExternCContextDecl>(DC) ||
+          isa<TagDecl>(DC)) {
+        llvm::SmallVector<char, 512> Output;
+        llvm::raw_svector_ostream Out(Output);
+#if CLANG_VERSION_MAJOR >= 11
+        // This code changed upstream in version 11:
+        // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47
+        GlobalDecl GD;
+        if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
+          GD = GlobalDecl(D, Ctor_Complete);
+        } else if (const CXXDestructorDecl *D =
+                       dyn_cast<CXXDestructorDecl>(Decl)) {
+          GD = GlobalDecl(D, Dtor_Complete);
+        } else {
+          GD = GlobalDecl(Decl);
+        }
+        Ctx->mangleName(GD, Out);
+#else
+        if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
+          Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
+        } else if (const CXXDestructorDecl *D =
+                       dyn_cast<CXXDestructorDecl>(Decl)) {
+          Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
+        } else {
+          Ctx->mangleName(Decl, Out);
+        }
+#endif
+        return Out.str().str();
+      } else {
+        // Any other context (e.g. a local): location hash plus hash of the name.
+        return std::string("V_") + mangleLocation(Decl->getLocation()) +
+               std::string("_") + hash(std::string(Decl->getName()));
+      }
+    } else if (isa<TagDecl>(Decl) || isa<TypedefNameDecl>(Decl) ||
+               isa<ObjCInterfaceDecl>(Decl)) {
+      if (!Decl->getIdentifier()) {
+        // Anonymous.
+        return std::string("T_") + mangleLocation(Decl->getLocation());
+      }
+      return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
+    } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {
+      if (!Decl->getIdentifier()) {
+        // Anonymous.
+        return std::string("NS_") + mangleLocation(Decl->getLocation());
+      }
+      return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
+    } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {
+      const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
+      return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
+             D2->getNameAsString();
+    } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {
+      const RecordDecl *Record = D2->getParent();
+      return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
+             D2->getNameAsString();
+    } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {
+      const DeclContext *DC = Decl->getDeclContext();
+      if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
+        return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
+               D2->getNameAsString();
+      }
+    }
+    // Reached for unhandled kinds, or an enumerator whose context is not a NamedDecl.
+    assert(false);
+    return std::string("");
+  }
+
+  void debugLocation(SourceLocation Loc) {  // prints "--> <file> <line:col>" to stdout
+    const std::string Where = locationToString(Loc);
+    const std::string Name = std::string(SM.getFilename(Loc));
+    printf("--> %s %s\n", Name.c_str(), Where.c_str());
+  }
+
+  void debugRange(SourceRange Range) {  // dumps begin, then end, to stdout
+    puts("Range");
+    debugLocation(Range.getBegin());
+    debugLocation(Range.getEnd());
+  }
+
+public:
+  IndexConsumer(CompilerInstance &CI)  // keeps references to CI's SourceManager and LangOptions
+      : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr),
+        AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) {
+    CI.getPreprocessor().addPPCallbacks(  // register a hook that points back at this consumer
+        make_unique<PreprocessorHook>(this));
+  }
+
+  virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {  // Diags is unused
+    return new IndexConsumer(CI);  // new consumer on the same CompilerInstance; its constructor registers another PreprocessorHook
+  }
+
+#if !defined(_WIN32) && !defined(_WIN64)
+  struct AutoTime {
+    double *Counter;  // accumulator that elapsed time() is added to
+    double Start;     // time() when timing began; 0 once stop() has run
+    AutoTime(double *Counter) : Counter(Counter), Start(time()) {}  // starts timing immediately
+    ~AutoTime() {  // accounts for the elapsed time unless stop() already did
+      if (Start != 0) {
+        stop();
+      }
+    }
+    void stop() {
+      *Counter += time() - Start;
+      Start = 0;
+    }
+  };
+#endif
+
+  // Called once the whole translation unit has been parsed: walk the AST, then
+  // merge every interesting file's records into its on-disk analysis file.
+  virtual void HandleTranslationUnit(ASTContext &Ctx) {
+    CurMangleContext =
+      clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
+    AstContext = &Ctx;
+    TraverseDecl(Ctx.getTranslationUnitDecl());
+
+    // Emit the JSON data for all files now.
+    std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
+    for (It = FileMap.begin(); It != FileMap.end(); It++) {
+      if (!It->second->Interesting) {
+        continue;
+      }
+
+      FileInfo &Info = *It->second;
+
+      std::string Filename = Outdir + Info.Realname;
+      std::string SrcFilename = Info.Generated
+        ? Objdir + Info.Realname.substr(GENERATED.length())
+        : Srcdir + PATHSEP_STRING + Info.Realname;
+
+      ensurePath(Filename);
+
+      // We lock the output file in case some other clang process is trying to
+      // write to it at the same time.
+      AutoLockFile Lock(SrcFilename, Filename);
+
+      if (!Lock.success()) {
+        fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
+        exit(1);
+      }
+
+      // Merge our results with the existing lines from the output file.
+      // This ensures that header files that are included multiple times
+      // in different ways are analyzed completely.
+      std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary);
+      FILE *OutFp = Lock.openTmp();
+      if (!OutFp) {
+        fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str());
+        exit(1);
+      }
+
+      // Sort our new results and get an iterator to them
+      std::sort(Info.Output.begin(), Info.Output.end());
+      std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin();
+      std::string LastNewWritten;
+
+      // Loop over the existing (sorted) lines in the analysis output file.
+      // (The good() check also handles the case where Fin did not exist when we
+      // went to open it.)
+      while(Fin.good()) {
+        std::string OldLine;
+        std::getline(Fin, OldLine);
+        // Skip blank lines.
+        if (OldLine.length() == 0) {
+          continue;
+        }
+        // We need to put the newlines back that getline() eats.
+        OldLine.push_back('\n');
+
+        // Write any results from Info.Output that are lexicographically
+        // smaller than OldLine (read from the existing file), but make sure
+        // to skip duplicates. Keep advancing NewLinesIter until we reach an
+        // entry that is lexicographically greater than OldLine.
+        for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
+          if (*NewLinesIter > OldLine) {
+            break;
+          }
+          if (*NewLinesIter == OldLine) {
+            continue;
+          }
+          if (*NewLinesIter == LastNewWritten) {
+            // dedupe the new entries being written
+            continue;
+          }
+          if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
+            fprintf(stderr, "Unable to write %zu bytes[1] to tmp output file for %s\n",
+                    NewLinesIter->length(), Filename.c_str());
+            exit(1);
+          }
+          LastNewWritten = *NewLinesIter;
+        }
+
+        // Write the entry read from the existing file.
+        if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
+          fprintf(stderr, "Unable to write %zu bytes[2] to tmp output file for %s\n",
+                  OldLine.length(), Filename.c_str());
+          exit(1);
+        }
+      }
+
+      Fin.close();  // We finished reading from Fin
+      // Finish iterating our new results, discarding duplicates
+      for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
+        if (*NewLinesIter == LastNewWritten) {
+          continue;
+        }
+        if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) {
+          fprintf(stderr, "Unable to write %zu bytes[3] to tmp output file for %s\n",
+                  NewLinesIter->length(), Filename.c_str());
+          exit(1);
+        }
+        LastNewWritten = *NewLinesIter;
+      }
+      // Done writing all the things. fclose() flushes whatever is still buffered,
+      // so if it fails the tmp file is incomplete and must not replace the old one.
+      if (fclose(OutFp) != 0) {
+        fprintf(stderr, "Unable to close tmp output file for %s\n", Filename.c_str());
+        exit(1);
+      }
+      if (!Lock.moveTmp()) {
+        fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno);
+        exit(1);
+      }
+    }
+  }
+
+  // Unfortunately, we have to override all these methods in order to track the
+  // context we're inside.
+
+  // Runs the base-class enum traversal inside an AutoSetContext scope for D,
+  // so that context lookups made during the traversal see D as the current
+  // declaration.
+  bool TraverseEnumDecl(EnumDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    const bool KeepGoing = Super::TraverseEnumDecl(D);
+    return KeepGoing;
+  }
+  // Runs the base-class record traversal inside an AutoSetContext scope for D,
+  // so that context lookups made during the traversal see D as the current
+  // declaration.
+  bool TraverseRecordDecl(RecordDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    const bool KeepGoing = Super::TraverseRecordDecl(D);
+    return KeepGoing;
+  }
+  // Runs the base-class C++ record traversal inside an AutoSetContext scope
+  // for D, so that context lookups made during the traversal see D as the
+  // current declaration.
+  bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    const bool KeepGoing = Super::TraverseCXXRecordDecl(D);
+    return KeepGoing;
+  }
+  // Traverses a function declaration with D set as the current context.
+  //
+  // (See the larger AutoTemplateContext comment for more information.) If a
+  // method on a templated class is declared out-of-line, we need to analyze
+  // the definition inside the scope of the template or else we won't properly
+  // handle member access on the templated type. So, while a template context
+  // is active and D is not itself the definition, the definition is traversed
+  // first.
+  bool TraverseFunctionDecl(FunctionDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    if (TemplateStack) {
+      const FunctionDecl *Definition = nullptr;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseFunctionDecl(D);
+  }
+  // Traverses a C++ method declaration with D set as the current context.
+  // As in TraverseFunctionDecl, while a template context is active an
+  // out-of-line definition of D is traversed first (through
+  // TraverseFunctionDecl).
+  bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    if (TemplateStack) {
+      const FunctionDecl *Definition = nullptr;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseCXXMethodDecl(D);
+  }
+  // Traverses a constructor declaration with D set as the current context.
+  // Unlike the other Traverse* overrides, the context is created with
+  // VisitImplicit enabled. As in TraverseFunctionDecl, while a template
+  // context is active an out-of-line definition of D is traversed first
+  // (through TraverseFunctionDecl).
+  bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
+    AutoSetContext ContextGuard(this, D, /*VisitImplicit=*/true);
+    if (TemplateStack) {
+      const FunctionDecl *Definition = nullptr;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseCXXConstructorDecl(D);
+  }
+  // Traverses a conversion-function declaration with D set as the current
+  // context. As in TraverseFunctionDecl, while a template context is active an
+  // out-of-line definition of D is traversed first (through
+  // TraverseFunctionDecl).
+  bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    if (TemplateStack) {
+      const FunctionDecl *Definition = nullptr;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseCXXConversionDecl(D);
+  }
+  // Traverses a destructor declaration with D set as the current context.
+  // As in TraverseFunctionDecl, while a template context is active an
+  // out-of-line definition of D is traversed first (through
+  // TraverseFunctionDecl).
+  bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
+    AutoSetContext ContextGuard(this, D);
+    if (TemplateStack) {
+      const FunctionDecl *Definition = nullptr;
+      if (D->isDefined(Definition) && Definition && Definition != D) {
+        TraverseFunctionDecl(const_cast<FunctionDecl *>(Definition));
+      }
+    }
+    return Super::TraverseCXXDestructorDecl(D);
+  }
+
+  // Used to keep track of the context in which a token appears.
+  struct Context {
+    // Ultimately this becomes the "context" JSON property.
+    std::string Name;
+
+    // Ultimately this becomes the "contextsym" JSON property.
+    std::string Symbol;
+
+    // An empty context: both strings are empty. visitIdentifier omits the
+    // corresponding JSON property for an empty string.
+    Context() {}
+
+    // The strings are taken by value and moved into the members, so callers
+    // that pass temporaries do not pay for an additional copy.
+    Context(std::string Name, std::string Symbol)
+        : Name(std::move(Name)), Symbol(std::move(Symbol)) {}
+  };
+
+  // Builds the Context (qualified name plus mangled symbol) that describes D.
+  //
+  // A function that is a template instantiation is described in terms of the
+  // pattern it was instantiated from rather than the instantiation itself.
+  // getTemplateInstantiationPattern() can return null, in which case we fall
+  // back to D itself instead of dereferencing a null pointer.
+  Context translateContext(NamedDecl *D) {
+    const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
+    if (F && F->isTemplateInstantiation()) {
+      if (FunctionDecl *Pattern = F->getTemplateInstantiationPattern()) {
+        D = Pattern;
+      }
+    }
+
+    return Context(D->getQualifiedNameAsString(), getMangledName(CurMangleContext, D));
+  }
+
+  // Returns the context for a token at Loc: the translation of the
+  // declaration at the top of the context stack, or an empty Context when
+  // there is no current declaration.
+  Context getContext(SourceLocation Loc) {
+    // If we're inside a macro definition, we don't return any context. It
+    // will probably not be what the user expects if we do.
+    const bool InMacroBody = SM.isMacroBodyExpansion(Loc);
+    if (InMacroBody || !CurDeclContext) {
+      return Context();
+    }
+    return translateContext(CurDeclContext->Decl);
+  }
+
+  // Similar to getContext(SourceLocation), but it skips the declaration passed
+  // in. This is useful if we want the context of a declaration that's already
+  // on the stack.
+  Context getContext(Decl *D) {
+    // If we're inside a macro definition, we don't return any context. It
+    // will probably not be what the user expects if we do.
+    const bool InMacroBody = SM.isMacroBodyExpansion(D->getLocation());
+    if (InMacroBody) {
+      return Context();
+    }
+
+    // Follow the Prev links from the top of the stack and report the first
+    // entry whose declaration is not D itself.
+    for (AutoSetContext *Entry = CurDeclContext; Entry; Entry = Entry->Prev) {
+      if (Entry->Decl == D) {
+        continue;
+      }
+      return translateContext(Entry->Decl);
+    }
+    return Context();
+  }
+
+  // Analyzing template code is tricky. Suppose we have this code:
+  //
+  //   template<class T>
+  //   bool Foo(T* ptr) { return T::StaticMethod(ptr); }
+  //
+  // If we analyze the body of Foo without knowing the type T, then we will not
+  // be able to generate any information for StaticMethod. However, analyzing
+  // Foo for every possible instantiation is inefficient and it also generates
+  // too much data in some cases. For example, the following code would generate
+  // one definition of Baz for every instantiation, which is undesirable:
+  //
+  //   template<class T>
+  //   class Bar { struct Baz { ... }; };
+  //
+  // To solve this problem, we analyze templates only once. We do so in a
+  // GatherDependent mode where we look for "dependent scoped member
+  // expressions" (i.e., things like StaticMethod). We keep track of the
+  // locations of these expressions. If we find one or more of them, we analyze
+  // the template for each instantiation, in an AnalyzeDependent mode. This mode
+  // ignores all source locations except for the ones where we found dependent
+  // scoped member expressions before. For these locations, we generate a
+  // separate JSON result for each instantiation.
+  //
+  // We inherit our parent's mode if it exists.  This is because if our
+  // parent is in analyze mode, it means we've already lived a full life in
+  // gather mode and we must not restart in gather mode or we'll cause the
+  // indexer to visit EVERY identifier, which is way too much data.
+  //
+  // Instances are scope guards: the constructor pushes the new context onto
+  // Self->TemplateStack and the destructor restores the previous top.
+  struct AutoTemplateContext {
+    AutoTemplateContext(IndexConsumer *Self)
+        : Self(Self)
+        , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent)
+        , Parent(Self->TemplateStack) {
+      Self->TemplateStack = this;
+    }
+
+    ~AutoTemplateContext() { Self->TemplateStack = Parent; }
+
+    // Copying a guard would make two destructors restore the same stack
+    // entry (and leave TemplateStack pointing at a dead object), so copying
+    // is disabled.
+    AutoTemplateContext(const AutoTemplateContext &) = delete;
+    AutoTemplateContext &operator=(const AutoTemplateContext &) = delete;
+
+    // We traverse templates in two modes:
+    enum class Mode {
+      // Gather mode does not traverse into specializations. It looks for
+      // locations where it would help to have more info from template
+      // specializations.
+      GatherDependent,
+
+      // Analyze mode traverses into template specializations and records
+      // information about token locations saved in gather mode.
+      AnalyzeDependent,
+    };
+
+    // We found a dependent scoped member expression! Keep track of it for
+    // later. The location is recorded in this context and in every enclosing
+    // context that is still in gather mode; nothing is recorded once in
+    // analyze mode.
+    void visitDependent(SourceLocation Loc) {
+      if (CurMode == Mode::AnalyzeDependent) {
+        return;
+      }
+
+      DependentLocations.insert(Loc.getRawEncoding());
+      if (Parent) {
+        Parent->visitDependent(Loc);
+      }
+    }
+
+    // True while this context is still in its initial gather pass.
+    bool inGatherMode() const {
+      return CurMode == Mode::GatherDependent;
+    }
+
+    // Do we need to perform the extra AnalyzeDependent passes (one per
+    // instantiation)? True if this context or any enclosing one recorded a
+    // dependent location.
+    bool needsAnalysis() const {
+      if (!DependentLocations.empty()) {
+        return true;
+      }
+      if (Parent) {
+        return Parent->needsAnalysis();
+      }
+      return false;
+    }
+
+    // Moves this context from gather mode to analyze mode.
+    void switchMode() { CurMode = Mode::AnalyzeDependent; }
+
+    // Do we want to analyze each template instantiation separately? True if
+    // this context or any enclosing one is in analyze mode.
+    bool shouldVisitTemplateInstantiations() const {
+      if (CurMode == Mode::AnalyzeDependent) {
+        return true;
+      }
+      if (Parent) {
+        return Parent->shouldVisitTemplateInstantiations();
+      }
+      return false;
+    }
+
+    // For a given expression/statement, should we emit JSON data for it?
+    // Everything is visited in gather mode; in analyze mode only locations
+    // recorded by visitDependent (here or in an enclosing context) are.
+    bool shouldVisit(SourceLocation Loc) const {
+      if (CurMode == Mode::GatherDependent) {
+        return true;
+      }
+      if (DependentLocations.find(Loc.getRawEncoding()) !=
+          DependentLocations.end()) {
+        return true;
+      }
+      if (Parent) {
+        return Parent->shouldVisit(Loc);
+      }
+      return false;
+    }
+
+  private:
+    // The consumer whose TemplateStack this guard manipulates.
+    IndexConsumer *Self;
+    Mode CurMode;
+    // Raw encodings of the locations passed to visitDependent.
+    std::unordered_set<unsigned> DependentLocations;
+    // The context that was on top of the stack when this one was created, or
+    // null if there was none.
+    AutoTemplateContext *Parent;
+  };
+
+  // Top of the stack of active AutoTemplateContext guards. It is pushed by
+  // AutoTemplateContext's constructor and restored by its destructor; the
+  // code in this class treats a null value as "no template context active".
+  AutoTemplateContext *TemplateStack;
+
+  // Template instantiations are only traversed while a template context is
+  // active and that context (or one of its parents) asks for it.
+  bool shouldVisitTemplateInstantiations() const {
+    return TemplateStack && TemplateStack->shouldVisitTemplateInstantiations();
+  }
+
+  // Implicit code is visited only when there is a current declaration context
+  // and that context has its VisitImplicit flag set.
+  bool shouldVisitImplicitCode() const {
+    if (!CurDeclContext) {
+      return false;
+    }
+    return CurDeclContext->VisitImplicit;
+  }
+
+  // Traverses a class template inside a fresh AutoTemplateContext. After the
+  // regular traversal, if dependent locations were recorded, the context is
+  // switched to analyze mode and, for the canonical declaration only, every
+  // redeclaration of every specialization is traversed as well. Always
+  // returns true.
+  bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
+    AutoTemplateContext TemplateCtx(this);
+    Super::TraverseClassTemplateDecl(D);
+
+    if (TemplateCtx.needsAnalysis()) {
+      TemplateCtx.switchMode();
+
+      if (D == D->getCanonicalDecl()) {
+        for (auto *Specialization : D->specializations()) {
+          for (auto *Redecl : Specialization->redecls()) {
+            // We don't want to visit injected-class-names in this traversal.
+            if (!cast<CXXRecordDecl>(Redecl)->isInjectedClassName()) {
+              TraverseDecl(Redecl);
+            }
+          }
+        }
+      }
+    }
+
+    return true;
+  }
+
+  // Traverses a function template inside a fresh AutoTemplateContext. The
+  // regular traversal only happens when the new context is in gather mode
+  // (i.e. it did not inherit analyze mode from a parent). If dependent
+  // locations were recorded, the context is switched to analyze mode and, for
+  // the canonical declaration only, every redeclaration of every
+  // specialization is traversed as well. Always returns true.
+  bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
+    AutoTemplateContext TemplateCtx(this);
+    if (TemplateCtx.inGatherMode()) {
+      Super::TraverseFunctionTemplateDecl(D);
+    }
+
+    if (TemplateCtx.needsAnalysis()) {
+      TemplateCtx.switchMode();
+
+      if (D == D->getCanonicalDecl()) {
+        for (auto *Specialization : D->specializations()) {
+          for (auto *Redecl : Specialization->redecls()) {
+            TraverseDecl(Redecl);
+          }
+        }
+      }
+    }
+
+    return true;
+  }
+
+  // Outside of any template context every location is visited; otherwise the
+  // decision is delegated to the innermost template context.
+  bool shouldVisit(SourceLocation Loc) {
+    return !TemplateStack || TemplateStack->shouldVisit(Loc);
+  }
+
+  // Bit flags that can be OR-ed together and passed as the Flags argument of
+  // visitIdentifier.
+  enum {
+    // Flag to omit the identifier from being cross-referenced across files.
+    // This is usually desired for local variables. When set, visitIdentifier
+    // skips the "target" record and marks the "source" record with
+    // "no_crossref".
+    NoCrossref = 1 << 0,
+    // Flag to indicate the token with analysis data is not an identifier. Indicates
+    // we want to skip the check that tries to ensure a sane identifier token.
+    NotIdentifierToken = 1 << 1,
+    // This indicates that the end of the provided SourceRange is valid and
+    // should be respected. If this flag is not set, the visitIdentifier
+    // function should use only the start of the SourceRange and auto-detect
+    // the end based on whatever token is found at the start.
+    LocRangeEndValid = 1 << 2
+  };
+
+  // Emits one "structured" JSON line describing the record `decl` (class,
+  // struct or union) whose name token is at `Loc`, and appends it to the
+  // output of the file containing `Loc`.
+  //
+  // The record carries the location, pretty name, symbol, tag kind, total
+  // size in bytes, and a "fields" array. For C++ records it also carries
+  // "supers" (base classes) and "methods" arrays.
+  //
+  // NOTE(review): this queries the ASTRecordLayout of `decl` and dereferences
+  // the CXXRecordDecl of every base without a null check, so it presumably
+  // relies on callers only passing complete, non-dependent definitions -
+  // confirm against the call sites.
+  void emitStructuredInfo(SourceLocation Loc, const RecordDecl *decl) {
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    // The "loc" covers the single token found at Loc.
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset =
+        StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+    J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
+    J.attribute("structured", 1);
+    J.attribute("pretty", getQualifiedName(decl));
+    J.attribute("sym", getMangledName(CurMangleContext, decl));
+
+    J.attribute("kind", TypeWithKeyword::getTagTypeKindName(decl->getTagKind()));
+
+    const ASTContext &C = *AstContext;
+    const ASTRecordLayout &Layout = C.getASTRecordLayout(decl);
+
+    J.attribute("sizeBytes", Layout.getSize().getQuantity());
+
+    auto cxxDecl = dyn_cast<CXXRecordDecl>(decl);
+
+    // Base classes and methods only exist for C++ records.
+    if (cxxDecl) {
+      J.attributeBegin("supers");
+      J.arrayBegin();
+      for (const CXXBaseSpecifier &Base : cxxDecl->bases()) {
+        const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+
+        J.objectBegin();
+
+        J.attribute("pretty", getQualifiedName(BaseDecl));
+        J.attribute("sym", getMangledName(CurMangleContext, BaseDecl));
+
+        J.attributeBegin("props");
+        J.arrayBegin();
+        if (Base.isVirtual()) {
+          J.value("virtual");
+        }
+        J.arrayEnd();
+        J.attributeEnd();
+
+        J.objectEnd();
+      }
+      J.arrayEnd();
+      J.attributeEnd();
+
+      J.attributeBegin("methods");
+      J.arrayBegin();
+      for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) {
+        J.objectBegin();
+
+        J.attribute("pretty", getQualifiedName(MethodDecl));
+        J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
+
+        // TODO: Better figure out what to do for non-isUserProvided methods
+        // which means there's potentially semantic data that doesn't correspond
+        // to a source location in the source.  Should we be emitting
+        // structured info for those when we're processing the class here?
+
+        J.attributeBegin("props");
+        J.arrayBegin();
+        if (MethodDecl->isStatic()) {
+          J.value("static");
+        }
+        if (MethodDecl->isInstance()) {
+          J.value("instance");
+        }
+        if (MethodDecl->isVirtual()) {
+          J.value("virtual");
+        }
+        if (MethodDecl->isUserProvided()) {
+          J.value("user");
+        }
+        if (MethodDecl->isDefaulted()) {
+          J.value("defaulted");
+        }
+        if (MethodDecl->isDeleted()) {
+          J.value("deleted");
+        }
+        if (MethodDecl->isConstexpr()) {
+          J.value("constexpr");
+        }
+        J.arrayEnd();
+        J.attributeEnd();
+
+        J.objectEnd();
+      }
+      J.arrayEnd();
+      J.attributeEnd();
+    }
+
+    J.attributeBegin("fields");
+    J.arrayBegin();
+    // iField counts fields in iteration order; it is the index used to look
+    // up each field's offset in the record layout.
+    uint64_t iField = 0;
+    for (RecordDecl::field_iterator It = decl->field_begin(),
+          End = decl->field_end(); It != End; ++It, ++iField) {
+      const FieldDecl &Field = **It;
+      uint64_t localOffsetBits = Layout.getFieldOffset(iField);
+      CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits);
+
+      J.objectBegin();
+      J.attribute("pretty", getQualifiedName(&Field));
+      J.attribute("sym", getMangledName(CurMangleContext, &Field));
+      QualType FieldType = Field.getType();
+      J.attribute("type", FieldType.getAsString());
+      // "typesym" is only emitted when the canonical field type is a tag
+      // type (i.e. getAsTagDecl() yields a declaration).
+      QualType CanonicalFieldType = FieldType.getCanonicalType();
+      const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl();
+      if (tagDecl) {
+        J.attribute("typesym", getMangledName(CurMangleContext, tagDecl));
+      }
+      J.attribute("offsetBytes", localOffsetBytes.getQuantity());
+      if (Field.isBitField()) {
+        // For bit-fields, report the bit position relative to the byte
+        // offset emitted above, plus the declared width, instead of a size
+        // in bytes.
+        J.attributeBegin("bitPositions");
+        J.objectBegin();
+
+        J.attribute("begin", unsigned(localOffsetBits - C.toBits(localOffsetBytes)));
+        J.attribute("width", Field.getBitWidthValue(C));
+
+        J.objectEnd();
+        J.attributeEnd();
+      } else {
+        // Try and get the field as a record itself so we can know its size, but
+        // we don't actually want to recurse into it.
+        if (auto FieldRec = Field.getType()->getAs<RecordType>()) {
+          auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl());
+          J.attribute("sizeBytes", FieldLayout.getSize().getQuantity());
+        } else {
+          // We were unable to get it as a record, which suggests it's a normal
+          // type, in which case let's just ask for the type size.  (Maybe this
+          // would also work for the above case too?)
+          uint64_t typeSizeBits = C.getTypeSize(Field.getType());
+          CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits);
+          J.attribute("sizeBytes", typeSizeBytes.getQuantity());
+        }
+      }
+      J.objectEnd();
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+
+    // End the top-level object.
+    J.objectEnd();
+
+    FileInfo *F = getFileInfo(Loc);
+    // we want a newline.
+    ros << '\n';
+    // ros.str() is moved from here, handing the finished line to the output
+    // vector.
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  // Emits one "structured" JSON line describing the function or method `decl`
+  // whose name token is at `Loc`, and appends it to the output of the file
+  // containing `Loc`.
+  //
+  // Methods get kind "method", an optional "parentsym" and an "overrides"
+  // array; everything else gets kind "function". Both get a "props" array.
+  void emitStructuredInfo(SourceLocation Loc, const FunctionDecl *decl) {
+    std::string Line;
+    llvm::raw_string_ostream LineStream(Line);
+    llvm::json::OStream J(LineStream);
+    // Start the top-level object.
+    J.objectBegin();
+
+    // The "loc" covers the single token found at Loc.
+    const unsigned TokenStart = SM.getFileOffset(Loc);
+    const unsigned TokenEnd =
+        TokenStart + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+    J.attribute("loc", locationToString(Loc, TokenEnd - TokenStart));
+    J.attribute("structured", 1);
+    J.attribute("pretty", getQualifiedName(decl));
+    J.attribute("sym", getMangledName(CurMangleContext, decl));
+
+    const auto *Method = dyn_cast<CXXMethodDecl>(decl);
+
+    if (!Method) {
+      J.attribute("kind", "function");
+    } else {
+      J.attribute("kind", "method");
+      if (auto Parent = Method->getParent()) {
+        J.attribute("parentsym", getMangledName(CurMangleContext, Parent));
+      }
+
+      J.attributeBegin("overrides");
+      J.arrayBegin();
+      for (const CXXMethodDecl *Overridden : Method->overridden_methods()) {
+        J.objectBegin();
+
+        // TODO: Make sure we're doing template traversals appropriately...
+        // findOverriddenMethods (now removed) liked to do:
+        //   if (Decl->isTemplateInstantiation()) {
+        //     Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
+        //   }
+        // I think our pre-emptive dereferencing/avoidance of templates may
+        // protect us from this, but it needs more investigation.
+
+        J.attribute("pretty", getQualifiedName(Overridden));
+        J.attribute("sym", getMangledName(CurMangleContext, Overridden));
+
+        J.objectEnd();
+      }
+      J.arrayEnd();
+      J.attributeEnd();
+    }
+
+    // ## Props
+    J.attributeBegin("props");
+    J.arrayBegin();
+    // Method-only properties come first so that everything ends up in one
+    // array; the remaining ones apply to any function.
+    if (Method) {
+      if (Method->isStatic()) {
+        J.value("static");
+      }
+      if (Method->isInstance()) {
+        J.value("instance");
+      }
+      if (Method->isVirtual()) {
+        J.value("virtual");
+      }
+      if (Method->isUserProvided()) {
+        J.value("user");
+      }
+    }
+    if (decl->isDefaulted()) {
+      J.value("defaulted");
+    }
+    if (decl->isDeleted()) {
+      J.value("deleted");
+    }
+    if (decl->isConstexpr()) {
+      J.value("constexpr");
+    }
+    J.arrayEnd();
+    J.attributeEnd();
+
+    // End the top-level object.
+    J.objectEnd();
+
+    FileInfo *Info = getFileInfo(Loc);
+    // Each record is terminated by a newline.
+    LineStream << '\n';
+    Info->Output.push_back(std::move(LineStream.str()));
+  }
+
+  /**
+   * Emit structured info for a field.  Right now the intent is for this to just
+   * be a pointer to its parent's structured info with this method entirely
+   * avoiding getting the ASTRecordLayout.
+   *
+   * The emitted record carries the location of the token at `Loc`, the
+   * field's pretty name and symbol, kind "field" and, when the field has a
+   * parent record, its symbol as "parentsym".  Nothing is emitted for
+   * Objective-C ivars.
+   *
+   * TODO: Give more thought on where to locate the canonical info on fields and
+   * how to normalize their exposure over the web.  We could relink the info
+   * both at cross-reference time and web-server lookup time.  This is also
+   * called out in `analysis.md`.
+   */
+  void emitStructuredInfo(SourceLocation Loc, const FieldDecl *decl) {
+    // XXX the call to decl::getParent will assert below for ObjCIvarDecl
+    // instances because their DeclContext is not a RecordDecl.  So just bail
+    // for now.
+    // TODO: better support ObjC.
+    if (isa<ObjCIvarDecl>(decl)) {
+      return;
+    }
+
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    // The "loc" covers the single token found at Loc.
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset =
+        StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+    J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
+    J.attribute("structured", 1);
+    J.attribute("pretty", getQualifiedName(decl));
+    J.attribute("sym", getMangledName(CurMangleContext, decl));
+    J.attribute("kind", "field");
+
+    if (auto parentDecl = decl->getParent()) {
+      J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
+    }
+
+    // End the top-level object.
+    J.objectEnd();
+
+    FileInfo *F = getFileInfo(Loc);
+    // we want a newline.
+    ros << '\n';
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  // XXX Type annotating.
+  // QualType is the type class.  It has helpers like TagDecl via getAsTagDecl.
+  // ValueDecl exposes a getType() method.
+  //
+  // Arguably it makes sense to only expose types that Searchfox has definitions
+  // for as first-class.  Probably the way to go is like context/contextsym.
+  // We expose a "type" which is just a human-readable string which has no
+  // semantic purposes and is just a display string, plus then a "typesym" which
+  // we expose if we were able to map the type.
+  //
+  // Other meta-info: field offsets.  Ancestor types.
+
+  // This is the only function that emits "target" and "source" analysis JSON
+  // data. It should be called for each identifier that corresponds to a
+  // symbol.
+  //
+  //  - Kind / SyntaxKind: strings written to the "kind" attribute and combined
+  //    into the "syntax" and "pretty" attributes.
+  //  - QualName: human-readable name of the symbol. It is always used together
+  //    with its length, so it does not need to be NUL-terminated.
+  //  - LocRange: location of the token; only the start is used unless
+  //    LocRangeEndValid is set in Flags.
+  //  - Symbol: the symbol string written to "sym".
+  //  - MaybeType: if non-null, emitted as "type" (and "typesym" when the
+  //    canonical type is a tag type).
+  //  - TokenContext: emitted as "context"/"contextsym" when non-empty.
+  //  - Flags: combination of NoCrossref, NotIdentifierToken, LocRangeEndValid.
+  //  - PeekRange / NestingRange: emitted as "peekRange" / "nestingRange" when
+  //    valid and their string form is non-empty.
+  //
+  // Nothing is emitted if the location should not be visited in the current
+  // template mode, or if the token text is not a valid identifier (unless
+  // NotIdentifierToken is set).
+  void visitIdentifier(const char *Kind, const char *SyntaxKind,
+                       llvm::StringRef QualName, SourceRange LocRange,
+                       std::string Symbol,
+                       QualType MaybeType = QualType(),
+                       Context TokenContext = Context(), int Flags = 0,
+                       SourceRange PeekRange = SourceRange(),
+                       SourceRange NestingRange = SourceRange()) {
+    SourceLocation Loc = LocRange.getBegin();
+    if (!shouldVisit(Loc)) {
+      return;
+    }
+
+    // Find the file positions corresponding to the token.
+    unsigned StartOffset = SM.getFileOffset(Loc);
+    unsigned EndOffset = (Flags & LocRangeEndValid)
+        ? SM.getFileOffset(LocRange.getEnd())
+        : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
+
+    // The "target" and the "source" record share the same location string, so
+    // it is only computed once.
+    std::string LocStr = locationToString(Loc, EndOffset - StartOffset);
+
+    if (!(Flags & NotIdentifierToken)) {
+      // Get the token's characters so we can make sure it's a valid token.
+      const char *StartChars = SM.getCharacterData(Loc);
+      std::string Text(StartChars, EndOffset - StartOffset);
+      if (!isValidIdentifier(Text)) {
+        return;
+      }
+    }
+
+    FileInfo *F = getFileInfo(Loc);
+
+    if (!(Flags & NoCrossref)) {
+      std::string json_str;
+      llvm::raw_string_ostream ros(json_str);
+      llvm::json::OStream J(ros);
+      // Start the top-level object.
+      J.objectBegin();
+
+      J.attribute("loc", LocStr);
+      J.attribute("target", 1);
+      J.attribute("kind", Kind);
+      // Pass the StringRef itself: QualName.data() is not guaranteed to be
+      // NUL-terminated, so it must not be treated as a C string.
+      J.attribute("pretty", QualName);
+      J.attribute("sym", Symbol);
+      if (!TokenContext.Name.empty()) {
+        J.attribute("context", TokenContext.Name);
+      }
+      if (!TokenContext.Symbol.empty()) {
+        J.attribute("contextsym", TokenContext.Symbol);
+      }
+      if (PeekRange.isValid()) {
+        std::string PeekRangeStr = lineRangeToString(PeekRange);
+        if (!PeekRangeStr.empty()) {
+          J.attribute("peekRange", PeekRangeStr);
+        }
+      }
+
+      // End the top-level object.
+      J.objectEnd();
+      // we want a newline.
+      ros << '\n';
+      F->Output.push_back(std::move(ros.str()));
+    }
+
+    // Generate a single "source":1 for all the symbols. If we search from here,
+    // we want to union the results for every symbol in `symbols`.
+    std::string json_str;
+    llvm::raw_string_ostream ros(json_str);
+    llvm::json::OStream J(ros);
+    // Start the top-level object.
+    J.objectBegin();
+
+    J.attribute("loc", LocStr);
+    J.attribute("source", 1);
+
+    if (NestingRange.isValid()) {
+      std::string NestingRangeStr = fullRangeToString(NestingRange);
+      if (!NestingRangeStr.empty()) {
+        J.attribute("nestingRange", NestingRangeStr);
+      }
+    }
+
+    // "syntax" is "<Kind>,<SyntaxKind>", or empty for non-cross-referenced
+    // identifiers.
+    std::string Syntax;
+    if (Flags & NoCrossref) {
+      J.attribute("syntax", "");
+    } else {
+      Syntax = Kind;
+      Syntax.push_back(',');
+      Syntax.append(SyntaxKind);
+      J.attribute("syntax", Syntax);
+    }
+
+    if (!MaybeType.isNull()) {
+      J.attribute("type", MaybeType.getAsString());
+      QualType canonical = MaybeType.getCanonicalType();
+      const TagDecl *decl = canonical->getAsTagDecl();
+      if (decl) {
+        std::string Mangled = getMangledName(CurMangleContext, decl);
+        J.attribute("typesym", Mangled);
+      }
+    }
+
+    // "pretty" is "<SyntaxKind> <QualName>". Append with an explicit length
+    // because the StringRef's buffer need not be NUL-terminated.
+    std::string Pretty(SyntaxKind);
+    Pretty.push_back(' ');
+    Pretty.append(QualName.data(), QualName.size());
+    J.attribute("pretty", Pretty);
+
+    J.attribute("sym", Symbol);
+
+    if (Flags & NoCrossref) {
+      J.attribute("no_crossref", 1);
+    }
+
+    // End the top-level object.
+    J.objectEnd();
+
+    // we want a newline.
+    ros << '\n';
+    F->Output.push_back(std::move(ros.str()));
+  }
+
+  // Replaces *Loc, in place, with the spelling location that the
+  // SourceManager reports for it.
+  void normalizeLocation(SourceLocation *Loc) {
+    const SourceLocation Spelling = SM.getSpellingLoc(*Loc);
+    *Loc = Spelling;
+  }
+
+  // Helper for cases where the left brace cannot be reached through the AST:
+  // asks the lexer for a left-brace token following Loc. Make sure you're
+  // picking the start location appropriately!
+  // NOTE(review): the result is whatever Lexer::findLocationAfterToken
+  // returns for tok::l_brace - confirm the exact position (and the failure
+  // value) against the Clang documentation before relying on it.
+  SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
+    const bool SkipTrailingWhitespace = false;
+    return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO,
+                                         SkipTrailingWhitespace);
+  }
+
+  // Returns the source range of D when D is a CompoundStmt. For a null D or
+  // any other kind of statement, returns a default-constructed SourceRange.
+  SourceRange getCompoundStmtRange(Stmt* D) {
+    if (D) {
+      if (CompoundStmt *Compound = dyn_cast<CompoundStmt>(D)) {
+        return Compound->getSourceRange();
+      }
+    }
+    return SourceRange();
+  }
+
+  // Computes the range to show when "peeking" at a function: from the start
+  // of the declaration (which may include a return type on a separate line)
+  // through the function's name, extended to the end of the last parameter
+  // that lives in the same file as the name.
+  SourceRange getFunctionPeekRange(FunctionDecl* D) {
+    const SourceLocation PeekBegin = D->getBeginLoc();
+
+    // Without usable parameters we stop at the function's name.
+    SourceLocation PeekEnd = D->getLocation();
+    const FileID NameFile = SM.getDecomposedLoc(PeekEnd).first;
+
+    for (ParmVarDecl* Param : D->parameters()) {
+      // A parameter in another file suggests macros are involved or
+      // something; such parameters are not included.
+      if (SM.getDecomposedLoc(Param->getLocation()).first != NameFile) {
+        continue;
+      }
+      // Parameters are assumed to be in order, so the last match wins.
+      PeekEnd = Param->getEndLoc();
+    }
+
+    return SourceRange(PeekBegin, PeekEnd);
+  }
+
+  // Computes the range to show when "peeking" at a tag declaration: from the
+  // start of the declaration through its name, extended for C++ records to
+  // the end of the last base specifier that lives in the same file as the
+  // name.
+  SourceRange getTagPeekRange(TagDecl* D) {
+    const SourceLocation PeekBegin = D->getBeginLoc();
+
+    // Without usable base specifiers we stop at the tag's name.
+    SourceLocation PeekEnd = D->getLocation();
+    const FileID NameFile = SM.getDecomposedLoc(PeekEnd).first;
+
+    if (CXXRecordDecl* Record = dyn_cast<CXXRecordDecl>(D)) {
+      for (CXXBaseSpecifier& Base : Record->bases()) {
+        // A base specifier ending in another file suggests macros are
+        // involved or something; such bases are not included.
+        if (SM.getDecomposedLoc(Base.getEndLoc()).first != NameFile) {
+          continue;
+        }
+        // Bases are assumed to be in order, so the last match wins.
+        PeekEnd = Base.getEndLoc();
+      }
+    }
+
+    return SourceRange(PeekBegin, PeekEnd);
+  }
+
+  // Returns the source range of the raw comment that the ASTContext reports
+  // for D, or a default-constructed SourceRange when there is none.
+  SourceRange getCommentRange(NamedDecl* D) {
+    if (const RawComment* Comment =
+            AstContext->getRawCommentForDeclNoCache(D)) {
+      return Comment->getSourceRange();
+    }
+    return SourceRange();
+  }
+
+  // Unions two ranges: the result starts at whichever range begins first and
+  // ends at whichever range ends last, so overlapping, nested and touching
+  // ranges are handled as well as disjoint ones.
+  //
+  // If either range is invalid, the other one is returned unchanged. As a
+  // sanity check, all four endpoints must live in the same file; if they
+  // don't, the first range is returned.
+  SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
+    if (Range1.isInvalid()) {
+      return Range2;
+    }
+    if (Range2.isInvalid()) {
+      return Range1;
+    }
+
+    std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin());
+    std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd());
+    std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin());
+    std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd());
+
+    if (Begin1.first != End1.first || Begin1.first != Begin2.first ||
+        Begin1.first != End2.first) {
+      // Something weird is probably happening with the preprocessor. Offsets
+      // from different files cannot be compared, so just return the first
+      // range.
+      return Range1;
+    }
+
+    // Pick the earliest begin and the latest end independently. Comparing
+    // Begin1 against End2 alone would truncate the result whenever the
+    // ranges overlap or touch.
+    SourceLocation Begin =
+        Begin1.second <= Begin2.second ? Range1.getBegin() : Range2.getBegin();
+    SourceLocation End =
+        End1.second >= End2.second ? Range1.getEnd() : Range2.getEnd();
+    return SourceRange(Begin, End);
+  }
+
+  // Given a location and a range, returns the range if:
+  // - Both ends of the range live in the same file as the location.
+  // - The end offset is strictly greater than the begin offset.
+  // Returns an empty range otherwise.
+  SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
+    const FileID LocFile = SM.getDecomposedLoc(Loc).first;
+    const std::pair<FileID, unsigned> RangeBegin =
+        SM.getDecomposedLoc(Range.getBegin());
+    const std::pair<FileID, unsigned> RangeEnd =
+        SM.getDecomposedLoc(Range.getEnd());
+
+    const bool SameFile =
+        RangeBegin.first == LocFile && RangeEnd.first == LocFile;
+    const bool EndAfterBegin = RangeBegin.second < RangeEnd.second;
+
+    if (SameFile && EndAfterBegin) {
+      return Range;
+    }
+    return SourceRange();
+  }
+
+  // Emits a definition/declaration record for a named declaration.
+  //
+  // Picks the record's Kind ("def", "decl" or "forward") and PrettyKind from
+  // the concrete declaration type, computes the optional peek and nesting
+  // ranges, and reports the identifier through visitIdentifier(). Declaration
+  // types not handled by the if/else chain below are skipped. For some
+  // records, functions and fields it additionally calls emitStructuredInfo().
+  // Every path returns true.
+  bool VisitNamedDecl(NamedDecl *D) {
+    SourceLocation Loc = D->getLocation();
+
+    // If the token is from a macro expansion and the expansion location
+    // is interesting, use that instead as it tends to be more useful.
+    // expandedLoc keeps the location from before that remapping; the
+    // destructor handling further down inspects it.
+    SourceLocation expandedLoc = Loc;
+    if (SM.isMacroBodyExpansion(Loc)) {
+      Loc = SM.getFileLoc(Loc);
+    }
+
+    normalizeLocation(&Loc);
+    if (!isInterestingLocation(Loc)) {
+      return true;
+    }
+
+    if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
+      // Unnamed parameter in function proto.
+      return true;
+    }
+
+    int Flags = 0;
+    const char *Kind = "def";
+    const char *PrettyKind = "?";
+    bool wasTemplate = false;
+    // Default peek range: the whole declaration. Several branches below
+    // replace it with something more specific.
+    SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc());
+    // The nesting range identifies the left brace and right brace, which
+    // heavily depends on the AST node type.
+    SourceRange NestingRange;
+    if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
+      if (D2->isTemplateInstantiation()) {
+        // From here on D is the pattern the function was instantiated from,
+        // while D2 still refers to the instantiation itself.
+        wasTemplate = true;
+        D = D2->getTemplateInstantiationPattern();
+      }
+      // We treat pure virtual declarations as definitions.
+      Kind = (D2->isThisDeclarationADefinition() || D2->isPure()) ? "def" : "decl";
+      PrettyKind = "function";
+      PeekRange = getFunctionPeekRange(D2);
+
+      // Only emit the nesting range if:
+      // - This is a definition AND
+      // - This isn't a template instantiation.  Function templates'
+      //   instantiations can end up as a definition with a Loc at their point
+      //   of declaration but with the CompoundStmt of the template's
+      //   point of definition.  This really messes up the nesting range logic.
+      //   At the time of writing this, the test repo's `big_header.h`'s
+      //   `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
+      //   instantiated by `big_cpp.cpp` triggers this phenomenon.
+      //
+      // Note: As covered elsewhere, template processing is tricky and it's
+      // conceivable that we may change traversal patterns in the future,
+      // mooting this guard.
+      if (D2->isThisDeclarationADefinition() &&
+          !D2->isTemplateInstantiation()) {
+        // The CompoundStmt range is the brace range.
+        // NOTE(review): getBody() may be null for definitions that have no
+        // statement body (e.g. defaulted functions) -- confirm that
+        // getCompoundStmtRange() tolerates a null argument.
+        NestingRange = getCompoundStmtRange(D2->getBody());
+      }
+    } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
+      Kind = D2->isThisDeclarationADefinition() ? "def" : "forward";
+      PrettyKind = "type";
+
+      if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
+        PeekRange = getTagPeekRange(D2);
+        NestingRange = D2->getBraceRange();
+      } else {
+        // Forward declarations (and anything that is not the declaration
+        // getDefinition() returns) get no peek range.
+        PeekRange = SourceRange();
+      }
+    } else if (isa<TypedefNameDecl>(D)) {
+      Kind = "def";
+      PrettyKind = "type";
+      PeekRange = SourceRange(Loc, Loc);
+    } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
+      // Local variables and parameters are flagged NoCrossref.
+      if (D2->isLocalVarDeclOrParm()) {
+        Flags = NoCrossref;
+      }
+
+      Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
+                 ? "decl"
+                 : "def";
+      PrettyKind = "variable";
+    } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
+      Kind = "def";
+      PrettyKind = "namespace";
+      PeekRange = SourceRange(Loc, Loc);
+      // Only real namespaces (not aliases) get a nesting range.
+      NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
+      if (D2) {
+        // There's no exposure of the left brace so we have to find it.
+        NestingRange = SourceRange(
+          findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc),
+          D2->getRBraceLoc());
+      }
+    } else if (isa<FieldDecl>(D)) {
+      Kind = "def";
+      PrettyKind = "field";
+    } else if (isa<EnumConstantDecl>(D)) {
+      Kind = "def";
+      PrettyKind = "enum constant";
+    } else {
+      // Any other kind of named declaration is not recorded.
+      return true;
+    }
+
+    // Only ValueDecls contribute a type; everything else is reported with a
+    // default-constructed QualType.
+    QualType qtype = QualType();
+    if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) {
+      qtype = D2->getType();
+    }
+
+    // Fold the comment range into the peek range, then replace either range
+    // with an empty one if validateRange() rejects it.
+    SourceRange CommentRange = getCommentRange(D);
+    PeekRange = combineRanges(PeekRange, CommentRange);
+    PeekRange = validateRange(Loc, PeekRange);
+    NestingRange = validateRange(Loc, NestingRange);
+
+    std::string Symbol = getMangledName(CurMangleContext, D);
+
+    // In the case of destructors, Loc might point to the ~ character. In that
+    // case we want to skip to the name of the class. However, Loc might also
+    // point to other places that generate destructors, such as the use site of
+    // a macro that expands to generate a destructor, or a lambda (apparently
+    // clang 8 creates a destructor declaration for at least some lambdas). In
+    // the former case we'll use the macro use site as the location, and in the
+    // latter we'll just drop the declaration.
+    if (isa<CXXDestructorDecl>(D)) {
+      PrettyKind = "destructor";
+      const char *P = SM.getCharacterData(Loc);
+      if (*P == '~') {
+        // Advance Loc to the class name
+        P++;
+
+        // Skipped counts the '~' itself plus any whitespace that follows it.
+        unsigned Skipped = 1;
+        while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
+          P++;
+          Skipped++;
+        }
+
+        Loc = Loc.getLocWithOffset(Skipped);
+      } else {
+        // See if the destructor is coming from a macro expansion
+        P = SM.getCharacterData(expandedLoc);
+        if (*P != '~') {
+          // It's not
+          return true;
+        }
+        // It is, so just use Loc as-is
+      }
+    }
+
+    visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbol,
+                    qtype,
+                    getContext(D), Flags, PeekRange, NestingRange);
+
+    // In-progress structured info emission.
+    if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) {
+      if (D2->isThisDeclarationADefinition() &&
+          // XXX getASTRecordLayout doesn't work for dependent types, so we
+          // avoid calling into emitStructuredInfo for now if there's a
+          // dependent type or if we're in any kind of template context.  This
+          // should be re-evaluated once this is working for normal classes and
+          // we can better evaluate what is useful.
+          !D2->isDependentType() &&
+          !TemplateStack) {
+        emitStructuredInfo(Loc, D2);
+      }
+    }
+    if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
+      if ((D2->isThisDeclarationADefinition() || D2->isPure()) &&
+          // a clause at the top should have generalized and set wasTemplate so
+          // it shouldn't be the case that isTemplateInstantiation() is true.
+          !D2->isTemplateInstantiation() &&
+          !wasTemplate &&
+          !D2->isFunctionTemplateSpecialization() &&
+          !TemplateStack) {
+        emitStructuredInfo(Loc, D2);
+      }
+    }
+    if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) {
+      if (!D2->isTemplated() &&
+          !TemplateStack) {
+        emitStructuredInfo(Loc, D2);
+      }
+    }
+
+    return true;
+  }
+
+  // Records a "use" of the constructor invoked by a construct-expression,
+  // attributed to the expression's begin location. Always returns true.
+  bool VisitCXXConstructExpr(CXXConstructExpr *E) {
+    SourceLocation UseLoc = E->getBeginLoc();
+    normalizeLocation(&UseLoc);
+    if (isInterestingLocation(UseLoc)) {
+      // An instantiated constructor is reported as the pattern it was
+      // instantiated from.
+      FunctionDecl *Target = E->getConstructor();
+      if (Target->isTemplateInstantiation()) {
+        Target = Target->getTemplateInstantiationPattern();
+      }
+      std::string Symbol = getMangledName(CurMangleContext, Target);
+
+      // FIXME: Need to do something different for list initialization.
+
+      visitIdentifier("use", "constructor", getQualifiedName(Target), UseLoc,
+                      Symbol, QualType(), getContext(UseLoc));
+    }
+
+    return true;
+  }
+
+  // Records a "use" of the called function for overloaded-operator calls and
+  // member calls. Calls whose callee is a plain DeclRefExpr are left to
+  // VisitDeclRefExpr, and every other callee shape is ignored. Always returns
+  // true.
+  bool VisitCallExpr(CallExpr *E) {
+    Decl *CalleeDecl = E->getCalleeDecl();
+    if (!CalleeDecl) {
+      return true;
+    }
+    const FunctionDecl *Fn = dyn_cast<FunctionDecl>(CalleeDecl);
+    if (!Fn) {
+      return true;
+    }
+
+    // A template instantiation is reported as its pattern.
+    const NamedDecl *Target = Fn;
+    if (Fn->isTemplateInstantiation()) {
+      Target = Fn->getTemplateInstantiationPattern();
+    }
+
+    std::string Symbol = getMangledName(CurMangleContext, Target);
+
+    SourceLocation UseLoc;
+    int Flags = 0;
+    Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
+    if (CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E)) {
+      // Just take the first token.
+      UseLoc = Op->getOperatorLoc();
+      Flags |= NotIdentifierToken;
+    } else if (MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr)) {
+      UseLoc = Member->getMemberLoc();
+    } else {
+      // DeclRefExpr callees are handled in VisitDeclRefExpr; nothing is
+      // recorded here for any other kind of callee expression either.
+      return true;
+    }
+
+    normalizeLocation(&UseLoc);
+    if (isInterestingLocation(UseLoc)) {
+      visitIdentifier("use", "function", getQualifiedName(Target), UseLoc,
+                      Symbol, E->getCallReturnType(*AstContext),
+                      getContext(UseLoc), Flags);
+    }
+
+    return true;
+  }
+
+  // Records a "use" of the tag declaration named by this type location.
+  // Always returns true.
+  bool VisitTagTypeLoc(TagTypeLoc L) {
+    SourceLocation UseLoc = L.getBeginLoc();
+    normalizeLocation(&UseLoc);
+    if (isInterestingLocation(UseLoc)) {
+      TagDecl *Tag = L.getDecl();
+      std::string Symbol = getMangledName(CurMangleContext, Tag);
+      visitIdentifier("use", "type", getQualifiedName(Tag), UseLoc, Symbol,
+                      L.getType(), getContext(UseLoc));
+    }
+    return true;
+  }
+
+  // Records a "use" of the typedef-name declaration referenced by this type
+  // location. Always returns true.
+  bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
+    SourceLocation UseLoc = L.getBeginLoc();
+    normalizeLocation(&UseLoc);
+    if (isInterestingLocation(UseLoc)) {
+      NamedDecl *Alias = L.getTypedefNameDecl();
+      std::string Symbol = getMangledName(CurMangleContext, Alias);
+      visitIdentifier("use", "type", getQualifiedName(Alias), UseLoc, Symbol,
+                      L.getType(), getContext(UseLoc));
+    }
+    return true;
+  }
+
+  // Records a "use" of the declaration behind an injected-class-name type
+  // location. Always returns true.
+  bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
+    SourceLocation UseLoc = L.getBeginLoc();
+    normalizeLocation(&UseLoc);
+    if (isInterestingLocation(UseLoc)) {
+      NamedDecl *Record = L.getDecl();
+      std::string Symbol = getMangledName(CurMangleContext, Record);
+      visitIdentifier("use", "type", getQualifiedName(Record), UseLoc, Symbol,
+                      L.getType(), getContext(UseLoc));
+    }
+    return true;
+  }
+
+  // Records a "use" of the templated declaration behind a template
+  // specialization type location. Only class templates and alias templates
+  // are reported; any other template kind, or a template name that does not
+  // resolve to a declaration, is ignored. Always returns true.
+  bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
+    SourceLocation Loc = L.getBeginLoc();
+    normalizeLocation(&Loc);
+    if (!isInterestingLocation(Loc)) {
+      return true;
+    }
+
+    // getAsTemplateDecl() returns null when the template name does not refer
+    // to one specific declaration, and dyn_cast<> must not be handed a null
+    // pointer, so bail out before casting.
+    TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
+    if (!Td) {
+      return true;
+    }
+
+    NamedDecl *Decl = nullptr;
+    if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) {
+      Decl = D->getTemplatedDecl();
+    } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) {
+      Decl = D->getTemplatedDecl();
+    }
+
+    if (Decl) {
+      std::string Mangled = getMangledName(CurMangleContext, Decl);
+      visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
+                      QualType(), getContext(Loc));
+    }
+
+    return true;
+  }
+
+  // Records a "use" for a reference to a variable, function or enum constant.
+  // References to any other kind of declaration are ignored. Always returns
+  // true.
+  bool VisitDeclRefExpr(DeclRefExpr *E) {
+    SourceLocation UseLoc = E->getExprLoc();
+    normalizeLocation(&UseLoc);
+    if (!isInterestingLocation(UseLoc)) {
+      return true;
+    }
+
+    // For a qualified reference, report at the name's location instead. The
+    // new location is normalized but not re-checked for interest.
+    if (E->hasQualifier()) {
+      UseLoc = E->getNameInfo().getLoc();
+      normalizeLocation(&UseLoc);
+    }
+
+    NamedDecl *Referenced = E->getDecl();
+    if (const VarDecl *Var = dyn_cast<VarDecl>(Referenced)) {
+      // Locals and parameters are flagged NoCrossref.
+      const int Flags = Var->isLocalVarDeclOrParm() ? NoCrossref : 0;
+      std::string Symbol = getMangledName(CurMangleContext, Referenced);
+      visitIdentifier("use", "variable", getQualifiedName(Referenced), UseLoc,
+                      Symbol, Var->getType(), getContext(UseLoc), Flags);
+    } else if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(Referenced)) {
+      // A template instantiation is reported as its pattern.
+      if (Fn->isTemplateInstantiation()) {
+        Referenced = Fn->getTemplateInstantiationPattern();
+      }
+
+      std::string Symbol = getMangledName(CurMangleContext, Referenced);
+      visitIdentifier("use", "function", getQualifiedName(Referenced), UseLoc,
+                      Symbol, E->getType(), getContext(UseLoc));
+    } else if (isa<EnumConstantDecl>(Referenced)) {
+      std::string Symbol = getMangledName(CurMangleContext, Referenced);
+      visitIdentifier("use", "enum", getQualifiedName(Referenced), UseLoc,
+                      Symbol, E->getType(), getContext(UseLoc));
+    }
+
+    return true;
+  }
+
+  // Records a "use" of every field that is explicitly initialized in this
+  // constructor's member-initializer list. Initializers that do not name a
+  // member, or that were not written in the source, are skipped. Always
+  // returns true.
+  bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
+    if (isInterestingLocation(D->getLocation())) {
+      CXXConstructorDecl::init_const_iterator Cur = D->init_begin();
+      for (; Cur != D->init_end(); ++Cur) {
+        const CXXCtorInitializer *Init = *Cur;
+        if (Init->getMember() && Init->isWritten()) {
+          SourceLocation UseLoc = Init->getMemberLocation();
+          normalizeLocation(&UseLoc);
+          if (isInterestingLocation(UseLoc)) {
+            FieldDecl *Field = Init->getMember();
+            std::string Symbol = getMangledName(CurMangleContext, Field);
+            // The context is derived from the constructor, not from UseLoc.
+            visitIdentifier("use", "field", getQualifiedName(Field), UseLoc,
+                            Symbol, Field->getType(), getContext(D));
+          }
+        }
+      }
+    }
+
+    return true;
+  }
+
+  // Records a "use" of a field accessed through a member expression. Members
+  // that are not fields are ignored here. Always returns true.
+  bool VisitMemberExpr(MemberExpr *E) {
+    SourceLocation UseLoc = E->getExprLoc();
+    normalizeLocation(&UseLoc);
+    if (isInterestingLocation(UseLoc)) {
+      ValueDecl *Member = E->getMemberDecl();
+      FieldDecl *Field = dyn_cast<FieldDecl>(Member);
+      if (Field) {
+        std::string Symbol = getMangledName(CurMangleContext, Field);
+        visitIdentifier("use", "field", getQualifiedName(Field), UseLoc,
+                        Symbol, Field->getType(), getContext(UseLoc));
+      }
+    }
+    return true;
+  }
+
+  // Reports the member location of a dependent-scope member expression to the
+  // current TemplateStack, if there is one. Always returns true.
+  bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
+    SourceLocation MemberLoc = E->getMemberLoc();
+    normalizeLocation(&MemberLoc);
+    // The location check runs first, exactly as before; the stack is only
+    // consulted for interesting locations.
+    if (isInterestingLocation(MemberLoc) && TemplateStack) {
+      TemplateStack->visitDependent(MemberLoc);
+    }
+    return true;
+  }
+
+  // Emits a "def" record of kind "file" for the file containing Loc, provided
+  // that file is marked as interesting.
+  void enterSourceFile(SourceLocation Loc) {
+    normalizeLocation(&Loc);
+    FileInfo *Info = getFileInfo(Loc);
+    if (Info->Interesting) {
+      FileType Kind = FileType::Source;
+      if (Info->Generated) {
+        Kind = FileType::Generated;
+      }
+      std::string FileSymbol =
+          std::string("FILE_") + mangleFile(Info->Realname, Kind);
+
+      // An explicit zero-length range at Loc is passed together with
+      // LocRangeEndValid. Without that flag, visitIdentifier would use the
+      // entire first token, which could be e.g. a long multiline-comment.
+      visitIdentifier("def", "file", Info->Realname, SourceRange(Loc),
+                      FileSymbol, QualType(), Context(),
+                      NotIdentifierToken | LocRangeEndValid);
+    }
+  }
+
+  // Emits a "use" record of kind "file" for an #include directive.
+  //
+  // FileNameRange is the source range of the file name in the directive and
+  // File is the file it resolved to. File may be null when the include could
+  // not be resolved; such directives are ignored. Files whose path
+  // relativizePath() classifies as FileType::Unknown are ignored as well.
+  void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) {
+    if (!File) {
+      // Nothing to link to for an include that did not resolve to a file.
+      return;
+    }
+
+    std::string includedFile(File->tryGetRealPathName());
+    // relativizePath() takes the path by name and returns its FileType.
+    FileType type = relativizePath(includedFile);
+    if (type == FileType::Unknown) {
+      return;
+    }
+    std::string symbol =
+        std::string("FILE_") + mangleFile(includedFile, type);
+
+    visitIdentifier("use", "file", includedFile, FileNameRange, symbol,
+                    QualType(), Context(),
+                    NotIdentifierToken | LocRangeEndValid);
+  }
+
+  // Emits a "def" record for a macro definition. Builtin macros, definitions
+  // at uninteresting locations, and tokens without identifier info are
+  // skipped.
+  void macroDefined(const Token &Tok, const MacroDirective *Macro) {
+    const bool Builtin = Macro->getMacroInfo()->isBuiltinMacro();
+    if (Builtin) {
+      return;
+    }
+
+    SourceLocation DefLoc = Tok.getLocation();
+    normalizeLocation(&DefLoc);
+    if (!isInterestingLocation(DefLoc)) {
+      return;
+    }
+
+    IdentifierInfo *Name = Tok.getIdentifierInfo();
+    if (!Name) {
+      return;
+    }
+
+    // The symbol is "M_" followed by the mangled definition location and name.
+    std::string Symbol =
+        std::string("M_") + mangleLocation(DefLoc, std::string(Name->getName()));
+    visitIdentifier("def", "macro", Name->getName(), DefLoc, Symbol);
+  }
+
+  // Emits a "use" record for a reference to a macro. Nothing is emitted when
+  // Macro is null or builtin, when the token's location is uninteresting, or
+  // when the token carries no identifier info.
+  void macroUsed(const Token &Tok, const MacroInfo *Macro) {
+    if (!Macro || Macro->isBuiltinMacro()) {
+      return;
+    }
+
+    SourceLocation UseLoc = Tok.getLocation();
+    normalizeLocation(&UseLoc);
+    if (!isInterestingLocation(UseLoc)) {
+      return;
+    }
+
+    if (IdentifierInfo *Name = Tok.getIdentifierInfo()) {
+      // The symbol is built from the macro's definition location rather than
+      // from the use site.
+      std::string Symbol =
+          std::string("M_") +
+          mangleLocation(Macro->getDefinitionLoc(), std::string(Name->getName()));
+      visitIdentifier("use", "macro", Name->getName(), UseLoc, Symbol);
+    }
+  }
+};
+
+// Preprocessor callback for file changes. Only entering a file is forwarded
+// to the indexer. RenameFile and SystemHeaderPragma are ignored because the
+// actual on-disk filenames are wanted, and ExitFile is of no interest.
+void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                                   SrcMgr::CharacteristicKind FileType,
+                                   FileID PrevFID = FileID()) {
+  if (Reason == PPCallbacks::EnterFile) {
+    Indexer->enterSourceFile(Loc);
+  }
+}
+
+// Preprocessor callback for an inclusion directive. Forwards the range of
+// the file name and the resolved file to the indexer. The type of File
+// depends on the clang version, hence the conditional parameter below.
+void PreprocessorHook::InclusionDirective(SourceLocation HashLoc,
+                                          const Token &IncludeTok,
+                                          StringRef FileName,
+                                          bool IsAngled,
+                                          CharSourceRange FileNameRange,
+#if CLANG_VERSION_MAJOR >= 16
+                                          OptionalFileEntryRef File,
+#elif CLANG_VERSION_MAJOR >= 15
+                                          Optional<FileEntryRef> File,
+#else
+                                          const FileEntry *File,
+#endif
+                                          StringRef SearchPath,
+                                          StringRef RelativePath,
+                                          const Module *Imported,
+                                          SrcMgr::CharacteristicKind FileType) {
+  // File is empty/null when the include did not resolve to a file. This
+  // check is valid for all three parameter types above, so it is applied
+  // regardless of the clang version rather than only for clang >= 15.
+  if (!File) {
+    return;
+  }
+#if CLANG_VERSION_MAJOR >= 15
+  Indexer->inclusionDirective(FileNameRange.getAsRange(), &File->getFileEntry());
+#else
+  Indexer->inclusionDirective(FileNameRange.getAsRange(), File);
+#endif
+}
+
+// Preprocessor callback for a macro definition; forwards the macro name
+// token and its directive to the indexer's macroDefined().
+void PreprocessorHook::MacroDefined(const Token &Tok,
+                                    const MacroDirective *Md) {
+  Indexer->macroDefined(Tok, Md);
+}
+
+// Preprocessor callback for a macro expansion; reported to the indexer as a
+// use of the macro. Range and Ma are not used.
+void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
+                                    SourceRange Range, const MacroArgs *Ma) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for #undef; reported to the indexer as a use of the
+// macro. Undef is not used.
+void PreprocessorHook::MacroUndefined(const Token &Tok,
+                                      const MacroDefinition &Md,
+                                      const MacroDirective *Undef) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for a defined() check; reported to the indexer as a
+// use of the macro. Range is not used.
+void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
+                               SourceRange Range) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for #ifdef; reported to the indexer as a use of the
+// tested macro. Loc is not used.
+void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
+                             const MacroDefinition &Md) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Preprocessor callback for #ifndef; reported to the indexer as a use of the
+// tested macro. Loc is not used.
+void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
+                              const MacroDefinition &Md) {
+  const MacroInfo *Info = Md.getMacroInfo();
+  Indexer->macroUsed(Tok, Info);
+}
+
+// Frontend plugin action for the mozsearch indexer. It creates the
+// IndexConsumer and parses the three plugin arguments (source, output and
+// object directories) into Srcdir, Outdir and Objdir.
+class IndexAction : public PluginASTAction {
+protected:
+  // Creates the AST consumer for this compilation. F is not used.
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 llvm::StringRef F) override {
+    return make_unique<IndexConsumer>(CI);
+  }
+
+  // Parses the plugin arguments: Args[0] is the source directory, Args[1]
+  // the output directory and Args[2] the object directory. Reports an error
+  // diagnostic and returns false if the argument count is wrong or if any
+  // directory cannot be resolved to an absolute path; returns true otherwise.
+  bool ParseArgs(const CompilerInstance &CI,
+                 const std::vector<std::string> &Args) override {
+    if (Args.size() != 3) {
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "Need arguments for the source, output, and object directories");
+      D.Report(DiagID);
+      return false;
+    }
+
+    // Load our directories
+    Srcdir = getAbsolutePath(Args[0]);
+    if (Srcdir.empty()) {
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(
+          DiagnosticsEngine::Error, "Source directory '%0' does not exist");
+      D.Report(DiagID) << Args[0];
+      return false;
+    }
+
+    // The output directory is created on demand. The trailing separator makes
+    // ensurePath() treat the final component as a directory to create.
+    ensurePath(Args[1] + PATHSEP_STRING);
+    Outdir = getAbsolutePath(Args[1]);
+    if (Outdir.empty()) {
+      // Same handling as for the other two directories. Without this check a
+      // failed lookup would leave Outdir as just a path separator.
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "Output directory '%0' could not be created or resolved");
+      D.Report(DiagID) << Args[1];
+      return false;
+    }
+    Outdir += PATHSEP_STRING;
+
+    Objdir = getAbsolutePath(Args[2]);
+    if (Objdir.empty()) {
+      DiagnosticsEngine &D = CI.getDiagnostics();
+      unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
+                                          "Objdir '%0' does not exist");
+      D.Report(DiagID) << Args[2];
+      return false;
+    }
+    Objdir += PATHSEP_STRING;
+
+    printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
+           Objdir.c_str());
+
+    return true;
+  }
+
+  // Writes a placeholder help message to Ros.
+  void printHelp(llvm::raw_ostream &Ros) {
+    Ros << "Help for mozsearch plugin goes here\n";
+  }
+};
+
+// Registers IndexAction with the frontend plugin registry under the name
+// "mozsearch-index".
+static FrontendPluginRegistry::Add<IndexAction>
+    Y("mozsearch-index", "create the mozsearch index database");
diff --git a/build/clang-plugin/mozsearch-plugin/README b/build/clang-plugin/mozsearch-plugin/README
new file mode 100644
index 0000000000..d948e9aca3
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/README
@@ -0,0 +1,12 @@
+This clang plugin code generates a JSON file for each compiler input
+file. The JSON file contains information about the C++ symbols that
+are referenced by the input file. The data is eventually consumed by
+Searchfox. See https://github.com/mozsearch/mozsearch for more
+information.
+
+This plugin is enabled with the --enable-clang-plugin and
+--enable-mozsearch-plugin mozconfig options. The output of the plugin
+is stored in $OBJDIR/mozsearch_index.
+
+This code is not a checker, unlike other parts of the Mozilla clang
+plugin. It cannot be used with clang-tidy.
diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.cpp b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp
new file mode 100644
index 0000000000..a2e60e42c6
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "StringOperations.h"
+
+// Hashes a NUL-terminated string: start from 5381 and, for each character,
+// multiply the running value by 33 and add the character.
+static unsigned long djbHash(const char *Str) {
+  unsigned long Acc = 5381;
+
+  while (*Str) {
+    Acc = Acc * 33 + *Str;
+    ++Str;
+  }
+
+  return Acc;
+}
+
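+// Writes the hex digits of |Hash| into |Buffer|, least-significant nibble
+// first and without leading zeros (so zero yields an empty string). This is
+// not a conventional hex string, but the exact form doesn't really matter.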
+static void hashToString(unsigned long Hash, char *Buffer) {
+  static const char Digits[] = "0123456789abcdef";
+
+  // Emit one hex digit per nibble, lowest nibble first, until no set bits
+  // remain. A Hash of zero therefore produces no digits at all.
+  char *Out = Buffer;
+  for (; Hash != 0; Hash >>= 4) {
+    *Out++ = Digits[Hash & 0xf];
+  }
+
+  *Out = '\0';
+}
+
+// Returns a short textual digest of Str: the djbHash() of its bytes rendered
+// by hashToString().
+std::string hash(const std::string &Str) {
+  // A plain local buffer; a function-local static here would be shared by
+  // all callers for no benefit. 41 bytes leaves ample room for the digits
+  // hashToString() writes (one per nibble) plus the terminating NUL.
+  char HashStr[41];
+  unsigned long H = djbHash(Str.c_str());
+  hashToString(H, HashStr);
+  return std::string(HashStr);
+}
+
+// Returns the decimal representation of N.
+std::string toString(int N) {
+  // std::to_string(int) yields the same text as formatting with "%d", without
+  // the two formatting passes and temporary heap buffer.
+  return std::to_string(N);
+}
diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.h b/build/clang-plugin/mozsearch-plugin/StringOperations.h
new file mode 100644
index 0000000000..4aa5b31962
--- /dev/null
+++ b/build/clang-plugin/mozsearch-plugin/StringOperations.h
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef StringOperations_h
+#define StringOperations_h
+
+#include <memory>
+#include <stdio.h>
+#include <string>
+#include <string.h>
+
+std::string hash(const std::string &Str);
+
+// printf-style formatting into a std::string.
+//
+// Format is a printf format string and ArgList the matching arguments.
+// Returns the formatted text, or an empty string if snprintf reports an
+// error.
+template <typename... Args>
+inline std::string stringFormat(const std::string &Format, Args... ArgList) {
+  // First pass: ask snprintf how many characters the output needs. It
+  // returns a negative value on error, which must not be turned into a size.
+  int Needed = snprintf(nullptr, 0, Format.c_str(), ArgList...);
+  if (Needed <= 0) {
+    return std::string();
+  }
+
+  // Second pass: format into a buffer with room for the terminating NUL.
+  size_t Len = static_cast<size_t>(Needed);
+  std::unique_ptr<char[]> Buf(new char[Len + 1]);
+  snprintf(Buf.get(), Len + 1, Format.c_str(), ArgList...);
+  return std::string(Buf.get(), Buf.get() + Len);
+}
+
+std::string toString(int N);
+
+#endif