diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /build/clang-plugin/mozsearch-plugin | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'build/clang-plugin/mozsearch-plugin')
11 files changed, 4015 insertions, 0 deletions
diff --git a/build/clang-plugin/mozsearch-plugin/BindingOperations.cpp b/build/clang-plugin/mozsearch-plugin/BindingOperations.cpp new file mode 100644 index 0000000000..79f7f3aebf --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/BindingOperations.cpp @@ -0,0 +1,792 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BindingOperations.h" + +#include <clang/AST/Attr.h> +#include <clang/AST/Expr.h> +#include <clang/AST/RecursiveASTVisitor.h> + +#include <algorithm> +#include <array> +#include <cuchar> +#include <set> +#include <string> +#include <unordered_map> +#include <vector> + +#ifdef __cpp_lib_optional +#include <optional> +template<typename T> using optional = std::optional<T>; +#else +#include <llvm/ADT/Optional.h> +template<typename T> using optional = clang::Optional<T>; +#endif + +using namespace clang; + +namespace { + +template<typename InputIt> +bool hasReverseQualifiedName(InputIt first, InputIt last, const NamedDecl &tag) +{ + const NamedDecl *currentDecl = &tag; + InputIt currentName; + for (currentName = first; currentName != last; currentName++) { + if (!currentDecl || !currentDecl->getIdentifier() || currentDecl->getName() != *currentName) + return false; + + currentDecl = dyn_cast<NamedDecl>(currentDecl->getDeclContext()); + } + if (currentName != last) + return false; + + if (currentDecl != nullptr) + return false; + + return true; +} + +bool isMozillaJniObjectBase(const CXXRecordDecl &klass) +{ + const auto qualifiedName = std::array<StringRef, 3>{"mozilla", "jni", "ObjectBase"}; + return hasReverseQualifiedName(qualifiedName.crbegin(), qualifiedName.crend(), klass); +} + +bool isMozillaJniNativeImpl(const CXXRecordDecl &klass) +{ + const auto qualifiedName = std::array<StringRef, 3>{"mozilla", "jni", "NativeImpl"}; + return hasReverseQualifiedName(qualifiedName.crbegin(), qualifiedName.crend(), klass); +} + +const NamedDecl *fieldNamed(StringRef name, const RecordDecl &strukt) +{ + for (const auto *decl : strukt.decls()) { + const auto *namedDecl = dyn_cast<VarDecl>(decl); + if (!namedDecl) + continue; + + if (!namedDecl->getIdentifier() || namedDecl->getName() != name) + continue; + + return namedDecl; + } + + return {}; +} + +optional<StringRef> nameFieldValue(const RecordDecl &strukt) +{ + const auto *nameField = dyn_cast_or_null<VarDecl>(fieldNamed("name", strukt)); + if (!nameField) + return {}; + + const auto *def = nameField->getDefinition(); + if (!def) + return {}; + + const auto *name = dyn_cast_or_null<StringLiteral>(def->getInit()); + if (!name) + return {}; + + return name->getString(); +} + +struct AbstractBinding { + // Subset of tools/analysis/BindingSlotLang + enum class Lang { + Cpp, + Jvm, + }; + static constexpr size_t LangLength = 2; + static constexpr std::array<StringRef, LangLength> langNames = { + "cpp", + "jvm", + }; + + static optional<Lang> langFromString(StringRef langName) + { + const auto it = std::find(langNames.begin(), langNames.end(), langName); + if (it == langNames.end()) + return {}; + + return Lang(it - langNames.begin()); + } + static StringRef stringFromLang(Lang lang) + { + return langNames[size_t(lang)]; + } + + // Subset of tools/analysis/BindingSlotKind + enum class Kind { + Class, + Method, + Getter, + Setter, + Const, + }; + static constexpr size_t KindLength = 5; + static constexpr std::array<StringRef, KindLength> kindNames = { + "class", + "method", + "getter", + "setter", + "const", + }; + + static optional<Kind> kindFromString(StringRef kindName) + { + const auto it = std::find(kindNames.begin(), kindNames.end(), kindName); + if (it == kindNames.end()) + return {}; + + return Kind(it - kindNames.begin()); + } + static StringRef stringFromKind(Kind kind) + { + return kindNames[size_t(kind)]; + } + + Lang lang; + Kind kind; + StringRef symbol; +}; +constexpr size_t AbstractBinding::KindLength; +constexpr std::array<StringRef, AbstractBinding::KindLength> AbstractBinding::kindNames; +constexpr size_t AbstractBinding::LangLength; +constexpr std::array<StringRef, AbstractBinding::LangLength> AbstractBinding::langNames; + +struct BindingTo : public AbstractBinding { + BindingTo(AbstractBinding b) : AbstractBinding(std::move(b)) {} + static constexpr StringRef ANNOTATION = "binding_to"; +}; +constexpr StringRef BindingTo::ANNOTATION; + +struct BoundAs : public AbstractBinding { + BoundAs(AbstractBinding b) : AbstractBinding(std::move(b)) {} + static constexpr StringRef ANNOTATION = "bound_as"; +}; +constexpr StringRef BoundAs::ANNOTATION; + +template<typename B> +void setBindingAttr(ASTContext &C, Decl &decl, B binding) +{ + // recent LLVM: CreateImplicit then setDelayedArgs + Expr *langExpr = StringLiteral::Create(C, AbstractBinding::stringFromLang(binding.lang), StringLiteral::UTF8, false, {}, {}); + Expr *kindExpr = StringLiteral::Create(C, AbstractBinding::stringFromKind(binding.kind), StringLiteral::UTF8, false, {}, {}); + Expr *symbolExpr = StringLiteral::Create(C, binding.symbol, StringLiteral::UTF8, false, {}, {}); + auto **args = new (C, 16) Expr *[3]{langExpr, kindExpr, symbolExpr}; + auto *attr = AnnotateAttr::CreateImplicit(C, B::ANNOTATION, args, 3); + decl.addAttr(attr); +} + +optional<AbstractBinding> readBinding(const AnnotateAttr &attr) +{ + if (attr.args_size() != 3) + return {}; + + const auto *langExpr = attr.args().begin()[0]; + const auto *kindExpr = attr.args().begin()[1]; + const auto *symbolExpr = attr.args().begin()[2]; + if (!langExpr || !kindExpr || !symbolExpr) + return {}; + + const auto *langName = dyn_cast<StringLiteral>(langExpr->IgnoreUnlessSpelledInSource()); + const auto *kindName = dyn_cast<StringLiteral>(kindExpr->IgnoreUnlessSpelledInSource()); + const auto *symbol = dyn_cast<StringLiteral>(symbolExpr->IgnoreUnlessSpelledInSource()); + if (!langName || !kindName || !symbol) + return {}; + + const auto lang = AbstractBinding::langFromString(langName->getString()); + const auto kind = AbstractBinding::kindFromString(kindName->getString()); + + if (!lang || !kind) + return {}; + + return AbstractBinding { + .lang = *lang, + .kind = *kind, + .symbol = symbol->getString(), + }; +} + +optional<BindingTo> getBindingTo(const Decl &decl) +{ + for (const auto *attr : decl.specific_attrs<AnnotateAttr>()) { + if (attr->getAnnotation() != BindingTo::ANNOTATION) + continue; + + const auto binding = readBinding(*attr); + if (!binding) + continue; + + return BindingTo{*binding}; + } + return {}; +} + +// C++23: turn into generator +std::vector<BoundAs> getBoundAs(const Decl &decl) +{ + std::vector<BoundAs> found; + + for (const auto *attr : decl.specific_attrs<AnnotateAttr>()) { + if (attr->getAnnotation() != BoundAs::ANNOTATION) + continue; + + const auto binding = readBinding(*attr); + if (!binding) + continue; + + found.push_back(BoundAs{*binding}); + } + + return found; +} + +class FindCallCall : private RecursiveASTVisitor<FindCallCall> +{ +public: + struct Result { + using Kind = AbstractBinding::Kind; + + Kind kind; + StringRef name; + }; + + static optional<Result> search(Stmt *statement) + { + FindCallCall finder; + finder.TraverseStmt(statement); + return finder.result; + } + +private: + optional<Result> result; + + friend RecursiveASTVisitor<FindCallCall>; + + optional<Result> tryParseCallCall(CallExpr *callExpr){ + const auto *callee = dyn_cast_or_null<CXXMethodDecl>(callExpr->getDirectCallee()); + if (!callee) + return {}; + + if (!callee->getIdentifier()) + return {}; + + const auto action = callee->getIdentifier()->getName(); + + if (action != "Call" && action != "Get" && action != "Set") + return {}; + + const auto *parentClass = dyn_cast_or_null<ClassTemplateSpecializationDecl>(callee->getParent()); + + if (!parentClass) + return {}; + + const auto *parentTemplate = parentClass->getTemplateInstantiationPattern(); + + if (!parentTemplate || !parentTemplate->getIdentifier()) + return {}; + + const auto parentName = parentTemplate->getIdentifier()->getName(); + + AbstractBinding::Kind kind; + if (action == "Call") { + if (parentName == "Constructor" || parentName == "Method") { + kind = AbstractBinding::Kind::Method; + } else { + return {}; + } + } else if (parentName == "Field") { + if (action == "Get") { + kind = AbstractBinding::Kind::Getter; + } else if (action == "Set") { + kind = AbstractBinding::Kind::Setter; + } else { + return {}; + } + } else { + return {}; + } + + const auto *templateArg = parentClass->getTemplateArgs().get(0).getAsType()->getAsRecordDecl(); + + if (!templateArg) + return {}; + + const auto name = nameFieldValue(*templateArg); + if (!name) + return {}; + + return Result { + .kind = kind, + .name = *name, + }; + + return {}; + } + bool VisitCallExpr(CallExpr *callExpr) + { + return !(result = tryParseCallCall(callExpr)); + } +}; + +constexpr StringRef JVM_SCIP_SYMBOL_PREFIX = "S_jvm_"; + +std::string javaScipSymbol(StringRef prefix, StringRef name, AbstractBinding::Kind kind) +{ + auto symbol = (prefix + name).str(); + + switch (kind) { + case AbstractBinding::Kind::Class: + std::replace(symbol.begin(), symbol.end(), '$', '#'); + symbol += "#"; + break; + case AbstractBinding::Kind::Method: + symbol += "()."; + break; + case AbstractBinding::Kind::Const: + case AbstractBinding::Kind::Getter: + case AbstractBinding::Kind::Setter: + symbol += "."; + break; + } + + return symbol; +} + +void addSlotOwnerAttribute(llvm::json::OStream &J, const Decl &decl) +{ + if (const auto bindingTo = getBindingTo(decl)) { + J.attributeBegin("slotOwner"); + J.objectBegin(); + J.attribute("slotKind", AbstractBinding::stringFromKind(bindingTo->kind)); + J.attribute("slotLang", "cpp"); + J.attribute("ownerLang", AbstractBinding::stringFromLang(bindingTo->lang)); + J.attribute("sym", bindingTo->symbol); + J.objectEnd(); + J.attributeEnd(); + } +} +void addBindingSlotsAttribute(llvm::json::OStream &J, const Decl &decl) +{ + const auto allBoundAs = getBoundAs(decl); + if (!allBoundAs.empty()) { + J.attributeBegin("bindingSlots"); + J.arrayBegin(); + for (const auto boundAs : allBoundAs) { + J.objectBegin(); + J.attribute("slotKind", AbstractBinding::stringFromKind(boundAs.kind)); + J.attribute("slotLang", AbstractBinding::stringFromLang(boundAs.lang)); + J.attribute("ownerLang", "cpp"); + J.attribute("sym", boundAs.symbol); + J.objectEnd(); + } + J.arrayEnd(); + J.attributeEnd(); + } +} + +// The mangling scheme is documented at https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/design.html +// The main takeaways are: +// - _0xxxx is the utf16 code unit xxxx +// - _1 is _ +// - _2 is ; +// - _3 is [ +// - __ is the separator between function name and overload specification +// - _ is otherwise the separator between packages/classes/methods +// +// This method takes a StringRef & and mutates it and can be called twice on a Jnicall function name to get +// first the demangled name +// second the demangled overload specification +// But we don't use the later for now because we have no way to map that to how SCIP resolves overloads. +optional<std::string> demangleJnicallPart(StringRef &remainder) +{ + std::string demangled; + + std::mbstate_t ps = {}; + + while (!remainder.empty()) { + switch (remainder[0]) { + case '0': { + remainder = remainder.drop_front(1); + + uint16_t codeUnit; + const auto ok = remainder.substr(1, 4).getAsInteger(16, codeUnit); + remainder = remainder.drop_front(4); + + if (!ok) // failed reading xxxx as hexadecimal from _0xxxx + return {}; + + std::array<char, MB_LEN_MAX> codePoint; + const auto mbLen = std::c16rtomb(codePoint.data(), codeUnit, &ps); + + if (mbLen == -1) // failed converting utf16 to utf8 + return {}; + + demangled += StringRef(codePoint.begin(), mbLen); + break; + } + case '1': + remainder = remainder.drop_front(1); + ps = {}; + demangled += '_'; + break; + case '2': + remainder = remainder.drop_front(1); + ps = {}; + demangled += ';'; + break; + case '3': + remainder = remainder.drop_front(1); + ps = {}; + demangled += '['; + break; + case '_': + remainder = remainder.drop_front(1); + ps = {}; + if (remainder.empty()) // the string ends with _ + return {}; + + switch (remainder[0]) { + case '0': + case '1': + case '2': + case '3': + demangled += '.'; + break; + default: + // either: + // * the string began with _[^0-3], which is not supposed to happen; or + // * we reached __[^0-3] meaning we finished the first part of the name and remainder holds the overload specification + return demangled; + } + default: + ps = {}; + demangled += '.'; + break; + } + StringRef token; + std::tie(token, remainder) = remainder.split('_'); + demangled += token; + } + + return demangled; +} + +optional<std::string> scipSymbolFromJnicallFunctionName(StringRef functionName) +{ + if (!functionName.consume_front("Java_")) + return {}; + + const auto demangledName = demangleJnicallPart(functionName); + + if (!demangledName || demangledName->empty()) + return {}; + + // demangleJavaName returns something like .some.package.Class$InnerClass.method + // - prepend S_jvm_ + // - remove the leading dot + // - replace the last dot with a # + // - replace the other dots with / + // - replace $ with # + // - add the ([+overloadNumber]). suffix + auto symbol = JVM_SCIP_SYMBOL_PREFIX.str(); + symbol += demangledName->substr(1); + const auto lastDot = symbol.rfind('.'); + if (lastDot != std::string::npos) + symbol[lastDot] = '#'; + std::replace(symbol.begin(), symbol.end(), '.', '/'); + std::replace(symbol.begin(), symbol.end(), '$', '#'); + + // Keep track of how many times we have seen this method, to build the ([+overloadNumber]). suffix. + // This assumes this function is called on C function definitions in the same order the matching overloads are declared in Java. + static std::unordered_map<std::string, uint> jnicallFunctions; + auto &overloadNumber = jnicallFunctions[symbol]; + + symbol += '('; + if (overloadNumber) { + symbol += '+'; + symbol += overloadNumber; + overloadNumber++; + } + symbol += ")."; + + return symbol; +}; + +} // anonymous namespace + +// class [wrapper] : public mozilla::jni::ObjectBase<[wrapper]> +// { +// static constexpr char name[] = "[nameFieldValue]"; +// } +void findBindingToJavaClass(ASTContext &C, CXXRecordDecl &klass) +{ + for (const auto &baseSpecifier : klass.bases()) { + const auto *base = baseSpecifier.getType()->getAsCXXRecordDecl(); + if (!base) + continue; + + if (!isMozillaJniObjectBase(*base)) + continue; + + const auto name = nameFieldValue(klass); + if (!name) + continue; + + const auto symbol = javaScipSymbol(JVM_SCIP_SYMBOL_PREFIX, *name, BindingTo::Kind::Class); + const auto binding = BindingTo {{ + .lang = BindingTo::Lang::Jvm, + .kind = BindingTo::Kind::Class, + .symbol = symbol, + }}; + + setBindingAttr(C, klass, binding); + return; + } +} + +// When a Java method is marked as native, the JRE looks by default for a function +// named Java_<mangled method name>[__<mangled overload specification>]. +void findBindingToJavaFunction(ASTContext &C, FunctionDecl &function) +{ + const auto *identifier = function.getIdentifier(); + if (!identifier) + return; + + const auto name = identifier->getName(); + const auto symbol = scipSymbolFromJnicallFunctionName(name); + if (!symbol) + return; + + const auto binding = BoundAs {{ + .lang = BindingTo::Lang::Jvm, + .kind = BindingTo::Kind::Method, + .symbol = *symbol, + }}; + + setBindingAttr(C, function, binding); +} + +// class [parent] +// { +// struct [methodStruct] { +// static constexpr char name[] = "[methodNameFieldValue]"; +// } +// [method] +// { +// ... +// mozilla::jni::{Method,Constructor,Field}<[methodStruct]>::{Call,Get,Set}(...) +// ... +// } +// } +void findBindingToJavaMember(ASTContext &C, CXXMethodDecl &method) +{ + const auto *parent = method.getParent(); + if (!parent) + return; + const auto classBinding = getBindingTo(*parent); + if (!classBinding) + return; + + auto *body = method.getBody(); + if (!body) + return; + + const auto found = FindCallCall::search(body); + if (!found) + return; + + const auto symbol = javaScipSymbol(classBinding->symbol, found->name, found->kind); + const auto binding = BindingTo {{ + .lang = BindingTo::Lang::Jvm, + .kind = found->kind, + .symbol = symbol, + }}; + + setBindingAttr(C, method, binding); +} + +// class [parent] +// { +// struct [methodStruct] { +// static constexpr char name[] = "[methodNameFieldValue]"; +// } +// [method] +// { +// ... +// mozilla::jni::{Method,Constructor,Field}<[methodStruct]>::{Call,Get,Set}(...) +// ... +// } +// } +void findBindingToJavaConstant(ASTContext &C, VarDecl &field) +{ + const auto *parent = dyn_cast_or_null<CXXRecordDecl>(field.getDeclContext()); + if (!parent) + return; + + const auto classBinding = getBindingTo(*parent); + if (!classBinding) + return; + + const auto symbol = javaScipSymbol(classBinding->symbol, field.getName(), BindingTo::Kind::Const); + const auto binding = BindingTo {{ + .lang = BindingTo::Lang::Jvm, + .kind = BindingTo::Kind::Const, + .symbol = symbol, + }}; + + setBindingAttr(C, field, binding); +} + +// class [klass] : public [wrapper]::Natives<[klass]> {...} +// class [wrapper] : public mozilla::jni::ObjectBase<[wrapper]> +// { +// static constexpr char name[] = "[nameFieldValue]"; +// +// struct [methodStruct] { +// static constexpr char name[] = "[methodNameFieldValue]"; +// } +// +// template<typename T> +// class [wrapper]::Natives : public mozilla::jni::NativeImpl<[wrapper], T> { +// static const JNINativeMethod methods[] = { +// mozilla::jni::MakeNativeMethod<[wrapper]::[methodStruct]>( +// mozilla::jni::NativeStub<[wrapper]::[methodStruct], Impl> +// ::template Wrap<&Impl::[method]>), +// } +// } +// } +void findBoundAsJavaClasses(ASTContext &C, CXXRecordDecl &klass) +{ + for (const auto &baseSpecifier : klass.bases()) { + const auto *base = baseSpecifier.getType()->getAsCXXRecordDecl(); + if (!base) + continue; + + for (const auto &baseBaseSpecifier : base->bases()) { + const auto *baseBase = dyn_cast_or_null<ClassTemplateSpecializationDecl>(baseBaseSpecifier.getType()->getAsCXXRecordDecl()); + if (!baseBase) + continue; + + if (!isMozillaJniNativeImpl(*baseBase)) + continue; + + const auto *wrapper = baseBase->getTemplateArgs().get(0).getAsType()->getAsCXXRecordDecl(); + + if (!wrapper) + continue; + + const auto name = nameFieldValue(*wrapper); + if (!name) + continue; + + const auto javaClassSymbol = javaScipSymbol(JVM_SCIP_SYMBOL_PREFIX, *name, BoundAs::Kind::Class); + const auto classBinding = BoundAs {{ + .lang = BoundAs::Lang::Jvm, + .kind = BoundAs::Kind::Class, + .symbol = javaClassSymbol, + }}; + setBindingAttr(C, klass, classBinding); + + const auto *methodsDecl = dyn_cast_or_null<VarDecl>(fieldNamed("methods", *base)); + if (!methodsDecl) + continue; + + const auto *methodsDef = methodsDecl->getDefinition(); + if (!methodsDef) + continue; + + const auto *inits = dyn_cast_or_null<InitListExpr>(methodsDef->getInit()); + if (!inits) + continue; + + std::set<const CXXMethodDecl *> alreadyBound; + + for (const auto *init : inits->inits()) { + const auto *call = dyn_cast<CallExpr>(init->IgnoreUnlessSpelledInSource()); + if (!call) + continue; + + const auto *funcDecl = call->getDirectCallee(); + if (!funcDecl) + continue; + + const auto *templateArgs = funcDecl->getTemplateSpecializationArgs(); + if (!templateArgs) + continue; + + const auto *strukt = dyn_cast_or_null<RecordDecl>(templateArgs->get(0).getAsType()->getAsRecordDecl()); + if (!strukt) + continue; + + const auto *wrapperRef = dyn_cast_or_null<DeclRefExpr>(call->getArg(0)->IgnoreUnlessSpelledInSource()); + if (!wrapperRef) + continue; + + const auto *boundRef = dyn_cast_or_null<UnaryOperator>(wrapperRef->template_arguments().front().getArgument().getAsExpr()); + if (!boundRef) + continue; + + auto addToBound = [&](CXXMethodDecl &boundDecl, uint overloadNum) { + const auto methodName = nameFieldValue(*strukt); + if (!methodName) + return; + + auto javaMethodSymbol = javaClassSymbol; + javaMethodSymbol += *methodName; + javaMethodSymbol += '('; + if (overloadNum > 0) { + javaMethodSymbol += '+'; + javaMethodSymbol += std::to_string(overloadNum); + } + javaMethodSymbol += ")."; + + const auto binding = BoundAs {{ + .lang = BoundAs::Lang::Jvm, + .kind = BoundAs::Kind::Method, + .symbol = javaMethodSymbol, + }}; + setBindingAttr(C, boundDecl, binding); + }; + + if (auto *bound = dyn_cast_or_null<DeclRefExpr>(boundRef->getSubExpr())) { + auto *method = dyn_cast_or_null<CXXMethodDecl>(bound->getDecl()); + if (!method) + continue; + addToBound(*method, 0); + } else if (const auto *bound = dyn_cast_or_null<UnresolvedLookupExpr>(boundRef->getSubExpr())) { + // XXX This is hackish + // In case of overloads it's not obvious which one we should use + // this expects the declaration order between C++ and Java to match + auto declarations = std::vector<Decl*>(bound->decls_begin(), bound->decls_end()); + auto byLocation = [](Decl *a, Decl *b){ return a->getLocation() < b->getLocation(); }; + std::sort(declarations.begin(), declarations.end(), byLocation); + + uint i = 0; + for (auto *decl : declarations) { + auto *method = dyn_cast<CXXMethodDecl>(decl); + if (!method) + continue; + if (alreadyBound.find(method) == alreadyBound.end()) { + addToBound(*method, i); + alreadyBound.insert(method); + break; + } + i++; + } + } + } + } + } +} + +void emitBindingAttributes(llvm::json::OStream &J, const Decl &decl) +{ + addSlotOwnerAttribute(J, decl); + addBindingSlotsAttribute(J, decl); +} diff --git a/build/clang-plugin/mozsearch-plugin/BindingOperations.h b/build/clang-plugin/mozsearch-plugin/BindingOperations.h new file mode 100644 index 0000000000..259594c5c3 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/BindingOperations.h @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <clang/AST/ASTContext.h> +#include <clang/AST/Decl.h> +#include <clang/AST/DeclCXX.h> +#include <llvm/Support/JSON.h> + +void findBindingToJavaClass(clang::ASTContext &C, clang::CXXRecordDecl &klass); +void findBoundAsJavaClasses(clang::ASTContext &C, clang::CXXRecordDecl &klass); +void findBindingToJavaFunction(clang::ASTContext &C, clang::FunctionDecl &function); +void findBindingToJavaMember(clang::ASTContext &C, clang::CXXMethodDecl &method); +void findBindingToJavaConstant(clang::ASTContext &C, clang::VarDecl &field); + +void emitBindingAttributes(llvm::json::OStream &json, const clang::Decl &decl); diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.cpp b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp new file mode 100644 index 0000000000..9307f4989d --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/FileOperations.cpp @@ -0,0 +1,140 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FileOperations.h" + +#include <stdio.h> +#include <stdlib.h> + +#if defined(_WIN32) || defined(_WIN64) +#include <direct.h> +#include <io.h> +#include <windows.h> +#include "StringOperations.h" +#else +#include <sys/file.h> +#include <sys/time.h> +#include <unistd.h> +#endif + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +// Make sure that all directories on path exist, excluding the final element of +// the path. +void ensurePath(std::string Path) { + size_t Pos = 0; + if (Path[0] == PATHSEP_CHAR) { + Pos++; + } + + while ((Pos = Path.find(PATHSEP_CHAR, Pos)) != std::string::npos) { + std::string Portion = Path.substr(0, Pos); + if (!Portion.empty()) { +#if defined(_WIN32) || defined(_WIN64) + int Err = _mkdir(Portion.c_str()); +#else + int Err = mkdir(Portion.c_str(), 0775); +#endif + if (Err == -1 && errno != EEXIST) { + perror("mkdir failed"); + exit(1); + } + } + + Pos++; + } +} + +#if defined(_WIN32) || defined(_WIN64) +AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) { + this->Filename = DstFile; + std::string Hash = hash(SrcFile); + std::string MutexName = std::string("Local\\searchfox-") + Hash; + std::wstring WideMutexName; + WideMutexName.assign(MutexName.begin(), MutexName.end()); + Handle = CreateMutex(nullptr, false, WideMutexName.c_str()); + if (Handle == NULL) { + return; + } + + if (WaitForSingleObject(Handle, INFINITE) != WAIT_OBJECT_0) { + return; + } +} + +AutoLockFile::~AutoLockFile() { + ReleaseMutex(Handle); + CloseHandle(Handle); +} + +bool AutoLockFile::success() { + return Handle != NULL; +} + +FILE *AutoLockFile::openTmp() { + int TmpDescriptor = _open((Filename + ".tmp").c_str(), _O_WRONLY | _O_APPEND | _O_CREAT | _O_BINARY, 0666); + return _fdopen(TmpDescriptor, "ab"); +} + +bool AutoLockFile::moveTmp() { + if (_unlink(Filename.c_str()) == -1) { + if (errno != ENOENT) { + return false; + } + } + return rename((Filename + ".tmp").c_str(), Filename.c_str()) == 0; +} + +std::string getAbsolutePath(const std::string &Filename) { + char Full[_MAX_PATH]; + if (!_fullpath(Full, Filename.c_str(), _MAX_PATH)) { + return std::string(""); + } + return std::string(Full); +} +#else +AutoLockFile::AutoLockFile(const std::string &SrcFile, const std::string &DstFile) { + this->Filename = DstFile; + FileDescriptor = open(SrcFile.c_str(), O_RDONLY); + if (FileDescriptor == -1) { + return; + } + + do { + int rv = flock(FileDescriptor, LOCK_EX); + if (rv == 0) { + break; + } + } while (true); +} + +AutoLockFile::~AutoLockFile() { close(FileDescriptor); } + +bool AutoLockFile::success() { return FileDescriptor != -1; } + +FILE* AutoLockFile::openTmp() { + int TmpDescriptor = open((Filename + ".tmp").c_str(), O_WRONLY | O_APPEND | O_CREAT, 0666); + return fdopen(TmpDescriptor, "ab"); +} + +bool AutoLockFile::moveTmp() { + if (unlink(Filename.c_str()) == -1) { + if (errno != ENOENT) { + return false; + } + } + return rename((Filename + ".tmp").c_str(), Filename.c_str()) == 0; +} + +std::string getAbsolutePath(const std::string &Filename) { + char Full[4096]; + if (!realpath(Filename.c_str(), Full)) { + return std::string(""); + } + return std::string(Full); +} +#endif diff --git a/build/clang-plugin/mozsearch-plugin/FileOperations.h b/build/clang-plugin/mozsearch-plugin/FileOperations.h new file mode 100644 index 0000000000..90764484da --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/FileOperations.h @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef FileOperations_h +#define FileOperations_h + +#include <stdio.h> +#include <string> + +#if defined(_WIN32) || defined(_WIN64) +#include <windows.h> +#define PATHSEP_CHAR '\\' +#define PATHSEP_STRING "\\" +#else +#define PATHSEP_CHAR '/' +#define PATHSEP_STRING "/" +#endif + +// Make sure that all directories on path exist, excluding the final element of +// the path. +void ensurePath(std::string Path); + +std::string getAbsolutePath(const std::string &Filename); + +// Used to synchronize access when writing to an analysis file, so that +// concurrently running clang instances don't clobber each other's data. +// On Windows, we use a named mutex. On POSIX platforms, we use flock on the +// source files. flock is advisory locking, and doesn't interfere with clang's +// own opening of the source files (i.e. to interfere, clang would have to be +// using flock itself, which it does not). +struct AutoLockFile { + // Absolute path to the analysis file + std::string Filename; + +#if defined(_WIN32) || defined(_WIN64) + // Handle for the named Mutex + HANDLE Handle = NULL; +#else + // fd for the *source* file that corresponds to the analysis file. We use + // the source file because it doesn't change while the analysis file gets + // repeatedly replaced by a new version written to a separate tmp file. + // This fd is used when using flock to synchronize access. + int FileDescriptor = -1; +#endif + + // SrcFile should be the absolute path to the source code file, and DstFile + // the absolute path to the corresponding analysis file. This constructor + // will block until exclusive access has been obtained. + AutoLockFile(const std::string &SrcFile, const std::string &DstFile); + ~AutoLockFile(); + + // Check after constructing to ensure the mutex was properly set up. + bool success(); + + // There used to be an `openFile` method here but we switched to directly + // using a std::ifstream for the input file in able to take advantage of its + // support for variable length lines (as opposed to fgets which takes a fixed + // size buffer). + + // Open a new tmp file for writing the new analysis data to. Caller is + // responsible for fclose'ing it. + FILE *openTmp(); + // Replace the existing analysis file with the new "tmp" one that has the new + // data. Returns false on error. + bool moveTmp(); +}; + +#endif diff --git a/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp new file mode 100644 index 0000000000..f6958384a8 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/MozsearchIndexer.cpp @@ -0,0 +1,2436 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Mangle.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Version.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendPluginRegistry.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +#include <fstream> +#include <iostream> +#include <map> +#include <memory> +#include <sstream> +#include <string> +#include <tuple> +#include <unordered_set> + +#include <stdio.h> +#include <stdlib.h> + +#include "BindingOperations.h" +#include "FileOperations.h" +#include "StringOperations.h" +#include "from-clangd/HeuristicResolver.h" + +#if CLANG_VERSION_MAJOR < 8 +// Starting with Clang 8.0 some basic functions have been renamed +#define getBeginLoc getLocStart +#define getEndLoc getLocEnd +#endif +// We want std::make_unique, but that's only available in c++14. In versions +// prior to that, we need to fall back to llvm's make_unique. It's also the +// case that we expect clang 10 to build with c++14 and clang 9 and earlier to +// build with c++11, at least as suggested by the llvm-config --cxxflags on +// non-windows platforms. mozilla-central seems to build with -std=c++17 on +// windows so we need to make this decision based on __cplusplus instead of +// the CLANG_VERSION_MAJOR. +#if __cplusplus < 201402L +using llvm::make_unique; +#else +using std::make_unique; +#endif + +using namespace clang; + +const std::string GENERATED("__GENERATED__" PATHSEP_STRING); + +// Absolute path to directory containing source code. +std::string Srcdir; + +// Absolute path to objdir (including generated code). +std::string Objdir; + +// Absolute path where analysis JSON output will be stored. +std::string Outdir; + +enum class FileType { + // The file was either in the source tree nor objdir. It might be a system + // include, for example. + Unknown, + // A file from the source tree. + Source, + // A file from the objdir. + Generated, +}; + +// Takes an absolute path to a file, and returns the type of file it is. If +// it's a Source or Generated file, the provided inout path argument is modified +// in-place so that it is relative to the source dir or objdir, respectively. +FileType relativizePath(std::string& path) { + if (path.compare(0, Objdir.length(), Objdir) == 0) { + path.replace(0, Objdir.length(), GENERATED); + return FileType::Generated; + } + // Empty filenames can get turned into Srcdir when they are resolved as + // absolute paths, so we should exclude files that are exactly equal to + // Srcdir or anything outside Srcdir. + if (path.length() > Srcdir.length() && path.compare(0, Srcdir.length(), Srcdir) == 0) { + // Remove the trailing `/' as well. + path.erase(0, Srcdir.length() + 1); + return FileType::Source; + } + return FileType::Unknown; +} + +#if !defined(_WIN32) && !defined(_WIN64) +#include <sys/time.h> + +static double time() { + struct timeval Tv; + gettimeofday(&Tv, nullptr); + return double(Tv.tv_sec) + double(Tv.tv_usec) / 1000000.; +} +#endif + +// Return true if |input| is a valid C++ identifier. We don't want to generate +// analysis information for operators, string literals, etc. by accident since +// it trips up consumers of the data. +static bool isValidIdentifier(std::string Input) { + for (char C : Input) { + if (!(isalpha(C) || isdigit(C) || C == '_')) { + return false; + } + } + return true; +} + +struct RAIITracer { + RAIITracer(const char *log) : mLog(log) { + printf("<%s>\n", mLog); + } + + ~RAIITracer() { + printf("</%s>\n", mLog); + } + + const char* mLog; +}; + +#define TRACEFUNC RAIITracer tracer(__FUNCTION__); + +class IndexConsumer; + +// For each C++ file seen by the analysis (.cpp or .h), we track a +// FileInfo. This object tracks whether the file is "interesting" (i.e., whether +// it's in the source dir or the objdir). We also store the analysis output +// here. +struct FileInfo { + FileInfo(std::string &Rname) : Realname(Rname) { + switch (relativizePath(Realname)) { + case FileType::Generated: + Interesting = true; + Generated = true; + break; + case FileType::Source: + Interesting = true; + Generated = false; + break; + case FileType::Unknown: + Interesting = false; + Generated = false; + break; + } + } + std::string Realname; + std::vector<std::string> Output; + bool Interesting; + bool Generated; +}; + +class IndexConsumer; + +class PreprocessorHook : public PPCallbacks { + IndexConsumer *Indexer; + +public: + PreprocessorHook(IndexConsumer *C) : Indexer(C) {} + + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) override; + + virtual void InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FileNameRange, +#if CLANG_VERSION_MAJOR >= 16 + OptionalFileEntryRef File, +#elif CLANG_VERSION_MAJOR >= 15 + Optional<FileEntryRef> File, +#else + const FileEntry *File, +#endif + StringRef SearchPath, + StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) override; + + virtual void MacroDefined(const Token &Tok, + const MacroDirective *Md) override; + + virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md, + SourceRange Range, const MacroArgs *Ma) override; + virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md, + const MacroDirective *Undef) override; + virtual void Defined(const Token &Tok, const MacroDefinition &Md, + SourceRange Range) override; + virtual void Ifdef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) override; + virtual void Ifndef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) override; +}; + +class IndexConsumer : public ASTConsumer, + public RecursiveASTVisitor<IndexConsumer>, + public DiagnosticConsumer { +private: + CompilerInstance &CI; + SourceManager &SM; + LangOptions &LO; + std::map<FileID, std::unique_ptr<FileInfo>> FileMap; + MangleContext *CurMangleContext; + ASTContext *AstContext; + std::unique_ptr<clangd::HeuristicResolver> Resolver; + + typedef RecursiveASTVisitor<IndexConsumer> Super; + + // Tracks the set of declarations that the current expression/statement is + // nested inside of. + struct AutoSetContext { + AutoSetContext(IndexConsumer *Self, NamedDecl *Context, bool VisitImplicit = false) + : Self(Self), Prev(Self->CurDeclContext), Decl(Context) { + this->VisitImplicit = VisitImplicit || (Prev ? Prev->VisitImplicit : false); + Self->CurDeclContext = this; + } + + ~AutoSetContext() { Self->CurDeclContext = Prev; } + + IndexConsumer *Self; + AutoSetContext *Prev; + NamedDecl *Decl; + bool VisitImplicit; + }; + AutoSetContext *CurDeclContext; + + FileInfo *getFileInfo(SourceLocation Loc) { + FileID Id = SM.getFileID(Loc); + + std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; + It = FileMap.find(Id); + if (It == FileMap.end()) { + // We haven't seen this file before. We need to make the FileInfo + // structure information ourselves + std::string Filename = std::string(SM.getFilename(Loc)); + std::string Absolute; + // If Loc is a macro id rather than a file id, it Filename might be + // empty. Also for some types of file locations that are clang-internal + // like "<scratch>" it can return an empty Filename. In these cases we + // want to leave Absolute as empty. + if (!Filename.empty()) { + Absolute = getAbsolutePath(Filename); + if (Absolute.empty()) { + Absolute = Filename; + } + } + std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute); + It = FileMap.insert(std::make_pair(Id, std::move(Info))).first; + } + return It->second.get(); + } + + // Helpers for processing declarations + // Should we ignore this location? + bool isInterestingLocation(SourceLocation Loc) { + if (Loc.isInvalid()) { + return false; + } + + return getFileInfo(Loc)->Interesting; + } + + // Convert location to "line:column" or "line:column-column" given length. + // In resulting string rep, line is 1-based and zero-padded to 5 digits, while + // column is 0-based and unpadded. + std::string locationToString(SourceLocation Loc, size_t Length = 0) { + std::pair<FileID, unsigned> Pair = SM.getDecomposedLoc(Loc); + + bool IsInvalid; + unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + if (Length) { + return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length); + } else { + return stringFormat("%05d:%d", Line, Column - 1); + } + } + + // Convert SourceRange to "line-line". + // In the resulting string rep, line is 1-based. + std::string lineRangeToString(SourceRange Range) { + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + bool IsInvalid; + unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + return stringFormat("%d-%d", Line1, Line2); + } + + // Convert SourceRange to "line:column-line:column". + // In the resulting string rep, line is 1-based, column is 0-based. + std::string fullRangeToString(SourceRange Range) { + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + bool IsInvalid; + unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column1 = SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid); + if (IsInvalid) { + return ""; + } + + return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1); + } + + // Returns the qualified name of `d` without considering template parameters. + std::string getQualifiedName(const NamedDecl *D) { + const DeclContext *Ctx = D->getDeclContext(); + if (Ctx->isFunctionOrMethod()) { + return D->getQualifiedNameAsString(); + } + + std::vector<const DeclContext *> Contexts; + + // Collect contexts. + while (Ctx && isa<NamedDecl>(Ctx)) { + Contexts.push_back(Ctx); + Ctx = Ctx->getParent(); + } + + std::string Result; + + std::reverse(Contexts.begin(), Contexts.end()); + + for (const DeclContext *DC : Contexts) { + if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) { + Result += Spec->getNameAsString(); + + if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) { + std::string Backing; + llvm::raw_string_ostream Stream(Backing); + const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); + printTemplateArgumentList( + Stream, TemplateArgs.asArray(), PrintingPolicy(CI.getLangOpts())); + Result += Stream.str(); + } + } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) { + if (Nd->isAnonymousNamespace() || Nd->isInline()) { + continue; + } + Result += Nd->getNameAsString(); + } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) { + if (!Rd->getIdentifier()) { + Result += "(anonymous)"; + } else { + Result += Rd->getNameAsString(); + } + } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) { + Result += Fd->getNameAsString(); + } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) { + // C++ [dcl.enum]p10: Each enum-name and each unscoped + // enumerator is declared in the scope that immediately contains + // the enum-specifier. Each scoped enumerator is declared in the + // scope of the enumeration. + if (Ed->isScoped() || Ed->getIdentifier()) + Result += Ed->getNameAsString(); + else + continue; + } else { + Result += cast<NamedDecl>(DC)->getNameAsString(); + } + Result += "::"; + } + + if (D->getDeclName()) + Result += D->getNameAsString(); + else + Result += "(anonymous)"; + + return Result; + } + + std::string mangleLocation(SourceLocation Loc, + std::string Backup = std::string()) { + FileInfo *F = getFileInfo(Loc); + std::string Filename = F->Realname; + if (Filename.length() == 0 && Backup.length() != 0) { + return Backup; + } + if (F->Generated) { + // Since generated files may be different on different platforms, + // we need to include a platform-specific thing in the hash. Otherwise + // we can end up with hash collisions where different symbols from + // different platforms map to the same thing. + char* Platform = getenv("MOZSEARCH_PLATFORM"); + Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename; + } + return hash(Filename + std::string("@") + locationToString(Loc)); + } + + bool isAcceptableSymbolChar(char c) { + return isalpha(c) || isdigit(c) || c == '_' || c == '/'; + } + + std::string mangleFile(std::string Filename, FileType Type) { + // "Mangle" the file path, such that: + // 1. The majority of paths will still be mostly human-readable. + // 2. The sanitization algorithm doesn't produce collisions where two + // different unsanitized paths can result in the same sanitized paths. + // 3. The produced symbol doesn't cause problems with downstream consumers. + // In order to accomplish this, we keep alphanumeric chars, underscores, + // and slashes, and replace everything else with an "@xx" hex encoding. + // The majority of path characters are letters and slashes which don't get + // encoded, so that satisfies (1). Since "@" characters in the unsanitized + // path get encoded, there should be no "@" characters in the sanitized path + // that got preserved from the unsanitized input, so that should satisfy (2). + // And (3) was done by trial-and-error. Note in particular the dot (.) + // character needs to be encoded, or the symbol-search feature of mozsearch + // doesn't work correctly, as all dot characters in the symbol query get + // replaced by #. + for (size_t i = 0; i < Filename.length(); i++) { + char c = Filename[i]; + if (isAcceptableSymbolChar(c)) { + continue; + } + char hex[4]; + sprintf(hex, "@%02X", ((int)c) & 0xFF); + Filename.replace(i, 1, hex); + i += 2; + } + + if (Type == FileType::Generated) { + // Since generated files may be different on different platforms, + // we need to include a platform-specific thing in the hash. Otherwise + // we can end up with hash collisions where different symbols from + // different platforms map to the same thing. + char* Platform = getenv("MOZSEARCH_PLATFORM"); + Filename = std::string(Platform ? Platform : "") + std::string("@") + Filename; + } + return Filename; + } + + std::string mangleQualifiedName(std::string Name) { + std::replace(Name.begin(), Name.end(), ' ', '_'); + return Name; + } + + std::string getMangledName(clang::MangleContext *Ctx, + const clang::NamedDecl *Decl) { + if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) { + return cast<FunctionDecl>(Decl)->getNameAsString(); + } + + if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) { + const DeclContext *DC = Decl->getDeclContext(); + if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) || + isa<LinkageSpecDecl>(DC) || + // isa<ExternCContextDecl>(DC) || + isa<TagDecl>(DC)) { + llvm::SmallVector<char, 512> Output; + llvm::raw_svector_ostream Out(Output); +#if CLANG_VERSION_MAJOR >= 11 + // This code changed upstream in version 11: + // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47 + GlobalDecl GD; + if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { + GD = GlobalDecl(D, Ctor_Complete); + } else if (const CXXDestructorDecl *D = + dyn_cast<CXXDestructorDecl>(Decl)) { + GD = GlobalDecl(D, Dtor_Complete); + } else { + GD = GlobalDecl(Decl); + } + Ctx->mangleName(GD, Out); +#else + if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { + Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out); + } else if (const CXXDestructorDecl *D = + dyn_cast<CXXDestructorDecl>(Decl)) { + Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out); + } else { + Ctx->mangleName(Decl, Out); + } +#endif + return Out.str().str(); + } else { + return std::string("V_") + mangleLocation(Decl->getLocation()) + + std::string("_") + hash(std::string(Decl->getName())); + } + } else if (isa<TagDecl>(Decl) || isa<ObjCInterfaceDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. + return std::string("T_") + mangleLocation(Decl->getLocation()); + } + + return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (isa<TypedefNameDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. + return std::string("TA_") + mangleLocation(Decl->getLocation()); + } + + return std::string("TA_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) { + if (!Decl->getIdentifier()) { + // Anonymous. + return std::string("NS_") + mangleLocation(Decl->getLocation()); + } + + return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl)); + } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) { + const ObjCInterfaceDecl *Iface = D2->getContainingInterface(); + return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" + + D2->getNameAsString(); + } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) { + const RecordDecl *Record = D2->getParent(); + return std::string("F_<") + getMangledName(Ctx, Record) + ">_" + + D2->getNameAsString(); + } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) { + const DeclContext *DC = Decl->getDeclContext(); + if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) { + return std::string("E_<") + getMangledName(Ctx, Named) + ">_" + + D2->getNameAsString(); + } + } + + assert(false); + return std::string(""); + } + + void debugLocation(SourceLocation Loc) { + std::string S = locationToString(Loc); + StringRef Filename = SM.getFilename(Loc); + printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str()); + } + + void debugRange(SourceRange Range) { + printf("Range\n"); + debugLocation(Range.getBegin()); + debugLocation(Range.getEnd()); + } + +public: + IndexConsumer(CompilerInstance &CI) + : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), CurMangleContext(nullptr), + AstContext(nullptr), CurDeclContext(nullptr), TemplateStack(nullptr) { + CI.getPreprocessor().addPPCallbacks( + make_unique<PreprocessorHook>(this)); + } + + virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const { + return new IndexConsumer(CI); + } + +#if !defined(_WIN32) && !defined(_WIN64) + struct AutoTime { + AutoTime(double *Counter) : Counter(Counter), Start(time()) {} + ~AutoTime() { + if (Start) { + *Counter += time() - Start; + } + } + void stop() { + *Counter += time() - Start; + Start = 0; + } + double *Counter; + double Start; + }; +#endif + + // All we need is to follow the final declaration. + virtual void HandleTranslationUnit(ASTContext &Ctx) { + CurMangleContext = + clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics()); + + AstContext = &Ctx; + Resolver = std::make_unique<clangd::HeuristicResolver>(Ctx); + TraverseDecl(Ctx.getTranslationUnitDecl()); + + // Emit the JSON data for all files now. + std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; + for (It = FileMap.begin(); It != FileMap.end(); It++) { + if (!It->second->Interesting) { + continue; + } + + FileInfo &Info = *It->second; + + std::string Filename = Outdir + Info.Realname; + std::string SrcFilename = Info.Generated + ? Objdir + Info.Realname.substr(GENERATED.length()) + : Srcdir + PATHSEP_STRING + Info.Realname; + + ensurePath(Filename); + + // We lock the output file in case some other clang process is trying to + // write to it at the same time. + AutoLockFile Lock(SrcFilename, Filename); + + if (!Lock.success()) { + fprintf(stderr, "Unable to lock file %s\n", Filename.c_str()); + exit(1); + } + + // Merge our results with the existing lines from the output file. + // This ensures that header files that are included multiple times + // in different ways are analyzed completely. + std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary); + FILE *OutFp = Lock.openTmp(); + if (!OutFp) { + fprintf(stderr, "Unable to open tmp out file for %s\n", Filename.c_str()); + exit(1); + } + + // Sort our new results and get an iterator to them + std::sort(Info.Output.begin(), Info.Output.end()); + std::vector<std::string>::const_iterator NewLinesIter = Info.Output.begin(); + std::string LastNewWritten; + + // Loop over the existing (sorted) lines in the analysis output file. + // (The good() check also handles the case where Fin did not exist when we + // went to open it.) + while(Fin.good()) { + std::string OldLine; + std::getline(Fin, OldLine); + // Skip blank lines. + if (OldLine.length() == 0) { + continue; + } + // We need to put the newlines back that getline() eats. + OldLine.push_back('\n'); + + // Write any results from Info.Output that are lexicographically + // smaller than OldLine (read from the existing file), but make sure + // to skip duplicates. Keep advancing NewLinesIter until we reach an + // entry that is lexicographically greater than OldLine. + for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { + if (*NewLinesIter > OldLine) { + break; + } + if (*NewLinesIter == OldLine) { + continue; + } + if (*NewLinesIter == LastNewWritten) { + // dedupe the new entries being written + continue; + } + if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write %zu bytes[1] to tmp output file for %s\n", + NewLinesIter->length(), Filename.c_str()); + exit(1); + } + LastNewWritten = *NewLinesIter; + } + + // Write the entry read from the existing file. + if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write %zu bytes[2] to tmp output file for %s\n", + OldLine.length(), Filename.c_str()); + exit(1); + } + } + + // We finished reading from Fin + Fin.close(); + + // Finish iterating our new results, discarding duplicates + for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { + if (*NewLinesIter == LastNewWritten) { + continue; + } + if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 1) { + fprintf(stderr, "Unable to write %zu bytes[3] to tmp output file for %s\n", + NewLinesIter->length(), Filename.c_str()); + exit(1); + } + LastNewWritten = *NewLinesIter; + } + + // Done writing all the things, close it and replace the old output file + // with the new one. + fclose(OutFp); + if (!Lock.moveTmp()) { + fprintf(stderr, "Unable to move tmp output file into place for %s (err %d)\n", Filename.c_str(), errno); + exit(1); + } + } + } + + // Unfortunately, we have to override all these methods in order to track the + // context we're inside. + + bool TraverseEnumDecl(EnumDecl *D) { + AutoSetContext Asc(this, D); + return Super::TraverseEnumDecl(D); + } + bool TraverseRecordDecl(RecordDecl *D) { + AutoSetContext Asc(this, D); + return Super::TraverseRecordDecl(D); + } + bool TraverseCXXRecordDecl(CXXRecordDecl *D) { + AutoSetContext Asc(this, D); + return Super::TraverseCXXRecordDecl(D); + } + bool TraverseFunctionDecl(FunctionDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // (See the larger AutoTemplateContext comment for more information.) If a + // method on a templated class is declared out-of-line, we need to analyze + // the definition inside the scope of the template or else we won't properly + // handle member access on the templated type. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseFunctionDecl(D); + } + bool TraverseCXXMethodDecl(CXXMethodDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXMethodDecl(D); + } + bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) { + AutoSetContext Asc(this, D, /*VisitImplicit=*/true); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXConstructorDecl(D); + } + bool TraverseCXXConversionDecl(CXXConversionDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXConversionDecl(D); + } + bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) { + AutoSetContext Asc(this, D); + const FunctionDecl *Def; + // See TraverseFunctionDecl. + if (TemplateStack && D->isDefined(Def) && Def && D != Def) { + TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); + } + return Super::TraverseCXXDestructorDecl(D); + } + + // Used to keep track of the context in which a token appears. + struct Context { + // Ultimately this becomes the "context" JSON property. + std::string Name; + + // Ultimately this becomes the "contextsym" JSON property. + std::string Symbol; + + Context() {} + Context(std::string Name, std::string Symbol) + : Name(Name), Symbol(Symbol) {} + }; + + Context translateContext(NamedDecl *D) { + const FunctionDecl *F = dyn_cast<FunctionDecl>(D); + if (F && F->isTemplateInstantiation()) { + D = F->getTemplateInstantiationPattern(); + } + + return Context(D->getQualifiedNameAsString(), getMangledName(CurMangleContext, D)); + } + + Context getContext(SourceLocation Loc) { + if (SM.isMacroBodyExpansion(Loc)) { + // If we're inside a macro definition, we don't return any context. It + // will probably not be what the user expects if we do. + return Context(); + } + + if (CurDeclContext) { + return translateContext(CurDeclContext->Decl); + } + return Context(); + } + + // Similar to GetContext(SourceLocation), but it skips the declaration passed + // in. This is useful if we want the context of a declaration that's already + // on the stack. + Context getContext(Decl *D) { + if (SM.isMacroBodyExpansion(D->getLocation())) { + // If we're inside a macro definition, we don't return any context. It + // will probably not be what the user expects if we do. + return Context(); + } + + AutoSetContext *Ctxt = CurDeclContext; + while (Ctxt) { + if (Ctxt->Decl != D) { + return translateContext(Ctxt->Decl); + } + Ctxt = Ctxt->Prev; + } + return Context(); + } + + // Analyzing template code is tricky. Suppose we have this code: + // + // template<class T> + // bool Foo(T* ptr) { return T::StaticMethod(ptr); } + // + // If we analyze the body of Foo without knowing the type T, then we will not + // be able to generate any information for StaticMethod. However, analyzing + // Foo for every possible instantiation is inefficient and it also generates + // too much data in some cases. For example, the following code would generate + // one definition of Baz for every instantiation, which is undesirable: + // + // template<class T> + // class Bar { struct Baz { ... }; }; + // + // To solve this problem, we analyze templates only once. We do so in a + // GatherDependent mode where we look for "dependent scoped member + // expressions" (i.e., things like StaticMethod). We keep track of the + // locations of these expressions. If we find one or more of them, we analyze + // the template for each instantiation, in an AnalyzeDependent mode. This mode + // ignores all source locations except for the ones where we found dependent + // scoped member expressions before. For these locations, we generate a + // separate JSON result for each instantiation. + // + // We inherit our parent's mode if it is exists. This is because if our + // parent is in analyze mode, it means we've already lived a full life in + // gather mode and we must not restart in gather mode or we'll cause the + // indexer to visit EVERY identifier, which is way too much data. + struct AutoTemplateContext { + AutoTemplateContext(IndexConsumer *Self) + : Self(Self) + , CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode : Mode::GatherDependent) + , Parent(Self->TemplateStack) { + Self->TemplateStack = this; + } + + ~AutoTemplateContext() { Self->TemplateStack = Parent; } + + // We traverse templates in two modes: + enum class Mode { + // Gather mode does not traverse into specializations. It looks for + // locations where it would help to have more info from template + // specializations. + GatherDependent, + + // Analyze mode traverses into template specializations and records + // information about token locations saved in gather mode. + AnalyzeDependent, + }; + + // We found a dependent scoped member expression! Keep track of it for + // later. + void visitDependent(SourceLocation Loc) { + if (CurMode == Mode::AnalyzeDependent) { + return; + } + + DependentLocations.insert(Loc.getRawEncoding()); + if (Parent) { + Parent->visitDependent(Loc); + } + } + + bool inGatherMode() { + return CurMode == Mode::GatherDependent; + } + + // Do we need to perform the extra AnalyzeDependent passes (one per + // instantiation)? + bool needsAnalysis() const { + if (!DependentLocations.empty()) { + return true; + } + if (Parent) { + return Parent->needsAnalysis(); + } + return false; + } + + void switchMode() { CurMode = Mode::AnalyzeDependent; } + + // Do we want to analyze each template instantiation separately? + bool shouldVisitTemplateInstantiations() const { + if (CurMode == Mode::AnalyzeDependent) { + return true; + } + if (Parent) { + return Parent->shouldVisitTemplateInstantiations(); + } + return false; + } + + // For a given expression/statement, should we emit JSON data for it? + bool shouldVisit(SourceLocation Loc) { + if (CurMode == Mode::GatherDependent) { + return true; + } + if (DependentLocations.find(Loc.getRawEncoding()) != + DependentLocations.end()) { + return true; + } + if (Parent) { + return Parent->shouldVisit(Loc); + } + return false; + } + + private: + IndexConsumer *Self; + Mode CurMode; + std::unordered_set<unsigned> DependentLocations; + AutoTemplateContext *Parent; + }; + + AutoTemplateContext *TemplateStack; + + bool shouldVisitTemplateInstantiations() const { + if (TemplateStack) { + return TemplateStack->shouldVisitTemplateInstantiations(); + } + return false; + } + + bool shouldVisitImplicitCode() const { + return CurDeclContext && CurDeclContext->VisitImplicit; + } + + bool TraverseClassTemplateDecl(ClassTemplateDecl *D) { + AutoTemplateContext Atc(this); + Super::TraverseClassTemplateDecl(D); + + if (!Atc.needsAnalysis()) { + return true; + } + + Atc.switchMode(); + + if (D != D->getCanonicalDecl()) { + return true; + } + + for (auto *Spec : D->specializations()) { + for (auto *Rd : Spec->redecls()) { + // We don't want to visit injected-class-names in this traversal. + if (cast<CXXRecordDecl>(Rd)->isInjectedClassName()) + continue; + + TraverseDecl(Rd); + } + } + + return true; + } + + bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) { + AutoTemplateContext Atc(this); + if (Atc.inGatherMode()) { + Super::TraverseFunctionTemplateDecl(D); + } + + if (!Atc.needsAnalysis()) { + return true; + } + + Atc.switchMode(); + + if (D != D->getCanonicalDecl()) { + return true; + } + + for (auto *Spec : D->specializations()) { + for (auto *Rd : Spec->redecls()) { + TraverseDecl(Rd); + } + } + + return true; + } + + bool shouldVisit(SourceLocation Loc) { + if (TemplateStack) { + return TemplateStack->shouldVisit(Loc); + } + return true; + } + + enum { + // Flag to omit the identifier from being cross-referenced across files. + // This is usually desired for local variables. + NoCrossref = 1 << 0, + // Flag to indicate the token with analysis data is not an identifier. Indicates + // we want to skip the check that tries to ensure a sane identifier token. + NotIdentifierToken = 1 << 1, + // This indicates that the end of the provided SourceRange is valid and + // should be respected. If this flag is not set, the visitIdentifier + // function should use only the start of the SourceRange and auto-detect + // the end based on whatever token is found at the start. + LocRangeEndValid = 1 << 2 + }; + + void emitStructuredInfo(SourceLocation Loc, const RecordDecl *decl) { + std::string json_str; + llvm::raw_string_ostream ros(json_str); + llvm::json::OStream J(ros); + // Start the top-level object. + J.objectBegin(); + + unsigned StartOffset = SM.getFileOffset(Loc); + unsigned EndOffset = + StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); + J.attribute("loc", locationToString(Loc, EndOffset - StartOffset)); + J.attribute("structured", 1); + J.attribute("pretty", getQualifiedName(decl)); + J.attribute("sym", getMangledName(CurMangleContext, decl)); + + J.attribute("kind", TypeWithKeyword::getTagTypeKindName(decl->getTagKind())); + + const ASTContext &C = *AstContext; + const ASTRecordLayout &Layout = C.getASTRecordLayout(decl); + + J.attribute("sizeBytes", Layout.getSize().getQuantity()); + + emitBindingAttributes(J, *decl); + + auto cxxDecl = dyn_cast<CXXRecordDecl>(decl); + + if (cxxDecl) { + J.attributeBegin("supers"); + J.arrayBegin(); + for (const CXXBaseSpecifier &Base : cxxDecl->bases()) { + const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + + J.objectBegin(); + + J.attribute("sym", getMangledName(CurMangleContext, BaseDecl)); + + J.attributeBegin("props"); + J.arrayBegin(); + if (Base.isVirtual()) { + J.value("virtual"); + } + J.arrayEnd(); + J.attributeEnd(); + + J.objectEnd(); + } + J.arrayEnd(); + J.attributeEnd(); + + J.attributeBegin("methods"); + J.arrayBegin(); + for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) { + J.objectBegin(); + + J.attribute("pretty", getQualifiedName(MethodDecl)); + J.attribute("sym", getMangledName(CurMangleContext, MethodDecl)); + + // TODO: Better figure out what to do for non-isUserProvided methods + // which means there's potentially semantic data that doesn't correspond + // to a source location in the source. Should we be emitting + // structured info for those when we're processing the class here? + + J.attributeBegin("props"); + J.arrayBegin(); + if (MethodDecl->isStatic()) { + J.value("static"); + } + if (MethodDecl->isInstance()) { + J.value("instance"); + } + if (MethodDecl->isVirtual()) { + J.value("virtual"); + } + if (MethodDecl->isUserProvided()) { + J.value("user"); + } + if (MethodDecl->isDefaulted()) { + J.value("defaulted"); + } + if (MethodDecl->isDeleted()) { + J.value("deleted"); + } + if (MethodDecl->isConstexpr()) { + J.value("constexpr"); + } + J.arrayEnd(); + J.attributeEnd(); + + J.objectEnd(); + } + J.arrayEnd(); + J.attributeEnd(); + } + + J.attributeBegin("fields"); + J.arrayBegin(); + uint64_t iField = 0; + for (RecordDecl::field_iterator It = decl->field_begin(), + End = decl->field_end(); It != End; ++It, ++iField) { + const FieldDecl &Field = **It; + uint64_t localOffsetBits = Layout.getFieldOffset(iField); + CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits); + + J.objectBegin(); + J.attribute("pretty", getQualifiedName(&Field)); + J.attribute("sym", getMangledName(CurMangleContext, &Field)); + QualType FieldType = Field.getType(); + QualType CanonicalFieldType = FieldType.getCanonicalType(); + J.attribute("type", CanonicalFieldType.getAsString()); + const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl(); + if (!tagDecl) { + // Try again piercing any pointers/references involved. Note that our + // typesym semantics are dubious-ish and right now crossref just does + // some parsing of "type" itself until we improve this rep. + CanonicalFieldType = CanonicalFieldType->getPointeeType(); + if (!CanonicalFieldType.isNull()) { + tagDecl = CanonicalFieldType->getAsTagDecl(); + } + } + if (tagDecl) { + J.attribute("typesym", getMangledName(CurMangleContext, tagDecl)); + } + J.attribute("offsetBytes", localOffsetBytes.getQuantity()); + if (Field.isBitField()) { + J.attributeBegin("bitPositions"); + J.objectBegin(); + + J.attribute("begin", unsigned(localOffsetBits - C.toBits(localOffsetBytes))); + J.attribute("width", Field.getBitWidthValue(C)); + + J.objectEnd(); + J.attributeEnd(); + } else { + // Try and get the field as a record itself so we can know its size, but + // we don't actually want to recurse into it. + if (auto FieldRec = Field.getType()->getAs<RecordType>()) { + auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl()); + J.attribute("sizeBytes", FieldLayout.getSize().getQuantity()); + } else { + // We were unable to get it as a record, which suggests it's a normal + // type, in which case let's just ask for the type size. (Maybe this + // would also work for the above case too?) + uint64_t typeSizeBits = C.getTypeSize(Field.getType()); + CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits); + J.attribute("sizeBytes", typeSizeBytes.getQuantity()); + } + } + J.objectEnd(); + } + J.arrayEnd(); + J.attributeEnd(); + + // End the top-level object. + J.objectEnd(); + + FileInfo *F = getFileInfo(Loc); + // we want a newline. + ros << '\n'; + F->Output.push_back(std::move(ros.str())); + } + + void emitStructuredInfo(SourceLocation Loc, const FunctionDecl *decl) { + std::string json_str; + llvm::raw_string_ostream ros(json_str); + llvm::json::OStream J(ros); + // Start the top-level object. + J.objectBegin(); + + unsigned StartOffset = SM.getFileOffset(Loc); + unsigned EndOffset = + StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); + J.attribute("loc", locationToString(Loc, EndOffset - StartOffset)); + J.attribute("structured", 1); + J.attribute("pretty", getQualifiedName(decl)); + J.attribute("sym", getMangledName(CurMangleContext, decl)); + + emitBindingAttributes(J, *decl); + + J.attributeBegin("args"); + J.arrayBegin(); + + for (auto param : decl->parameters()) { + J.objectBegin(); + + J.attribute("name", param->getName()); + QualType ArgType = param->getOriginalType(); + J.attribute("type", ArgType.getAsString()); + + QualType CanonicalArgType = ArgType.getCanonicalType(); + const TagDecl *canonDecl = CanonicalArgType->getAsTagDecl(); + if (!canonDecl) { + // Try again piercing any pointers/references involved. Note that our + // typesym semantics are dubious-ish and right now crossref just does + // some parsing of "type" itself until we improve this rep. + CanonicalArgType = CanonicalArgType->getPointeeType(); + if (!CanonicalArgType.isNull()) { + canonDecl = CanonicalArgType->getAsTagDecl(); + } + } + if (canonDecl) { + J.attribute("typesym", getMangledName(CurMangleContext, canonDecl)); + } + + J.objectEnd(); + } + + J.arrayEnd(); + J.attributeEnd(); + + + auto cxxDecl = dyn_cast<CXXMethodDecl>(decl); + + if (cxxDecl) { + J.attribute("kind", "method"); + if (auto parentDecl = cxxDecl->getParent()) { + J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl)); + } + + J.attributeBegin("overrides"); + J.arrayBegin(); + for (const CXXMethodDecl *MethodDecl : cxxDecl->overridden_methods()) { + J.objectBegin(); + + // TODO: Make sure we're doing template traversals appropriately... + // findOverriddenMethods (now removed) liked to do: + // if (Decl->isTemplateInstantiation()) { + // Decl = dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern()); + // } + // I think our pre-emptive dereferencing/avoidance of templates may + // protect us from this, but it needs more investigation. + + J.attribute("sym", getMangledName(CurMangleContext, MethodDecl)); + + J.objectEnd(); + } + J.arrayEnd(); + J.attributeEnd(); + + } else { + J.attribute("kind", "function"); + } + + // ## Props + J.attributeBegin("props"); + J.arrayBegin(); + // some of these are only possible on a CXXMethodDecl, but we want them all + // in the same array, so condition these first ones. + if (cxxDecl) { + if (cxxDecl->isStatic()) { + J.value("static"); + } + if (cxxDecl->isInstance()) { + J.value("instance"); + } + if (cxxDecl->isVirtual()) { + J.value("virtual"); + } + if (cxxDecl->isUserProvided()) { + J.value("user"); + } + } + if (decl->isDefaulted()) { + J.value("defaulted"); + } + if (decl->isDeleted()) { + J.value("deleted"); + } + if (decl->isConstexpr()) { + J.value("constexpr"); + } + J.arrayEnd(); + J.attributeEnd(); + + // End the top-level object. + J.objectEnd(); + + FileInfo *F = getFileInfo(Loc); + // we want a newline. + ros << '\n'; + F->Output.push_back(std::move(ros.str())); + } + + /** + * Emit structured info for a field. Right now the intent is for this to just + * be a pointer to its parent's structured info with this method entirely + * avoiding getting the ASTRecordLayout. + * + * TODO: Give more thought on where to locate the canonical info on fields and + * how to normalize their exposure over the web. We could relink the info + * both at cross-reference time and web-server lookup time. This is also + * called out in `analysis.md`. + */ + void emitStructuredInfo(SourceLocation Loc, const FieldDecl *decl) { + // XXX the call to decl::getParent will assert below for ObjCIvarDecl + // instances because their DecContext is not a RecordDecl. So just bail + // for now. + // TODO: better support ObjC. + if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(decl)) { + return; + } + + std::string json_str; + llvm::raw_string_ostream ros(json_str); + llvm::json::OStream J(ros); + // Start the top-level object. + J.objectBegin(); + + unsigned StartOffset = SM.getFileOffset(Loc); + unsigned EndOffset = + StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); + J.attribute("loc", locationToString(Loc, EndOffset - StartOffset)); + J.attribute("structured", 1); + J.attribute("pretty", getQualifiedName(decl)); + J.attribute("sym", getMangledName(CurMangleContext, decl)); + J.attribute("kind", "field"); + + if (auto parentDecl = decl->getParent()) { + J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl)); + } + + // End the top-level object. + J.objectEnd(); + + FileInfo *F = getFileInfo(Loc); + // we want a newline. + ros << '\n'; + F->Output.push_back(std::move(ros.str())); + } + + /** + * Emit structured info for a variable if it is a static class member. + */ + void emitStructuredInfo(SourceLocation Loc, const VarDecl *decl) { + const auto *parentDecl = dyn_cast_or_null<RecordDecl>(decl->getDeclContext()); + + std::string json_str; + llvm::raw_string_ostream ros(json_str); + llvm::json::OStream J(ros); + // Start the top-level object. + J.objectBegin(); + + unsigned StartOffset = SM.getFileOffset(Loc); + unsigned EndOffset = + StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); + J.attribute("loc", locationToString(Loc, EndOffset - StartOffset)); + J.attribute("structured", 1); + J.attribute("pretty", getQualifiedName(decl)); + J.attribute("sym", getMangledName(CurMangleContext, decl)); + J.attribute("kind", "field"); + + if (parentDecl) { + J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl)); + } + + emitBindingAttributes(J, *decl); + + // End the top-level object. + J.objectEnd(); + + FileInfo *F = getFileInfo(Loc); + // we want a newline. + ros << '\n'; + F->Output.push_back(std::move(ros.str())); + } + + // XXX Type annotating. + // QualType is the type class. It has helpers like TagDecl via getAsTagDecl. + // ValueDecl exposes a getType() method. + // + // Arguably it makes sense to only expose types that Searchfox has definitions + // for as first-class. Probably the way to go is like context/contextsym. + // We expose a "type" which is just a human-readable string which has no + // semantic purposes and is just a display string, plus then a "typesym" which + // we expose if we were able to map the type. + // + // Other meta-info: field offsets. Ancestor types. + + // This is the only function that emits analysis JSON data. It should be + // called for each identifier that corresponds to a symbol. + void visitIdentifier(const char *Kind, const char *SyntaxKind, + llvm::StringRef QualName, SourceRange LocRange, + std::string Symbol, + QualType MaybeType = QualType(), + Context TokenContext = Context(), int Flags = 0, + SourceRange PeekRange = SourceRange(), + SourceRange NestingRange = SourceRange(), + std::vector<SourceRange> *ArgRanges = nullptr) { + SourceLocation Loc = LocRange.getBegin(); + if (!shouldVisit(Loc)) { + return; + } + + // Find the file positions corresponding to the token. + unsigned StartOffset = SM.getFileOffset(Loc); + unsigned EndOffset = (Flags & LocRangeEndValid) + ? SM.getFileOffset(LocRange.getEnd()) + : StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); + + std::string LocStr = locationToString(Loc, EndOffset - StartOffset); + std::string RangeStr = locationToString(Loc, EndOffset - StartOffset); + std::string PeekRangeStr; + + if (!(Flags & NotIdentifierToken)) { + // Get the token's characters so we can make sure it's a valid token. + const char *StartChars = SM.getCharacterData(Loc); + std::string Text(StartChars, EndOffset - StartOffset); + if (!isValidIdentifier(Text)) { + return; + } + } + + FileInfo *F = getFileInfo(Loc); + + if (!(Flags & NoCrossref)) { + std::string json_str; + llvm::raw_string_ostream ros(json_str); + llvm::json::OStream J(ros); + // Start the top-level object. + J.objectBegin(); + + J.attribute("loc", LocStr); + J.attribute("target", 1); + J.attribute("kind", Kind); + J.attribute("pretty", QualName.data()); + J.attribute("sym", Symbol); + if (!TokenContext.Name.empty()) { + J.attribute("context", TokenContext.Name); + } + if (!TokenContext.Symbol.empty()) { + J.attribute("contextsym", TokenContext.Symbol); + } + if (PeekRange.isValid()) { + PeekRangeStr = lineRangeToString(PeekRange); + if (!PeekRangeStr.empty()) { + J.attribute("peekRange", PeekRangeStr); + } + } + + if (ArgRanges) { + J.attributeBegin("argRanges"); + J.arrayBegin(); + + for (auto range : *ArgRanges) { + std::string ArgRangeStr = fullRangeToString(range); + if (!ArgRangeStr.empty()) { + J.value(ArgRangeStr); + } + } + + J.arrayEnd(); + J.attributeEnd(); + } + + // End the top-level object. + J.objectEnd(); + // we want a newline. + ros << '\n'; + F->Output.push_back(std::move(ros.str())); + } + + // Generate a single "source":1 for all the symbols. If we search from here, + // we want to union the results for every symbol in `symbols`. + std::string json_str; + llvm::raw_string_ostream ros(json_str); + llvm::json::OStream J(ros); + // Start the top-level object. + J.objectBegin(); + + J.attribute("loc", RangeStr); + J.attribute("source", 1); + + if (NestingRange.isValid()) { + std::string NestingRangeStr = fullRangeToString(NestingRange); + if (!NestingRangeStr.empty()) { + J.attribute("nestingRange", NestingRangeStr); + } + } + + std::string Syntax; + if (Flags & NoCrossref) { + J.attribute("syntax", ""); + } else { + Syntax = Kind; + Syntax.push_back(','); + Syntax.append(SyntaxKind); + J.attribute("syntax", Syntax); + } + + if (!MaybeType.isNull()) { + J.attribute("type", MaybeType.getAsString()); + QualType canonical = MaybeType.getCanonicalType(); + const TagDecl *decl = canonical->getAsTagDecl(); + if (!decl) { + // Try again piercing any pointers/references involved. Note that our + // typesym semantics are dubious-ish and right now crossref just does + // some parsing of "type" itself until we improve this rep. + canonical = canonical->getPointeeType(); + if (!canonical.isNull()) { + decl = canonical->getAsTagDecl(); + } + } + if (decl) { + std::string Mangled = getMangledName(CurMangleContext, decl); + J.attribute("typesym", Mangled); + } + } + + std::string Pretty(SyntaxKind); + Pretty.push_back(' '); + Pretty.append(QualName.data()); + J.attribute("pretty", Pretty); + + J.attribute("sym", Symbol); + + if (Flags & NoCrossref) { + J.attribute("no_crossref", 1); + } + + if (ArgRanges) { + J.attributeBegin("argRanges"); + J.arrayBegin(); + + for (auto range : *ArgRanges) { + std::string ArgRangeStr = fullRangeToString(range); + if (!ArgRangeStr.empty()) { + J.value(ArgRangeStr); + } + } + + J.arrayEnd(); + J.attributeEnd(); + } + + // End the top-level object. + J.objectEnd(); + + // we want a newline. + ros << '\n'; + F->Output.push_back(std::move(ros.str())); + } + + void normalizeLocation(SourceLocation *Loc) { + *Loc = SM.getSpellingLoc(*Loc); + } + + // For cases where the left-brace is not directly accessible from the AST, + // helper to use the lexer to find the brace. Make sure you're picking the + // start location appropriately! + SourceLocation findLeftBraceFromLoc(SourceLocation Loc) { + return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false); + } + + // If the provided statement is compound, return its range. + SourceRange getCompoundStmtRange(Stmt* D) { + if (!D) { + return SourceRange(); + } + + CompoundStmt *D2 = dyn_cast<CompoundStmt>(D); + if (D2) { + return D2->getSourceRange(); + } + + return SourceRange(); + } + + SourceRange getFunctionPeekRange(FunctionDecl* D) { + // We always start at the start of the function decl, which may include the + // return type on a separate line. + SourceLocation Start = D->getBeginLoc(); + + // By default, we end at the line containing the function's name. + SourceLocation End = D->getLocation(); + + std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End); + + // But if there are parameters, we want to include those as well. + for (ParmVarDecl* Param : D->parameters()) { + std::pair<FileID, unsigned> ParamLoc = SM.getDecomposedLoc(Param->getLocation()); + + // It's possible there are macros involved or something. We don't include + // the parameters in that case. + if (ParamLoc.first == FuncLoc.first) { + // Assume parameters are in order, so we always take the last one. + End = Param->getEndLoc(); + } + } + + return SourceRange(Start, End); + } + + SourceRange getTagPeekRange(TagDecl* D) { + SourceLocation Start = D->getBeginLoc(); + + // By default, we end at the line containing the name. + SourceLocation End = D->getLocation(); + + std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedLoc(End); + + if (CXXRecordDecl* D2 = dyn_cast<CXXRecordDecl>(D)) { + // But if there are parameters, we want to include those as well. + for (CXXBaseSpecifier& Base : D2->bases()) { + std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Base.getEndLoc()); + + // It's possible there are macros involved or something. We don't include + // the parameters in that case. + if (Loc.first == FuncLoc.first) { + // Assume parameters are in order, so we always take the last one. + End = Base.getEndLoc(); + } + } + } + + return SourceRange(Start, End); + } + + SourceRange getCommentRange(NamedDecl* D) { + const RawComment* RC = + AstContext->getRawCommentForDeclNoCache(D); + if (!RC) { + return SourceRange(); + } + + return RC->getSourceRange(); + } + + // Sanity checks that all ranges are in the same file, returning the first if + // they're in different files. Unions the ranges based on which is first. + SourceRange combineRanges(SourceRange Range1, SourceRange Range2) { + if (Range1.isInvalid()) { + return Range2; + } + if (Range2.isInvalid()) { + return Range1; + } + + std::pair<FileID, unsigned> Begin1 = SM.getDecomposedLoc(Range1.getBegin()); + std::pair<FileID, unsigned> End1 = SM.getDecomposedLoc(Range1.getEnd()); + std::pair<FileID, unsigned> Begin2 = SM.getDecomposedLoc(Range2.getBegin()); + std::pair<FileID, unsigned> End2 = SM.getDecomposedLoc(Range2.getEnd()); + + if (End1.first != Begin2.first) { + // Something weird is probably happening with the preprocessor. Just + // return the first range. + return Range1; + } + + // See which range comes first. + if (Begin1.second <= End2.second) { + return SourceRange(Range1.getBegin(), Range2.getEnd()); + } else { + return SourceRange(Range2.getBegin(), Range1.getEnd()); + } + } + + // Given a location and a range, returns the range if: + // - The location and the range live in the same file. + // - The range is well ordered (end is not before begin). + // Returns an empty range otherwise. + SourceRange validateRange(SourceLocation Loc, SourceRange Range) { + std::pair<FileID, unsigned> Decomposed = SM.getDecomposedLoc(Loc); + std::pair<FileID, unsigned> Begin = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> End = SM.getDecomposedLoc(Range.getEnd()); + + if (Begin.first != Decomposed.first || End.first != Decomposed.first) { + return SourceRange(); + } + + if (Begin.second >= End.second) { + return SourceRange(); + } + + return Range; + } + + bool VisitNamedDecl(NamedDecl *D) { + SourceLocation Loc = D->getLocation(); + + // If the token is from a macro expansion and the expansion location + // is interesting, use that instead as it tends to be more useful. + SourceLocation expandedLoc = Loc; + if (SM.isMacroBodyExpansion(Loc)) { + Loc = SM.getFileLoc(Loc); + } + + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) { + // Unnamed parameter in function proto. + return true; + } + + int Flags = 0; + const char *Kind = "def"; + const char *PrettyKind = "?"; + bool wasTemplate = false; + SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc()); + // The nesting range identifies the left brace and right brace, which + // heavily depends on the AST node type. + SourceRange NestingRange; + QualType qtype = QualType(); + if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { + if (D2->isTemplateInstantiation()) { + wasTemplate = true; + D = D2->getTemplateInstantiationPattern(); + } + // We treat pure virtual declarations as definitions. + Kind = (D2->isThisDeclarationADefinition() || D2->isPure()) ? "def" : "decl"; + PrettyKind = "function"; + PeekRange = getFunctionPeekRange(D2); + + // Only emit the nesting range if: + // - This is a definition AND + // - This isn't a template instantiation. Function templates' + // instantiations can end up as a definition with a Loc at their point + // of declaration but with the CompoundStmt of the template's + // point of definition. This really messes up the nesting range logic. + // At the time of writing this, the test repo's `big_header.h`'s + // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as + // instantiated by `big_cpp.cpp` triggers this phenomenon. + // + // Note: As covered elsewhere, template processing is tricky and it's + // conceivable that we may change traversal patterns in the future, + // mooting this guard. + if (D2->isThisDeclarationADefinition() && + !D2->isTemplateInstantiation()) { + // The CompoundStmt range is the brace range. + NestingRange = getCompoundStmtRange(D2->getBody()); + } + } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) { + Kind = D2->isThisDeclarationADefinition() ? "def" : "forward"; + PrettyKind = "type"; + + if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) { + PeekRange = getTagPeekRange(D2); + NestingRange = D2->getBraceRange(); + } else { + PeekRange = SourceRange(); + } + } else if (TypedefNameDecl *D2 = dyn_cast<TypedefNameDecl>(D)) { + Kind = "alias"; + PrettyKind = "type"; + PeekRange = SourceRange(Loc, Loc); + qtype = D2->getUnderlyingType(); + } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) { + if (D2->isLocalVarDeclOrParm()) { + Flags = NoCrossref; + } + + Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly + ? "decl" + : "def"; + PrettyKind = "variable"; + } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) { + Kind = "def"; + PrettyKind = "namespace"; + PeekRange = SourceRange(Loc, Loc); + NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D); + if (D2) { + // There's no exposure of the left brace so we have to find it. + NestingRange = SourceRange( + findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() : Loc), + D2->getRBraceLoc()); + } + } else if (isa<FieldDecl>(D)) { + Kind = "def"; + PrettyKind = "field"; + } else if (isa<EnumConstantDecl>(D)) { + Kind = "def"; + PrettyKind = "enum constant"; + } else { + return true; + } + + if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) { + qtype = D2->getType(); + } + + SourceRange CommentRange = getCommentRange(D); + PeekRange = combineRanges(PeekRange, CommentRange); + PeekRange = validateRange(Loc, PeekRange); + NestingRange = validateRange(Loc, NestingRange); + + std::string Symbol = getMangledName(CurMangleContext, D); + + // In the case of destructors, Loc might point to the ~ character. In that + // case we want to skip to the name of the class. However, Loc might also + // point to other places that generate destructors, such as the use site of + // a macro that expands to generate a destructor, or a lambda (apparently + // clang 8 creates a destructor declaration for at least some lambdas). In + // the former case we'll use the macro use site as the location, and in the + // latter we'll just drop the declaration. + if (isa<CXXDestructorDecl>(D)) { + PrettyKind = "destructor"; + const char *P = SM.getCharacterData(Loc); + if (*P == '~') { + // Advance Loc to the class name + P++; + + unsigned Skipped = 1; + while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') { + P++; + Skipped++; + } + + Loc = Loc.getLocWithOffset(Skipped); + } else { + // See if the destructor is coming from a macro expansion + P = SM.getCharacterData(expandedLoc); + if (*P != '~') { + // It's not + return true; + } + // It is, so just use Loc as-is + } + } + + visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), Symbol, + qtype, + getContext(D), Flags, PeekRange, NestingRange); + + // In-progress structured info emission. + if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) { + if (D2->isThisDeclarationADefinition() && + // XXX getASTRecordLayout doesn't work for dependent types, so we + // avoid calling into emitStructuredInfo for now if there's a + // dependent type or if we're in any kind of template context. This + // should be re-evaluated once this is working for normal classes and + // we can better evaluate what is useful. + !D2->isDependentType() && + !TemplateStack) { + if (auto *D3 = dyn_cast<CXXRecordDecl>(D2)) { + findBindingToJavaClass(*AstContext, *D3); + findBoundAsJavaClasses(*AstContext, *D3); + } + emitStructuredInfo(Loc, D2); + } + } + if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { + if ((D2->isThisDeclarationADefinition() || D2->isPure()) && + // a clause at the top should have generalized and set wasTemplate so + // it shouldn't be the case that isTemplateInstantiation() is true. + !D2->isTemplateInstantiation() && + !wasTemplate && + !D2->isFunctionTemplateSpecialization() && + !TemplateStack) { + if (auto *D3 = dyn_cast<CXXMethodDecl>(D2)) { + findBindingToJavaMember(*AstContext, *D3); + } else { + findBindingToJavaFunction(*AstContext, *D2); + } + emitStructuredInfo(Loc, D2); + } + } + if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) { + if (!D2->isTemplated() && + !TemplateStack) { + emitStructuredInfo(Loc, D2); + } + } + if (VarDecl *D2 = dyn_cast<VarDecl>(D)) { + if (!D2->isTemplated() && + !TemplateStack && + isa<CXXRecordDecl>(D2->getDeclContext())) { + findBindingToJavaConstant(*AstContext, *D2); + emitStructuredInfo(Loc, D2); + } + } + + return true; + } + + bool VisitCXXConstructExpr(CXXConstructExpr *E) { + SourceLocation Loc = E->getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + FunctionDecl *Ctor = E->getConstructor(); + if (Ctor->isTemplateInstantiation()) { + Ctor = Ctor->getTemplateInstantiationPattern(); + } + std::string Mangled = getMangledName(CurMangleContext, Ctor); + + // FIXME: Need to do something different for list initialization. + + visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled, + QualType(), getContext(Loc)); + + return true; + } + + bool VisitCallExpr(CallExpr *E) { + Decl *Callee = E->getCalleeDecl(); + if (!Callee || !FunctionDecl::classof(Callee)) { + return true; + } + + const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee); + + SourceLocation Loc; + + const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee); + if (F->isTemplateInstantiation()) { + NamedCallee = F->getTemplateInstantiationPattern(); + } + + std::string Mangled = getMangledName(CurMangleContext, NamedCallee); + int Flags = 0; + + Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts(); + + if (CXXOperatorCallExpr::classof(E)) { + // Just take the first token. + CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E); + Loc = Op->getOperatorLoc(); + Flags |= NotIdentifierToken; + } else if (MemberExpr::classof(CalleeExpr)) { + MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr); + Loc = Member->getMemberLoc(); + } else if (DeclRefExpr::classof(CalleeExpr)) { + // We handle this in VisitDeclRefExpr. + return true; + } else { + return true; + } + + normalizeLocation(&Loc); + + if (!isInterestingLocation(Loc)) { + return true; + } + + std::vector<SourceRange> argRanges; + for (auto argExpr : E->arguments()) { + argRanges.push_back(argExpr->getSourceRange()); + } + + visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, Mangled, + E->getCallReturnType(*AstContext), getContext(Loc), Flags, + SourceRange(), SourceRange(), &argRanges); + + return true; + } + + bool VisitTagTypeLoc(TagTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + TagDecl *Decl = L.getDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + L.getType(), getContext(Loc)); + return true; + } + + bool VisitTypedefTypeLoc(TypedefTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + NamedDecl *Decl = L.getTypedefNameDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + L.getType(), getContext(Loc)); + return true; + } + + bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + NamedDecl *Decl = L.getDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + L.getType(), getContext(Loc)); + return true; + } + + bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) { + SourceLocation Loc = L.getBeginLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl(); + if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) { + NamedDecl *Decl = D->getTemplatedDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + QualType(), getContext(Loc)); + } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) { + NamedDecl *Decl = D->getTemplatedDecl(); + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, + QualType(), getContext(Loc)); + } + + return true; + } + + bool VisitDependentNameTypeLoc(DependentNameTypeLoc L) { + SourceLocation Loc = L.getNameLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + for (const NamedDecl *D : + Resolver->resolveDependentNameType(L.getTypePtr())) { + visitHeuristicResult(Loc, D); + } + return true; + } + + bool VisitDeclRefExpr(DeclRefExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + if (E->hasQualifier()) { + Loc = E->getNameInfo().getLoc(); + normalizeLocation(&Loc); + } + + NamedDecl *Decl = E->getDecl(); + if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) { + int Flags = 0; + if (D2->isLocalVarDeclOrParm()) { + Flags = NoCrossref; + } + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled, + D2->getType(), getContext(Loc), Flags); + } else if (isa<FunctionDecl>(Decl)) { + const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl); + if (F->isTemplateInstantiation()) { + Decl = F->getTemplateInstantiationPattern(); + } + + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled, + E->getType(), getContext(Loc)); + } else if (isa<EnumConstantDecl>(Decl)) { + std::string Mangled = getMangledName(CurMangleContext, Decl); + visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled, + E->getType(), getContext(Loc)); + } + + return true; + } + + bool VisitCXXConstructorDecl(CXXConstructorDecl *D) { + if (!isInterestingLocation(D->getLocation())) { + return true; + } + + for (CXXConstructorDecl::init_const_iterator It = D->init_begin(); + It != D->init_end(); ++It) { + const CXXCtorInitializer *Ci = *It; + if (!Ci->getMember() || !Ci->isWritten()) { + continue; + } + + SourceLocation Loc = Ci->getMemberLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + continue; + } + + FieldDecl *Member = Ci->getMember(); + std::string Mangled = getMangledName(CurMangleContext, Member); + visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled, + Member->getType(), getContext(D)); + } + + return true; + } + + bool VisitMemberExpr(MemberExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + ValueDecl *Decl = E->getMemberDecl(); + if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) { + std::string Mangled = getMangledName(CurMangleContext, Field); + visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled, + Field->getType(), getContext(Loc)); + } + return true; + } + + // Helper function for producing heuristic results for usages in dependent + // code. These should be distinguished from concrete results (obtained for + // dependent code using the AutoTemplateContext machinery) once bug 1833552 is + // fixed. + // We don't expect this method to be intentionally called multiple times for + // a given (Loc, NamedDecl) pair because our callers should be mutually + // exclusive AST node types. However, it's fine if this method is called + // multiple time for a given pair because we explicitly de-duplicate records + // with an identical string representation (which is a good reason to have + // this helper, as it ensures identical representations). + void visitHeuristicResult(SourceLocation Loc, const NamedDecl *ND) { + if (const TemplateDecl *TD = dyn_cast<TemplateDecl>(ND)) { + ND = TD->getTemplatedDecl(); + } + QualType MaybeType; + const char *SyntaxKind = nullptr; + if (const FunctionDecl *F = dyn_cast<FunctionDecl>(ND)) { + MaybeType = F->getType(); + SyntaxKind = "function"; + } else if (const FieldDecl *F = dyn_cast<FieldDecl>(ND)) { + MaybeType = F->getType(); + SyntaxKind = "field"; + } else if (const EnumConstantDecl *E = dyn_cast<EnumConstantDecl>(ND)) { + MaybeType = E->getType(); + SyntaxKind = "enum"; + } else if (const TypedefNameDecl *T = dyn_cast<TypedefNameDecl>(ND)) { + MaybeType = T->getUnderlyingType(); + SyntaxKind = "type"; + } + if (SyntaxKind) { + std::string Mangled = getMangledName(CurMangleContext, ND); + visitIdentifier("use", SyntaxKind, getQualifiedName(ND), Loc, Mangled, + MaybeType, getContext(Loc)); + } + } + + bool VisitOverloadExpr(OverloadExpr *E) { + SourceLocation Loc = E->getExprLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + for (auto *Candidate : E->decls()) { + visitHeuristicResult(Loc, Candidate); + } + return true; + } + + bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) { + SourceLocation Loc = E->getMemberLoc(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + // If possible, provide a heuristic result without instantiation. + for (const NamedDecl *D : Resolver->resolveMemberExpr(E)) { + visitHeuristicResult(Loc, D); + } + + // Also record this location so that if we have instantiations, we can + // gather more accurate results from them. + if (TemplateStack) { + TemplateStack->visitDependent(Loc); + } + return true; + } + + bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) { + SourceLocation Loc = E->getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return true; + } + + for (const NamedDecl *D : Resolver->resolveDeclRefExpr(E)) { + visitHeuristicResult(Loc, D); + } + return true; + } + + void enterSourceFile(SourceLocation Loc) { + normalizeLocation(&Loc); + FileInfo* newFile = getFileInfo(Loc); + if (!newFile->Interesting) { + return; + } + FileType type = newFile->Generated ? FileType::Generated : FileType::Source; + std::string symbol = + std::string("FILE_") + mangleFile(newFile->Realname, type); + + // We use an explicit zero-length source range at the start of the file. If we + // don't set the LocRangeEndValid flag, the visitIdentifier code will use the + // entire first token, which could be e.g. a long multiline-comment. + visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc), + symbol, QualType(), Context(), + NotIdentifierToken | LocRangeEndValid); + } + + void inclusionDirective(SourceRange FileNameRange, const FileEntry* File) { + std::string includedFile(File->tryGetRealPathName()); + FileType type = relativizePath(includedFile); + if (type == FileType::Unknown) { + return; + } + std::string symbol = + std::string("FILE_") + mangleFile(includedFile, type); + + visitIdentifier("use", "file", includedFile, FileNameRange, symbol, + QualType(), Context(), + NotIdentifierToken | LocRangeEndValid); + } + + void macroDefined(const Token &Tok, const MacroDirective *Macro) { + if (Macro->getMacroInfo()->isBuiltinMacro()) { + return; + } + SourceLocation Loc = Tok.getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return; + } + + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + if (Ident) { + std::string Mangled = + std::string("M_") + mangleLocation(Loc, std::string(Ident->getName())); + visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled); + } + } + + void macroUsed(const Token &Tok, const MacroInfo *Macro) { + if (!Macro) { + return; + } + if (Macro->isBuiltinMacro()) { + return; + } + SourceLocation Loc = Tok.getLocation(); + normalizeLocation(&Loc); + if (!isInterestingLocation(Loc)) { + return; + } + + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + if (Ident) { + std::string Mangled = + std::string("M_") + + mangleLocation(Macro->getDefinitionLoc(), std::string(Ident->getName())); + visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled); + } + } +}; + +void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID = FileID()) { + switch (Reason) { + case PPCallbacks::RenameFile: + case PPCallbacks::SystemHeaderPragma: + // Don't care about these, since we want the actual on-disk filenames + break; + case PPCallbacks::EnterFile: + Indexer->enterSourceFile(Loc); + break; + case PPCallbacks::ExitFile: + // Don't care about exiting files + break; + } +} + +void PreprocessorHook::InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + CharSourceRange FileNameRange, +#if CLANG_VERSION_MAJOR >= 16 + OptionalFileEntryRef File, +#elif CLANG_VERSION_MAJOR >= 15 + Optional<FileEntryRef> File, +#else + const FileEntry *File, +#endif + StringRef SearchPath, + StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) { +#if CLANG_VERSION_MAJOR >= 15 + if (!File) { + return; + } + Indexer->inclusionDirective(FileNameRange.getAsRange(), &File->getFileEntry()); +#else + Indexer->inclusionDirective(FileNameRange.getAsRange(), File); +#endif +} + +void PreprocessorHook::MacroDefined(const Token &Tok, + const MacroDirective *Md) { + Indexer->macroDefined(Tok, Md); +} + +void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md, + SourceRange Range, const MacroArgs *Ma) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::MacroUndefined(const Token &Tok, + const MacroDefinition &Md, + const MacroDirective *Undef) +{ + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md, + SourceRange Range) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok, + const MacroDefinition &Md) { + Indexer->macroUsed(Tok, Md.getMacroInfo()); +} + +class IndexAction : public PluginASTAction { +protected: + std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, + llvm::StringRef F) { + return make_unique<IndexConsumer>(CI); + } + + bool ParseArgs(const CompilerInstance &CI, + const std::vector<std::string> &Args) { + if (Args.size() != 3) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, + "Need arguments for the source, output, and object directories"); + D.Report(DiagID); + return false; + } + + // Load our directories + Srcdir = getAbsolutePath(Args[0]); + if (Srcdir.empty()) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID( + DiagnosticsEngine::Error, "Source directory '%0' does not exist"); + D.Report(DiagID) << Args[0]; + return false; + } + + ensurePath(Args[1] + PATHSEP_STRING); + Outdir = getAbsolutePath(Args[1]); + Outdir += PATHSEP_STRING; + + Objdir = getAbsolutePath(Args[2]); + if (Objdir.empty()) { + DiagnosticsEngine &D = CI.getDiagnostics(); + unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error, + "Objdir '%0' does not exist"); + D.Report(DiagID) << Args[2]; + return false; + } + Objdir += PATHSEP_STRING; + + printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(), + Objdir.c_str()); + + return true; + } + + void printHelp(llvm::raw_ostream &Ros) { + Ros << "Help for mozsearch plugin goes here\n"; + } +}; + +static FrontendPluginRegistry::Add<IndexAction> + Y("mozsearch-index", "create the mozsearch index database"); diff --git a/build/clang-plugin/mozsearch-plugin/README b/build/clang-plugin/mozsearch-plugin/README new file mode 100644 index 0000000000..d948e9aca3 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/README @@ -0,0 +1,12 @@ +This clang plugin code generates a JSON file for each compiler input +file. The JSON file contains information about the C++ symbols that +are referenced by the input file. The data is eventually consumed by +Searchfox. See https://github.com/mozsearch/mozsearch for more +information. + +This plugin is enabled with the --enable-clang-plugin and +--enable-mozsearch-plugin mozconfig options. The output of the plugin +is stored in $OBJDIR/mozsearch_index. + +This code is not a checker, unlike other parts of the Mozilla clang +plugin. It cannot be used with clang-tidy. diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.cpp b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp new file mode 100644 index 0000000000..a2e60e42c6 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/StringOperations.cpp @@ -0,0 +1,42 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "StringOperations.h" + +static unsigned long djbHash(const char *Str) { + unsigned long Hash = 5381; + + for (const char *P = Str; *P; P++) { + // Hash * 33 + c + Hash = ((Hash << 5) + Hash) + *P; + } + + return Hash; +} + +// This doesn't actually return a hex string of |hash|, but it +// does... something. It doesn't really matter what. +static void hashToString(unsigned long Hash, char *Buffer) { + const char Table[] = {"0123456789abcdef"}; + char *P = Buffer; + while (Hash) { + *P = Table[Hash & 0xf]; + Hash >>= 4; + P++; + } + + *P = 0; +} + +std::string hash(const std::string &Str) { + static char HashStr[41]; + unsigned long H = djbHash(Str.c_str()); + hashToString(H, HashStr); + return std::string(HashStr); +} + +std::string toString(int N) { + return stringFormat("%d", N); +} diff --git a/build/clang-plugin/mozsearch-plugin/StringOperations.h b/build/clang-plugin/mozsearch-plugin/StringOperations.h new file mode 100644 index 0000000000..4aa5b31962 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/StringOperations.h @@ -0,0 +1,25 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef StringOperations_h +#define StringOperations_h + +#include <memory> +#include <string> +#include <string.h> + +std::string hash(const std::string &Str); + +template <typename... Args> +inline std::string stringFormat(const std::string &Format, Args... ArgList) { + size_t Len = snprintf(nullptr, 0, Format.c_str(), ArgList...); + std::unique_ptr<char[]> Buf(new char[Len + 1]); + snprintf(Buf.get(), Len + 1, Format.c_str(), ArgList...); + return std::string(Buf.get(), Buf.get() + Len); +} + +std::string toString(int N); + +#endif diff --git a/build/clang-plugin/mozsearch-plugin/from-clangd/HeuristicResolver.cpp b/build/clang-plugin/mozsearch-plugin/from-clangd/HeuristicResolver.cpp new file mode 100644 index 0000000000..719094dbf2 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/from-clangd/HeuristicResolver.cpp @@ -0,0 +1,349 @@ +//===--- HeuristicResolver.cpp ---------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "HeuristicResolver.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/CXXInheritance.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/Type.h" + +namespace clang { +namespace clangd { + +// Convenience lambdas for use as the 'Filter' parameter of +// HeuristicResolver::resolveDependentMember(). +const auto NoFilter = [](const NamedDecl *D) { return true; }; +const auto NonStaticFilter = [](const NamedDecl *D) { + return D->isCXXInstanceMember(); +}; +const auto StaticFilter = [](const NamedDecl *D) { + return !D->isCXXInstanceMember(); +}; +const auto ValueFilter = [](const NamedDecl *D) { return isa<ValueDecl>(D); }; +const auto TypeFilter = [](const NamedDecl *D) { return isa<TypeDecl>(D); }; +const auto TemplateFilter = [](const NamedDecl *D) { + return isa<TemplateDecl>(D); +}; + +namespace { + +const Type *resolveDeclsToType(const std::vector<const NamedDecl *> &Decls, + ASTContext &Ctx) { + if (Decls.size() != 1) // Names an overload set -- just bail. + return nullptr; + if (const auto *TD = dyn_cast<TypeDecl>(Decls[0])) { + return Ctx.getTypeDeclType(TD).getTypePtr(); + } + if (const auto *VD = dyn_cast<ValueDecl>(Decls[0])) { + return VD->getType().getTypePtrOrNull(); + } + return nullptr; +} + +} // namespace + +// Helper function for HeuristicResolver::resolveDependentMember() +// which takes a possibly-dependent type `T` and heuristically +// resolves it to a CXXRecordDecl in which we can try name lookup. +CXXRecordDecl *HeuristicResolver::resolveTypeToRecordDecl(const Type *T) const { + assert(T); + + // Unwrap type sugar such as type aliases. + T = T->getCanonicalTypeInternal().getTypePtr(); + + if (const auto *DNT = T->getAs<DependentNameType>()) { + T = resolveDeclsToType(resolveDependentNameType(DNT), Ctx); + if (!T) + return nullptr; + T = T->getCanonicalTypeInternal().getTypePtr(); + } + + if (const auto *RT = T->getAs<RecordType>()) + return dyn_cast<CXXRecordDecl>(RT->getDecl()); + + if (const auto *ICNT = T->getAs<InjectedClassNameType>()) + T = ICNT->getInjectedSpecializationType().getTypePtrOrNull(); + if (!T) + return nullptr; + + const auto *TST = T->getAs<TemplateSpecializationType>(); + if (!TST) + return nullptr; + + const ClassTemplateDecl *TD = dyn_cast_or_null<ClassTemplateDecl>( + TST->getTemplateName().getAsTemplateDecl()); + if (!TD) + return nullptr; + + return TD->getTemplatedDecl(); +} + +const Type *HeuristicResolver::getPointeeType(const Type *T) const { + if (!T) + return nullptr; + + if (T->isPointerType()) + return T->castAs<PointerType>()->getPointeeType().getTypePtrOrNull(); + + // Try to handle smart pointer types. + + // Look up operator-> in the primary template. If we find one, it's probably a + // smart pointer type. + auto ArrowOps = resolveDependentMember( + T, Ctx.DeclarationNames.getCXXOperatorName(OO_Arrow), NonStaticFilter); + if (ArrowOps.empty()) + return nullptr; + + // Getting the return type of the found operator-> method decl isn't useful, + // because we discarded template arguments to perform lookup in the primary + // template scope, so the return type would just have the form U* where U is a + // template parameter type. + // Instead, just handle the common case where the smart pointer type has the + // form of SmartPtr<X, ...>, and assume X is the pointee type. + auto *TST = T->getAs<TemplateSpecializationType>(); + if (!TST) + return nullptr; + if (TST->template_arguments().size() == 0) + return nullptr; + const TemplateArgument &FirstArg = TST->template_arguments()[0]; + if (FirstArg.getKind() != TemplateArgument::Type) + return nullptr; + return FirstArg.getAsType().getTypePtrOrNull(); +} + +std::vector<const NamedDecl *> HeuristicResolver::resolveMemberExpr( + const CXXDependentScopeMemberExpr *ME) const { + // If the expression has a qualifier, first try resolving the member + // inside the qualifier's type. + // Note that we cannot use a NonStaticFilter in either case, for a couple + // of reasons: + // 1. It's valid to access a static member using instance member syntax, + // e.g. `instance.static_member`. + // 2. We can sometimes get a CXXDependentScopeMemberExpr for static + // member syntax too, e.g. if `X::static_member` occurs inside + // an instance method, it's represented as a CXXDependentScopeMemberExpr + // with `this` as the base expression as `X` as the qualifier + // (which could be valid if `X` names a base class after instantiation). + if (NestedNameSpecifier *NNS = ME->getQualifier()) { + if (const Type *QualifierType = resolveNestedNameSpecifierToType(NNS)) { + auto Decls = + resolveDependentMember(QualifierType, ME->getMember(), NoFilter); + if (!Decls.empty()) + return Decls; + } + } + + // If that didn't yield any results, try resolving the member inside + // the expression's base type. + const Type *BaseType = ME->getBaseType().getTypePtrOrNull(); + if (ME->isArrow()) { + BaseType = getPointeeType(BaseType); + } + if (!BaseType) + return {}; + if (const auto *BT = BaseType->getAs<BuiltinType>()) { + // If BaseType is the type of a dependent expression, it's just + // represented as BuiltinType::Dependent which gives us no information. We + // can get further by analyzing the dependent expression. + Expr *Base = ME->isImplicitAccess() ? nullptr : ME->getBase(); + if (Base && BT->getKind() == BuiltinType::Dependent) { + BaseType = resolveExprToType(Base); + } + } + return resolveDependentMember(BaseType, ME->getMember(), NoFilter); +} + +std::vector<const NamedDecl *> HeuristicResolver::resolveDeclRefExpr( + const DependentScopeDeclRefExpr *RE) const { + return resolveDependentMember(RE->getQualifier()->getAsType(), + RE->getDeclName(), StaticFilter); +} + +std::vector<const NamedDecl *> +HeuristicResolver::resolveTypeOfCallExpr(const CallExpr *CE) const { + const auto *CalleeType = resolveExprToType(CE->getCallee()); + if (!CalleeType) + return {}; + if (const auto *FnTypePtr = CalleeType->getAs<PointerType>()) + CalleeType = FnTypePtr->getPointeeType().getTypePtr(); + if (const FunctionType *FnType = CalleeType->getAs<FunctionType>()) { + if (const auto *D = + resolveTypeToRecordDecl(FnType->getReturnType().getTypePtr())) { + return {D}; + } + } + return {}; +} + +std::vector<const NamedDecl *> +HeuristicResolver::resolveCalleeOfCallExpr(const CallExpr *CE) const { + if (const auto *ND = dyn_cast_or_null<NamedDecl>(CE->getCalleeDecl())) { + return {ND}; + } + + return resolveExprToDecls(CE->getCallee()); +} + +std::vector<const NamedDecl *> HeuristicResolver::resolveUsingValueDecl( + const UnresolvedUsingValueDecl *UUVD) const { + return resolveDependentMember(UUVD->getQualifier()->getAsType(), + UUVD->getNameInfo().getName(), ValueFilter); +} + +std::vector<const NamedDecl *> HeuristicResolver::resolveDependentNameType( + const DependentNameType *DNT) const { + return resolveDependentMember( + resolveNestedNameSpecifierToType(DNT->getQualifier()), + DNT->getIdentifier(), TypeFilter); +} + +std::vector<const NamedDecl *> +HeuristicResolver::resolveTemplateSpecializationType( + const DependentTemplateSpecializationType *DTST) const { + return resolveDependentMember( + resolveNestedNameSpecifierToType(DTST->getQualifier()), + DTST->getIdentifier(), TemplateFilter); +} + +std::vector<const NamedDecl *> +HeuristicResolver::resolveExprToDecls(const Expr *E) const { + if (const auto *ME = dyn_cast<CXXDependentScopeMemberExpr>(E)) { + return resolveMemberExpr(ME); + } + if (const auto *RE = dyn_cast<DependentScopeDeclRefExpr>(E)) { + return resolveDeclRefExpr(RE); + } + if (const auto *OE = dyn_cast<OverloadExpr>(E)) { + return {OE->decls_begin(), OE->decls_end()}; + } + if (const auto *CE = dyn_cast<CallExpr>(E)) { + return resolveTypeOfCallExpr(CE); + } + if (const auto *ME = dyn_cast<MemberExpr>(E)) + return {ME->getMemberDecl()}; + + return {}; +} + +const Type *HeuristicResolver::resolveExprToType(const Expr *E) const { + std::vector<const NamedDecl *> Decls = resolveExprToDecls(E); + if (!Decls.empty()) + return resolveDeclsToType(Decls, Ctx); + + return E->getType().getTypePtr(); +} + +const Type *HeuristicResolver::resolveNestedNameSpecifierToType( + const NestedNameSpecifier *NNS) const { + if (!NNS) + return nullptr; + + // The purpose of this function is to handle the dependent (Kind == + // Identifier) case, but we need to recurse on the prefix because + // that may be dependent as well, so for convenience handle + // the TypeSpec cases too. + switch (NNS->getKind()) { + case NestedNameSpecifier::TypeSpec: + case NestedNameSpecifier::TypeSpecWithTemplate: + return NNS->getAsType(); + case NestedNameSpecifier::Identifier: { + return resolveDeclsToType( + resolveDependentMember( + resolveNestedNameSpecifierToType(NNS->getPrefix()), + NNS->getAsIdentifier(), TypeFilter), + Ctx); + } + default: + break; + } + return nullptr; +} + +namespace { + +bool isOrdinaryMember(const NamedDecl *ND) { + return ND->isInIdentifierNamespace(Decl::IDNS_Ordinary | Decl::IDNS_Tag | + Decl::IDNS_Member); +} + +bool findOrdinaryMember(const CXXRecordDecl *RD, CXXBasePath &Path, + DeclarationName Name) { + Path.Decls = RD->lookup(Name).begin(); + for (DeclContext::lookup_iterator I = Path.Decls, E = I.end(); I != E; ++I) + if (isOrdinaryMember(*I)) + return true; + + return false; +} + +} // namespace + +bool HeuristicResolver::findOrdinaryMemberInDependentClasses( + const CXXBaseSpecifier *Specifier, CXXBasePath &Path, + DeclarationName Name) const { + CXXRecordDecl *RD = + resolveTypeToRecordDecl(Specifier->getType().getTypePtr()); + if (!RD) + return false; + return findOrdinaryMember(RD, Path, Name); +} + +std::vector<const NamedDecl *> HeuristicResolver::lookupDependentName( + CXXRecordDecl *RD, DeclarationName Name, + llvm::function_ref<bool(const NamedDecl *ND)> Filter) const { + std::vector<const NamedDecl *> Results; + + // Lookup in the class. + bool AnyOrdinaryMembers = false; + for (const NamedDecl *ND : RD->lookup(Name)) { + if (isOrdinaryMember(ND)) + AnyOrdinaryMembers = true; + if (Filter(ND)) + Results.push_back(ND); + } + if (AnyOrdinaryMembers) + return Results; + + // Perform lookup into our base classes. + CXXBasePaths Paths; + Paths.setOrigin(RD); + if (!RD->lookupInBases( + [&](const CXXBaseSpecifier *Specifier, CXXBasePath &Path) { + return findOrdinaryMemberInDependentClasses(Specifier, Path, Name); + }, + Paths, /*LookupInDependent=*/true)) + return Results; + for (DeclContext::lookup_iterator I = Paths.front().Decls, E = I.end(); + I != E; ++I) { + if (isOrdinaryMember(*I) && Filter(*I)) + Results.push_back(*I); + } + return Results; +} + +std::vector<const NamedDecl *> HeuristicResolver::resolveDependentMember( + const Type *T, DeclarationName Name, + llvm::function_ref<bool(const NamedDecl *ND)> Filter) const { + if (!T) + return {}; + if (auto *ET = T->getAs<EnumType>()) { + auto Result = ET->getDecl()->lookup(Name); + return {Result.begin(), Result.end()}; + } + if (auto *RD = resolveTypeToRecordDecl(T)) { + if (!RD->hasDefinition()) + return {}; + RD = RD->getDefinition(); + return lookupDependentName(RD, Name, Filter); + } + return {}; +} + +} // namespace clangd +} // namespace clang diff --git a/build/clang-plugin/mozsearch-plugin/from-clangd/HeuristicResolver.h b/build/clang-plugin/mozsearch-plugin/from-clangd/HeuristicResolver.h new file mode 100644 index 0000000000..dc04123d37 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/from-clangd/HeuristicResolver.h @@ -0,0 +1,122 @@ +//===--- HeuristicResolver.h - Resolution of dependent names -----*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEURISTICRESOLVER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEURISTICRESOLVER_H + +#include "clang/AST/Decl.h" +#include <vector> + +namespace clang { + +class ASTContext; +class CallExpr; +class CXXBasePath; +class CXXDependentScopeMemberExpr; +class DeclarationName; +class DependentScopeDeclRefExpr; +class NamedDecl; +class Type; +class UnresolvedUsingValueDecl; + +namespace clangd { + +// This class heuristic resolution of declarations and types in template code. +// +// As a compiler, clang only needs to perform certain types of processing on +// template code (such as resolving dependent names to declarations, or +// resolving the type of a dependent expression) after instantiation. Indeed, +// C++ language features such as template specialization mean such resolution +// cannot be done accurately before instantiation +// +// However, template code is written and read in uninstantiated form, and clangd +// would like to provide editor features like go-to-definition in template code +// where possible. To this end, clangd attempts to resolve declarations and +// types in uninstantiated code by using heuristics, understanding that the +// results may not be fully accurate but that this is better than nothing. +// +// At this time, the heuristic used is a simple but effective one: assume that +// template instantiations are based on the primary template definition and not +// not a specialization. More advanced heuristics may be added in the future. +class HeuristicResolver { +public: + HeuristicResolver(ASTContext &Ctx) : Ctx(Ctx) {} + + // Try to heuristically resolve certain types of expressions, declarations, or + // types to one or more likely-referenced declarations. + std::vector<const NamedDecl *> + resolveMemberExpr(const CXXDependentScopeMemberExpr *ME) const; + std::vector<const NamedDecl *> + resolveDeclRefExpr(const DependentScopeDeclRefExpr *RE) const; + std::vector<const NamedDecl *> + resolveTypeOfCallExpr(const CallExpr *CE) const; + std::vector<const NamedDecl *> + resolveCalleeOfCallExpr(const CallExpr *CE) const; + std::vector<const NamedDecl *> + resolveUsingValueDecl(const UnresolvedUsingValueDecl *UUVD) const; + std::vector<const NamedDecl *> + resolveDependentNameType(const DependentNameType *DNT) const; + std::vector<const NamedDecl *> resolveTemplateSpecializationType( + const DependentTemplateSpecializationType *DTST) const; + + // Try to heuristically resolve a dependent nested name specifier + // to the type it likely denotes. Note that *dependent* name specifiers always + // denote types, not namespaces. + const Type * + resolveNestedNameSpecifierToType(const NestedNameSpecifier *NNS) const; + + // Given the type T of a dependent expression that appears of the LHS of a + // "->", heuristically find a corresponding pointee type in whose scope we + // could look up the name appearing on the RHS. + const Type *getPointeeType(const Type *T) const; + +private: + ASTContext &Ctx; + + // Given a tag-decl type and a member name, heuristically resolve the + // name to one or more declarations. + // The current heuristic is simply to look up the name in the primary + // template. This is a heuristic because the template could potentially + // have specializations that declare different members. + // Multiple declarations could be returned if the name is overloaded + // (e.g. an overloaded method in the primary template). + // This heuristic will give the desired answer in many cases, e.g. + // for a call to vector<T>::size(). + std::vector<const NamedDecl *> resolveDependentMember( + const Type *T, DeclarationName Name, + llvm::function_ref<bool(const NamedDecl *ND)> Filter) const; + + // Try to heuristically resolve the type of a possibly-dependent expression + // `E`. + const Type *resolveExprToType(const Expr *E) const; + std::vector<const NamedDecl *> resolveExprToDecls(const Expr *E) const; + + // Helper function for HeuristicResolver::resolveDependentMember() + // which takes a possibly-dependent type `T` and heuristically + // resolves it to a CXXRecordDecl in which we can try name lookup. + CXXRecordDecl *resolveTypeToRecordDecl(const Type *T) const; + + // This is a reimplementation of CXXRecordDecl::lookupDependentName() + // so that the implementation can call into other HeuristicResolver helpers. + // FIXME: Once HeuristicResolver is upstreamed to the clang libraries + // (https://github.com/clangd/clangd/discussions/1662), + // CXXRecordDecl::lookupDepenedentName() can be removed, and its call sites + // can be modified to benefit from the more comprehensive heuristics offered + // by HeuristicResolver instead. + std::vector<const NamedDecl *> lookupDependentName( + CXXRecordDecl *RD, DeclarationName Name, + llvm::function_ref<bool(const NamedDecl *ND)> Filter) const; + bool findOrdinaryMemberInDependentClasses(const CXXBaseSpecifier *Specifier, + CXXBasePath &Path, + DeclarationName Name) const; +}; + +} // namespace clangd +} // namespace clang + +#endif diff --git a/build/clang-plugin/mozsearch-plugin/from-clangd/README.md b/build/clang-plugin/mozsearch-plugin/from-clangd/README.md new file mode 100644 index 0000000000..9f39dab231 --- /dev/null +++ b/build/clang-plugin/mozsearch-plugin/from-clangd/README.md @@ -0,0 +1,10 @@ +The facilities in this subdirectory are copied over from clangd +(https://clangd.llvm.org/). + +The files here are currently copies of the following upstream files: +https://github.com/llvm/llvm-project/blob/2bcbcbefcd0f7432f99cc07bb47d1e1ecb579a3f/clang-tools-extra/clangd/HeuristicResolver.h +https://github.com/llvm/llvm-project/blob/2bcbcbefcd0f7432f99cc07bb47d1e1ecb579a3f/clang-tools-extra/clangd/HeuristicResolver.cpp + +If, in the future, these facilities are moved from clangd to +to libclangTooling and exposed in the clang API headers, we can +switch to consuming them from there directly. |