summaryrefslogtreecommitdiffstats
path: root/compilerplugins/clang/stringliteralvar.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'compilerplugins/clang/stringliteralvar.cxx')
-rw-r--r--compilerplugins/clang/stringliteralvar.cxx320
1 files changed, 320 insertions, 0 deletions
diff --git a/compilerplugins/clang/stringliteralvar.cxx b/compilerplugins/clang/stringliteralvar.cxx
new file mode 100644
index 0000000000..fcd3690669
--- /dev/null
+++ b/compilerplugins/clang/stringliteralvar.cxx
@@ -0,0 +1,320 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+// Find constant character array variables that are either
+// (a) passed into O[U]String constructors
+// (b) assigned to O[U]String
+// and should thus be turned into O[U]StringLiteral variables.
+//
+// Such a variable may have been used in multiple places, not all of which would be compatible with
+// changing the variable's type to O[U]StringLiteral. However, this plugin is aggressive and
+// ignores all but the first use of such a variable. In all cases of incompatible uses so far, it
+// was possible to change the surrounding code (for the better) to make the changes work.
+//
+// The plugin also flags O[U]StringLiteral variables of automatic storage duration, and uses of such
+// variables with sizeof---two likely errors that can occur once a variable has been changed from a
+// character array to O[U]StringLiteral.
+//
+//TODO: In theory, we should not only look for variables, but also for non-static data members. In
+// practice, those should be rare, though, as they should arguably have been static data members to
+// begin with.
+
+#include <cassert>
+
+#include "check.hxx"
+#include "compat.hxx"
+#include "plugin.hxx"
+
+namespace
+{
+bool isAutomaticVariable(VarDecl const* decl) // true for automatic, false for thread/static storage
+{
+ switch (decl->getStorageDuration()) // decl is already a VarDecl const*, so no cast is needed
+ {
+ case SD_Automatic:
+ return true;
+ case SD_Thread:
+ case SD_Static:
+ return false;
+ case SD_FullExpression: // these two durations are not expected for a variable declaration
+ case SD_Dynamic:
+ assert(false); // compiled out under NDEBUG; control then reaches the default case below
+ default:
+ llvm_unreachable("unknown StorageDuration");
+ }
+}
+
+class StringLiteralVar final : public loplugin::FilteringPlugin<StringLiteralVar> // AST visitor implementing the checks described at the top of this file
+{
+public:
+ explicit StringLiteralVar(loplugin::InstantiationData const& data)
+ : FilteringPlugin(data)
+ {
+ }
+
+ bool TraverseInitListExpr(InitListExpr* expr, DataRecursionQueue* queue = nullptr) // custom init-list traversal
+ { // visit the node itself, then descend only into its semantic form
+ return WalkUpFromInitListExpr(expr)
+ && TraverseSynOrSemInitListExpr(
+ expr->isSemanticForm() ? expr : expr->getSemanticForm(), queue);
+ }
+
+ bool VisitCXXConstructExpr(CXXConstructExpr const* expr) // checks constructions of rtl::OString / rtl::OUString
+ { // always returns true, so the traversal continues whether or not something was reported
+ if (ignoreLocation(expr))
+ {
+ return true;
+ }
+ loplugin::TypeCheck const tc(expr->getType());
+ if (!(tc.Class("OString").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OUString").Namespace("rtl").GlobalNamespace()))
+ {
+ return true;
+ }
+ auto const ctor = expr->getConstructor();
+ switch (ctor->getNumParams()) // constructors with any other parameter count are not examined
+ {
+ case 1: // argument names an O[U]StringLiteral variable: warn if it has automatic storage
+ {
+ auto const e = dyn_cast<DeclRefExpr>(expr->getArg(0)->IgnoreParenImpCasts());
+ if (e == nullptr)
+ {
+ return true;
+ }
+ auto const tc = loplugin::TypeCheck(e->getType());
+ if (!(tc.Class("OStringLiteral").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()))
+ {
+ return true;
+ }
+ auto const d = dyn_cast<VarDecl>(e->getDecl()); // the referenced declaration need not be a variable
+ if (d == nullptr || !isAutomaticVariable(d))
+ {
+ return true;
+ }
+ if (!reportedAutomatic_.insert(d).second) // already reported at an earlier use
+ {
+ return true;
+ }
+ report(DiagnosticsEngine::Warning,
+ "variable %0 of type %1 with automatic storage duration most likely needs "
+ "to be static",
+ d->getLocation())
+ << d << d->getType() << d->getSourceRange();
+ report(DiagnosticsEngine::Note, "first converted to %0 here", expr->getLocation())
+ << expr->getType() << expr->getSourceRange();
+ }
+ break;
+ case 2: // first argument names a const constant-size array: suggest O[U]StringLiteral
+ {
+ auto const e1 = dyn_cast<DeclRefExpr>(expr->getArg(0)->IgnoreParenImpCasts());
+ if (e1 == nullptr)
+ {
+ return true;
+ }
+ auto const t = e1->getType();
+ if (!(t.isConstQualified() && t->isConstantArrayType()))
+ {
+ return true;
+ }
+ auto const e2 = expr->getArg(1); // must be the defaulted Dummy or a constant sal_Int32 argument
+ if (!((isa<CXXDefaultArgExpr>(e2)
+ && loplugin::TypeCheck(e2->getType())
+ .Struct("Dummy")
+ .Namespace("libreoffice_internal")
+ .Namespace("rtl")
+ .GlobalNamespace())
+ || (loplugin::TypeCheck(ctor->getParamDecl(1)->getType())
+ .Typedef("sal_Int32")
+ .GlobalNamespace()
+ && e2->isIntegerConstantExpr(compiler.getASTContext()))))
+ {
+ return true;
+ }
+ auto const d = dyn_cast<VarDecl>(e1->getDecl()); // skip references to non-variable declarations
+ if (d == nullptr || isPotentiallyInitializedWithMalformedUtf16(d))
+ {
+ return true;
+ }
+ if (!reportedArray_.insert(d).second) // only the first use of each array is reported
+ {
+ return true;
+ }
+ report(DiagnosticsEngine::Warning,
+ "change type of variable %0 from constant character array (%1) to "
+ "%select{OStringLiteral|OUStringLiteral}2%select{|, and make it static}3",
+ d->getLocation())
+ << d << d->getType()
+ << (tc.Class("OString").Namespace("rtl").GlobalNamespace() ? 0 : 1) // %2: 0 for OString, else 1
+ << isAutomaticVariable(d) << d->getSourceRange(); // %3: true adds ", and make it static"
+ report(DiagnosticsEngine::Note, "first passed into a %0 constructor here",
+ expr->getLocation())
+ << expr->getType().getUnqualifiedType() << expr->getSourceRange();
+ }
+ break;
+ }
+ return true;
+ }
+
+ bool VisitCXXOperatorCallExpr(CXXOperatorCallExpr const* expr) // checks assignments to rtl::OString / rtl::OUString
+ { // always returns true, so the traversal continues whether or not something was reported
+ if (ignoreLocation(expr))
+ {
+ return true;
+ }
+ if (expr->getOperator() != OO_Equal)
+ {
+ return true;
+ }
+ loplugin::TypeCheck const tc(expr->getType());
+ if (!(tc.Class("OString").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OUString").Namespace("rtl").GlobalNamespace()))
+ {
+ return true;
+ }
+ if (expr->getNumArgs() != 2)
+ {
+ return true;
+ }
+ auto const e = dyn_cast<DeclRefExpr>(expr->getArg(1)->IgnoreParenImpCasts()); // right-hand side
+ if (e == nullptr)
+ {
+ return true;
+ }
+ auto const t = e->getType();
+ if (!(t.isConstQualified() && t->isConstantArrayType()))
+ {
+ return true;
+ }
+ auto const d = dyn_cast<VarDecl>(e->getDecl()); // skip references to non-variable declarations
+ if (d == nullptr || isPotentiallyInitializedWithMalformedUtf16(d))
+ {
+ return true;
+ }
+ if (!reportedArray_.insert(d).second) // only the first use of each array is reported
+ {
+ return true;
+ }
+ report(DiagnosticsEngine::Warning,
+ "change type of variable %0 from constant character array (%1) to "
+ "%select{OStringLiteral|OUStringLiteral}2%select{|, and make it static}3",
+ d->getLocation())
+ << d << d->getType() << (tc.Class("OString").Namespace("rtl").GlobalNamespace() ? 0 : 1)
+ << isAutomaticVariable(d) << d->getSourceRange(); // %3: true adds ", and make it static"
+ report(DiagnosticsEngine::Note, "first assigned here", expr->getBeginLoc())
+ << expr->getSourceRange();
+ return true;
+ }
+
+ bool VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr const* expr) // flags sizeof on O[U]StringLiteral variables
+ { // unlike the other checks, every matching sizeof expression is reported (no de-duplication)
+ if (ignoreLocation(expr))
+ {
+ return true;
+ }
+ if (expr->getKind() != UETT_SizeOf)
+ {
+ return true;
+ }
+ if (expr->isArgumentType()) // sizeof(type) has no operand expression to inspect
+ {
+ return true;
+ }
+ auto const e = dyn_cast<DeclRefExpr>(expr->getArgumentExpr()->IgnoreParenImpCasts());
+ if (e == nullptr)
+ {
+ return true;
+ }
+ auto const tc = loplugin::TypeCheck(e->getType());
+ if (!(tc.Class("OStringLiteral").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()))
+ {
+ return true;
+ }
+ auto const d = e->getDecl();
+ report(DiagnosticsEngine::Warning,
+ "variable %0 of type %1 suspiciously used in a sizeof expression", e->getLocation())
+ << d << d->getType() << expr->getSourceRange();
+ return true;
+ }
+
+ bool preRun() override { return compiler.getLangOpts().CPlusPlus; } // true only when compiling C++
+
+private:
+ void run() override // traverses the whole translation unit, but only if preRun() allows it
+ {
+ if (preRun())
+ {
+ TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
+ }
+ }
+
+ // There is some confusion on the semantics of numeric-escape-sequences in string literals, see
+ // <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2029r4.html> "Proposed resolution
+ // for core issues 411, 1656, and 2333; numeric and universal character escapes in character and
+ // string literals", so suppress warnings about arrays that are deliberately not written as
+ // UTF-16 string literals because they contain lone surrogates:
+ bool isPotentiallyInitializedWithMalformedUtf16(ValueDecl const* decl) const // true means "do not warn about decl"
+ { // expects decl to be a VarDecl of array type; the callers in this class check both beforehand
+ if (!decl->getType()->getArrayElementTypeNoTypeQual()->isChar16Type())
+ {
+ return false;
+ }
+ auto const init = cast<VarDecl>(decl)->getAnyInitializer();
+ if (init == nullptr) // no initializer visible, so well-formedness cannot be established
+ {
+ return true;
+ }
+ auto const list = dyn_cast<InitListExpr>(init);
+ if (list == nullptr)
+ {
+ // Assuming that the initializer already is a string literal, assume that that string
+ // literal has no issues with malformed UTF-16:
+ if (isDebugMode())
+ {
+ assert(isa<clang::StringLiteral>(init));
+ }
+ return false;
+ }
+ auto highSurrogate = false; // set while the previous element was a high surrogate
+ for (auto const e : list->inits())
+ {
+ llvm::APSInt v;
+ if (!compat::EvaluateAsInt(e, v, compiler.getASTContext())) // element value unknown
+ {
+ return true;
+ }
+ if (highSurrogate)
+ {
+ if (v < 0xDC00 || v > 0xDFFF) // high surrogate not followed by a low surrogate
+ {
+ return true;
+ }
+ highSurrogate = false;
+ }
+ else if (v >= 0xD800 && v <= 0xDBFF)
+ {
+ highSurrogate = true;
+ }
+ else if (v >= 0xDC00 && v <= 0xDFFF) // low surrogate without a preceding high surrogate
+ {
+ return true;
+ }
+ }
+ return highSurrogate; // true if the list ends on an unpaired high surrogate
+ }
+
+ std::set<Decl const*> reportedAutomatic_; // automatic O[U]StringLiteral variables already reported
+ std::set<Decl const*> reportedArray_; // character array variables already reported
+};
+
+static loplugin::Plugin::Registration<StringLiteralVar> reg("stringliteralvar"); // file-local registration object; presumably makes the plugin selectable as "stringliteralvar" (see plugin.hxx)
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */