summaryrefslogtreecommitdiffstats
path: root/compilerplugins/clang/stringadd.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'compilerplugins/clang/stringadd.cxx')
-rw-r--r--compilerplugins/clang/stringadd.cxx467
1 files changed, 467 insertions, 0 deletions
diff --git a/compilerplugins/clang/stringadd.cxx b/compilerplugins/clang/stringadd.cxx
new file mode 100644
index 000000000..339192a78
--- /dev/null
+++ b/compilerplugins/clang/stringadd.cxx
@@ -0,0 +1,467 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+#ifndef LO_CLANG_SHARED_PLUGINS
+
+#include <cassert>
+#include <string>
+#include <iostream>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "plugin.hxx"
+#include "check.hxx"
+#include "config_clang.h"
+#include "clang/AST/CXXInheritance.h"
+#include "clang/AST/StmtVisitor.h"
+
+/**
+ Look for repeated addition to OUString/OString.
+
+ Eg.
+ OUString x = "xxx";
+ x += b;
+
+ which can be simplified to
+ x = "xxx" + b
+
+ which is more efficient, because of the OUStringConcat magic.
+*/
+
+namespace
+{
+class StringAdd : public loplugin::FilteringPlugin<StringAdd>
+{
+public:
+ explicit StringAdd(loplugin::InstantiationData const& data)
+ : FilteringPlugin(data)
+ {
+ }
+
+ bool preRun() override
+ {
+ std::string fn(handler.getMainFileName());
+ loplugin::normalizeDotDotInFilePath(fn);
+ if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustring/"))
+ return false;
+ if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustringbuffer/"))
+ return false;
+ if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/strings/"))
+ return false;
+ if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/OStringBuffer/"))
+ return false;
+ // there is an ifdef here, but my check is not working, not sure why
+ if (fn == SRCDIR "/pyuno/source/module/pyuno_runtime.cxx")
+ return false;
+ // TODO the += depends on the result of the preceding assign, so can't merge
+ if (fn == SRCDIR "/editeng/source/misc/svxacorr.cxx")
+ return false;
+ return true;
+ }
+
+ virtual void run() override
+ {
+ if (!preRun())
+ return;
+ TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
+ }
+
+ bool VisitCompoundStmt(CompoundStmt const*);
+ bool VisitCXXOperatorCallExpr(CXXOperatorCallExpr const*);
+ bool VisitCXXMemberCallExpr(CXXMemberCallExpr const*);
+
+private:
+ enum class Summands
+ {
+ OnlyCompileTimeConstants,
+ OnlySideEffectFree,
+ SideEffect
+ };
+
+ struct VarDeclAndSummands
+ {
+ const VarDecl* varDecl;
+ Summands summands;
+ };
+
+ VarDeclAndSummands findAssignOrAdd(Stmt const*);
+ bool checkForCompoundAssign(Stmt const* stmt1, Stmt const* stmt2, VarDeclAndSummands& varDecl);
+
+ Expr const* ignore(Expr const*);
+ bool isSideEffectFree(Expr const*);
+ bool isCompileTimeConstant(Expr const*);
+};
+
+bool StringAdd::VisitCompoundStmt(CompoundStmt const* compoundStmt)
+{
+ if (ignoreLocation(compoundStmt))
+ return true;
+
+ auto it = compoundStmt->body_begin();
+ while (true)
+ {
+ if (it == compoundStmt->body_end())
+ break;
+ VarDeclAndSummands foundVar = findAssignOrAdd(*it);
+ // reference types have slightly weird behaviour
+ if (foundVar.varDecl && !foundVar.varDecl->getType()->isReferenceType())
+ {
+ auto stmt1 = *it;
+ ++it;
+ while (it != compoundStmt->body_end())
+ {
+ if (!checkForCompoundAssign(stmt1, *it, foundVar))
+ {
+ break;
+ }
+ stmt1 = *it;
+ ++it;
+ }
+ }
+ else
+ ++it;
+ }
+
+ return true;
+}
+
+StringAdd::VarDeclAndSummands StringAdd::findAssignOrAdd(Stmt const* stmt)
+{
+ if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt))
+ stmt = exprCleanup->getSubExpr();
+ if (auto switchCase = dyn_cast<SwitchCase>(stmt))
+ stmt = switchCase->getSubStmt();
+
+ if (auto declStmt = dyn_cast<DeclStmt>(stmt))
+ if (declStmt->isSingleDecl())
+ if (auto varDeclLHS = dyn_cast_or_null<VarDecl>(declStmt->getSingleDecl()))
+ {
+ auto tc = loplugin::TypeCheck(varDeclLHS->getType());
+ if (!tc.Class("OUString").Namespace("rtl").GlobalNamespace()
+ && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
+ return {};
+ if (varDeclLHS->getStorageDuration() == SD_Static)
+ return {};
+ if (!varDeclLHS->hasInit())
+ return {};
+ return { varDeclLHS, (isCompileTimeConstant(varDeclLHS->getInit())
+ ? Summands::OnlyCompileTimeConstants
+ : (isSideEffectFree(varDeclLHS->getInit())
+ ? Summands::OnlySideEffectFree
+ : Summands::SideEffect)) };
+ }
+ if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(stmt))
+ if (operatorCall->getOperator() == OO_Equal || operatorCall->getOperator() == OO_PlusEqual)
+ if (auto declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(operatorCall->getArg(0))))
+ if (auto varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl()))
+ {
+ auto tc = loplugin::TypeCheck(varDeclLHS->getType());
+ if (!tc.Class("OUString").Namespace("rtl").GlobalNamespace()
+ && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
+ return {};
+ auto rhs = operatorCall->getArg(1);
+ return { varDeclLHS,
+ (isCompileTimeConstant(rhs)
+ ? Summands::OnlyCompileTimeConstants
+ : (isSideEffectFree(rhs) ? Summands::OnlySideEffectFree
+ : Summands::SideEffect)) };
+ }
+ return {};
+}
+
+bool StringAdd::checkForCompoundAssign(Stmt const* stmt1, Stmt const* stmt2,
+ VarDeclAndSummands& varDecl)
+{
+ // OString additions are frequently wrapped in these
+ if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt2))
+ stmt2 = exprCleanup->getSubExpr();
+ if (auto switchCase = dyn_cast<SwitchCase>(stmt2))
+ stmt2 = switchCase->getSubStmt();
+ auto operatorCall = dyn_cast<CXXOperatorCallExpr>(stmt2);
+ if (!operatorCall)
+ return false;
+ if (operatorCall->getOperator() != OO_PlusEqual)
+ return false;
+ auto declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(operatorCall->getArg(0)));
+ if (!declRefExprLHS)
+ return false;
+ if (declRefExprLHS->getDecl() != varDecl.varDecl)
+ return false;
+ // if either side is a compile-time-constant, then we don't care about
+ // side-effects
+ auto rhs = operatorCall->getArg(1);
+ auto const ctcRhs = isCompileTimeConstant(rhs);
+ if (!ctcRhs)
+ {
+ auto const sefRhs = isSideEffectFree(rhs);
+ auto const oldSummands = varDecl.summands;
+ varDecl.summands = sefRhs ? Summands::OnlySideEffectFree : Summands::SideEffect;
+ if (oldSummands != Summands::OnlyCompileTimeConstants
+ && (oldSummands == Summands::SideEffect || !sefRhs))
+ {
+ return true;
+ }
+ }
+ // if we cross a #ifdef boundary
+ if (containsPreprocessingConditionalInclusion(
+ SourceRange(stmt1->getSourceRange().getBegin(), stmt2->getSourceRange().getEnd())))
+ {
+ varDecl.summands
+ = ctcRhs ? Summands::OnlyCompileTimeConstants
+ : isSideEffectFree(rhs) ? Summands::OnlySideEffectFree : Summands::SideEffect;
+ return true;
+ }
+ report(DiagnosticsEngine::Warning, "simplify by merging with the preceding assignment",
+ stmt2->getBeginLoc())
+ << stmt2->getSourceRange();
+ return true;
+}
+
+// Check for generating temporaries when adding strings
+//
+bool StringAdd::VisitCXXOperatorCallExpr(CXXOperatorCallExpr const* operatorCall)
+{
+ if (ignoreLocation(operatorCall))
+ return true;
+ if (operatorCall->getOperator() != OO_Plus)
+ return true;
+ auto tc = loplugin::TypeCheck(operatorCall->getType()->getUnqualifiedDesugaredType());
+ if (!tc.Struct("OUStringConcat").Namespace("rtl").GlobalNamespace()
+ && !tc.Struct("OStringConcat").Namespace("rtl").GlobalNamespace()
+ && !tc.Class("OUString").Namespace("rtl").GlobalNamespace()
+ && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
+ return true;
+
+ auto check = [operatorCall, this](unsigned arg) {
+ auto const e
+ = dyn_cast<CXXFunctionalCastExpr>(operatorCall->getArg(arg)->IgnoreParenImpCasts());
+ if (e == nullptr)
+ return;
+ auto tc3 = loplugin::TypeCheck(e->getType());
+ if (!tc3.Class("OUString").Namespace("rtl").GlobalNamespace()
+ && !tc3.Class("OString").Namespace("rtl").GlobalNamespace()
+ && !tc3.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
+ && !tc3.Class("OStringLiteral").Namespace("rtl").GlobalNamespace()
+ && !tc3.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
+ && !tc3.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
+ return;
+ report(DiagnosticsEngine::Warning,
+ ("rather use O[U]String::Concat than constructing %0 from %1 on %select{L|R}2HS of "
+ "+ (where %select{R|L}2HS is of"
+ " type %3)"),
+ e->getBeginLoc())
+ << e->getType().getLocalUnqualifiedType() << e->getSubExprAsWritten()->getType() << arg
+ << operatorCall->getArg(1 - arg)->IgnoreImpCasts()->getType() << e->getSourceRange();
+ };
+
+ check(0);
+ check(1);
+ return true;
+}
+
+bool StringAdd::VisitCXXMemberCallExpr(CXXMemberCallExpr const* methodCall)
+{
+ if (ignoreLocation(methodCall))
+ return true;
+
+ auto methodDecl = methodCall->getMethodDecl();
+ if (!methodDecl || !methodDecl->getIdentifier() || methodDecl->getName() != "append"
+ || methodCall->getNumArgs() == 0)
+ return true;
+ auto tc1 = loplugin::TypeCheck(methodCall->getType());
+ if (!tc1.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
+ && !tc1.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
+ return true;
+ auto paramType = methodDecl->getParamDecl(0)->getType();
+ // char is still a pain to work with, when constructing a chained +
+ if (paramType->isCharType() || loplugin::TypeCheck(paramType).Typedef("sal_Unicode"))
+ return true;
+ auto arg = methodCall->getArg(0);
+ // I don't think the OUStringAppend functionality can handle this efficiently
+ if (isa<ConditionalOperator>(ignore(arg)))
+ return true;
+
+ auto methodCall2 = dyn_cast<CXXMemberCallExpr>(ignore(methodCall->getImplicitObjectArgument()));
+ if (!methodCall2)
+ return true;
+ auto tc = loplugin::TypeCheck(methodCall2->getType());
+ if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
+ && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
+ return true;
+ auto methodDecl2 = methodCall2->getMethodDecl();
+ if (!methodDecl2->getIdentifier() || methodDecl2->getName() != "append"
+ || methodCall2->getNumArgs() == 0)
+ return true;
+ auto paramType2 = methodDecl2->getParamDecl(0)->getType();
+ // char is still a pain to work with, when constructing a chained +
+ if (paramType2->isCharType() || loplugin::TypeCheck(paramType2).Typedef("sal_Unicode"))
+ return true;
+ arg = methodCall2->getArg(0);
+ // I don't think the OUStringAppend functionality can handle this efficiently
+ if (isa<ConditionalOperator>(ignore(arg)))
+ return true;
+ report(DiagnosticsEngine::Warning,
+ "chained append, rather use single append call and + operator",
+ methodCall2->getBeginLoc())
+ << methodCall2->getSourceRange();
+
+ return true;
+}
+
+Expr const* StringAdd::ignore(Expr const* expr)
+{
+ return expr->IgnoreImplicit()->IgnoreParens()->IgnoreImplicit();
+}
+
+bool StringAdd::isSideEffectFree(Expr const* expr)
+{
+ expr = ignore(expr);
+ // I don't think the OUStringAppend functionality can handle this efficiently
+ if (isa<ConditionalOperator>(expr))
+ return false;
+ // Multiple statements have a well defined evaluation order (sequence points between them)
+ // but a single expression may be evaluated in arbitrary order;
+ // if there are side effects in one of the sub-expressions that have an effect on another subexpression,
+ // the result may be incorrect, and you don't necessarily notice in tests because the order is compiler-dependent.
+ // for example see commit afd743141f7a7dd05914d0872c9afe079f16fe0c where such a refactoring introduced such a bug.
+ // So only consider simple RHS expressions.
+ if (!expr->HasSideEffects(compiler.getASTContext()))
+ return true;
+
+ // check for chained adds which are side-effect free
+ if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(expr))
+ {
+ auto op = operatorCall->getOperator();
+ if (op == OO_PlusEqual || op == OO_Plus)
+ if (isSideEffectFree(operatorCall->getArg(0))
+ && isSideEffectFree(operatorCall->getArg(1)))
+ return true;
+ }
+
+ if (auto callExpr = dyn_cast<CallExpr>(expr))
+ {
+ // check for calls through OUString::number/OUString::unacquired
+ if (auto calleeMethodDecl = dyn_cast_or_null<CXXMethodDecl>(callExpr->getCalleeDecl()))
+ {
+ if (calleeMethodDecl->getIdentifier())
+ {
+ auto name = calleeMethodDecl->getName();
+ if (callExpr->getNumArgs() > 0
+ && (name == "number" || name == "unacquired" || name == "boolean"
+ || name == "copy"))
+ {
+ auto tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
+ if (tc.Class("OUString") || tc.Class("OString"))
+ {
+ if (isSideEffectFree(callExpr->getArg(0)))
+ return true;
+ }
+ }
+ }
+ else if (auto const d = dyn_cast<CXXConversionDecl>(calleeMethodDecl))
+ {
+ if (loplugin::TypeCheck(d->getConversionType())
+ .ClassOrStruct("basic_string_view")
+ .StdNamespace())
+ {
+ auto const tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
+ if (tc.Class("OUString").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OString").Namespace("rtl").GlobalNamespace())
+ {
+ if (isSideEffectFree(callExpr->getCallee()))
+ return true;
+ }
+ }
+ }
+ // Aggressively assume that calls to const member functions are side effect free (if
+ // all of the call's sub-expressions are):
+ if (calleeMethodDecl->isConst())
+ {
+ auto sef = true;
+ // Other options besides CXXMemberCallExpr are e.g. CXXOperatorCallExpr which
+ // does not have such a target expression:
+ if (auto const mce = dyn_cast<CXXMemberCallExpr>(callExpr))
+ {
+ if (!isSideEffectFree(mce->getImplicitObjectArgument()))
+ {
+ sef = false;
+ }
+ }
+ if (sef)
+ {
+ for (unsigned i = 0; i != callExpr->getNumArgs(); ++i)
+ {
+ if (!isSideEffectFree(callExpr->getArg(i)))
+ {
+ sef = false;
+ break;
+ }
+ }
+ }
+ if (sef)
+ {
+ return true;
+ }
+ }
+ }
+ if (auto calleeFunctionDecl = dyn_cast_or_null<FunctionDecl>(callExpr->getCalleeDecl()))
+ if (calleeFunctionDecl && calleeFunctionDecl->getIdentifier())
+ {
+ auto name = calleeFunctionDecl->getName();
+ // check for calls through OUStringToOString
+ if (name == "OUStringToOString" || name == "OStringToOUString")
+ if (isSideEffectFree(callExpr->getArg(0)))
+ return true;
+ // allowlist some known-safe methods
+ if (name.endswith("ResId") || name == "GetXMLToken")
+ if (isSideEffectFree(callExpr->getArg(0)))
+ return true;
+ }
+ }
+
+ // sometimes we have a constructor call on the RHS
+ if (auto constructExpr = dyn_cast<CXXConstructExpr>(expr))
+ {
+ auto dc = loplugin::DeclCheck(constructExpr->getConstructor());
+ if (dc.MemberFunction().Class("OUString") || dc.MemberFunction().Class("OString"))
+ if (constructExpr->getNumArgs() == 0 || isSideEffectFree(constructExpr->getArg(0)))
+ return true;
+ // Expr::HasSideEffects does not like stuff that passes through OUStringLiteral
+ auto dc2 = loplugin::DeclCheck(constructExpr->getConstructor()->getParent());
+ if (dc2.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
+ || dc2.Class("OStringLiteral").Namespace("rtl").GlobalNamespace())
+ return true;
+ }
+
+ // when adding literals, we sometimes get this
+ if (auto functionalCastExpr = dyn_cast<CXXFunctionalCastExpr>(expr))
+ {
+ auto tc = loplugin::TypeCheck(functionalCastExpr->getType());
+ if (tc.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
+ || tc.Class("OStringLiteral").Namespace("rtl").GlobalNamespace())
+ return isSideEffectFree(functionalCastExpr->getSubExpr());
+ }
+
+ return false;
+}
+
+bool StringAdd::isCompileTimeConstant(Expr const* expr)
+{
+ expr = expr->IgnoreImplicit();
+ if (auto cxxConstructExpr = dyn_cast<CXXConstructExpr>(expr))
+ if (cxxConstructExpr->getNumArgs() > 0)
+ expr = cxxConstructExpr->getArg(0);
+ return isa<clang::StringLiteral>(expr);
+}
+
+loplugin::Plugin::Registration<StringAdd> stringadd("stringadd");
+}
+
+#endif // LO_CLANG_SHARED_PLUGINS
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */