Adding upstream version 4:24.2.0. (tag: upstream/4%24.2.0)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 05:54:39 +0000
commit: 267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree: 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /ucb/source/regexp
parent: Initial commit. (diff)
download: libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz
libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip
1 files changed, 393 insertions, 0 deletions
diff --git a/ucb/source/regexp/regexp.cxx b/ucb/source/regexp/regexp.cxx
new file mode 100644
index 0000000000..8b8dcbc85b
--- /dev/null
+++ b/ucb/source/regexp/regexp.cxx
@@ -0,0 +1,393 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <regexp.hxx>
+
+#include <cstddef>
+
+#include <osl/diagnose.h>
+#include <com/sun/star/lang/IllegalArgumentException.hpp>
+#include <rtl/character.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <rtl/ustring.hxx>
+#include <utility>
+
+using namespace com::sun::star;
+using namespace ucb_impl;
+
+
+//  Regexp
+
+
+// Build a Regexp from its individual components.  The string arguments are
+// taken by value and moved into the corresponding members.
+//
+// Two consistency conditions are checked with OSL_ASSERT (they are not
+// otherwise enforced here):
+//  - unless the kind is KIND_DOMAIN, the empty-domain flag is false and the
+//    infix is empty;
+//  - unless this is a translation, the reverse prefix is empty.
+inline Regexp::Regexp(Kind eTheKind, OUString aThePrefix,
+                      bool bTheEmptyDomain, OUString aTheInfix,
+                      bool bTheTranslation,
+                      OUString aTheReversePrefix)
+    : m_eKind(eTheKind)
+    , m_aPrefix(std::move(aThePrefix))
+    , m_aInfix(std::move(aTheInfix))
+    , m_aReversePrefix(std::move(aTheReversePrefix))
+    , m_bEmptyDomain(bTheEmptyDomain)
+    , m_bTranslation(bTheTranslation)
+{
+    OSL_ASSERT(m_eKind == KIND_DOMAIN
+               || (!m_bEmptyDomain && m_aInfix.isEmpty()));
+    OSL_ASSERT(m_bTranslation || m_aReversePrefix.isEmpty());
+}
+
+
+namespace {
+
+// Try to consume rString at the front of the range [*pBegin, pEnd),
+// comparing code units with rtl::compareIgnoreAsciiCase.
+//
+// On success *pBegin is advanced past the matched text and true is returned.
+// On failure *pBegin is left unchanged and false is returned.
+bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
+                           sal_Unicode const * pEnd,
+                           OUString const & rString)
+{
+    sal_Unicode const * pInput = *pBegin;
+
+    sal_Unicode const * pPattern = rString.getStr();
+    sal_Unicode const * const pPatternEnd = pPattern + rString.getLength();
+
+    // Not enough input left to hold the whole pattern.
+    if (pEnd - pInput < pPatternEnd - pPattern)
+        return false;
+
+    for (; pPattern != pPatternEnd; ++pInput, ++pPattern)
+    {
+        if (rtl::compareIgnoreAsciiCase(*pInput, *pPattern) != 0)
+            return false;
+    }
+
+    *pBegin = pInput;
+    return true;
+}
+
+}
+
+// Return true if rString is matched by this Regexp.
+//
+// The prefix must always match (ignoring ASCII case) at the start of
+// rString.  What may follow depends on the kind:
+//  - KIND_PREFIX:    anything;
+//  - KIND_AUTHORITY: the end of the string or one of '/', '?', '#';
+//  - KIND_DOMAIN:    a run of characters other than '/', '?', '#' (at least
+//                    one unless m_bEmptyDomain is set), then the infix
+//                    (ignoring ASCII case), then the end of the string or
+//                    one of '/', '?', '#'.
+bool Regexp::matches(OUString const & rString) const
+{
+    sal_Unicode const * const pStart = rString.getStr();
+    sal_Unicode const * const pLimit = pStart + rString.getLength();
+
+    // True if pPos is at the end of the input or points at '/', '?' or '#'.
+    auto const atBoundary = [pLimit](sal_Unicode const * pPos)
+    {
+        return pPos == pLimit || *pPos == '/' || *pPos == '?' || *pPos == '#';
+    };
+
+    sal_Unicode const * pCur = pStart;
+    if (!matchStringIgnoreCase(&pCur, pLimit, m_aPrefix))
+        return false;
+
+    switch (m_eKind)
+    {
+        case KIND_PREFIX:
+            return true;
+
+        case KIND_AUTHORITY:
+            return atBoundary(pCur);
+
+        case KIND_DOMAIN:
+        {
+            if (!m_bEmptyDomain)
+            {
+                // At least one domain character is required.
+                if (atBoundary(pCur))
+                    return false;
+                ++pCur;
+            }
+            // Try the infix at every position of the domain run, stopping
+            // as soon as the run ends.
+            for (;;)
+            {
+                sal_Unicode const * pProbe = pCur;
+                if (matchStringIgnoreCase(&pProbe, pLimit, m_aInfix)
+                    && atBoundary(pProbe))
+                    return true;
+
+                if (atBoundary(pCur))
+                    return false;
+                ++pCur;
+            }
+        }
+    }
+
+    return false;
+}
+
+
+namespace {
+
+// Check whether rString has the shape of a <scheme> from RFC 2396: an ASCII
+// letter followed by any number of ASCII letters, digits, '+', '-' or '.'.
+//
+// If bColon is true, the scheme must additionally be followed by exactly one
+// trailing ':' that ends the string; if bColon is false, no ':' is allowed.
+bool isScheme(OUString const & rString, bool bColon)
+{
+    sal_Int32 const nLength = rString.getLength();
+    if (nLength == 0 || !rtl::isAsciiAlpha(rString[0]))
+        return false;
+
+    for (sal_Int32 i = 1; i != nLength; ++i)
+    {
+        sal_Unicode const c = rString[i];
+        if (rtl::isAsciiAlphanumeric(c) || c == '+' || c == '-' || c == '.')
+            continue;
+
+        // The first non-scheme character is only acceptable if it is the
+        // requested trailing colon and nothing follows it.
+        return bColon && c == ':' && i + 1 == nLength;
+    }
+
+    // The whole string consisted of scheme characters, so there is no colon.
+    return !bColon;
+}
+
+// Append rString to *pBuffer as a double-quoted string literal.  Every '"'
+// and '\' inside rString is preceded by a '\' so that the result can be read
+// back unambiguously.  pBuffer must not be null.
+void appendStringLiteral(OUStringBuffer * pBuffer,
+                         OUString const & rString)
+{
+    OSL_ASSERT(pBuffer);
+
+    pBuffer->append('"');
+    for (sal_Int32 i = 0, nLength = rString.getLength(); i != nLength; ++i)
+    {
+        sal_Unicode const c = rString[i];
+        if (c == '"' || c == '\\')
+            pBuffer->append('\\');
+        pBuffer->append(c);
+    }
+    pBuffer->append('"');
+}
+
+}
+
+// Produce the textual form of this Regexp.
+//
+// - A non-translating KIND_PREFIX whose prefix is "<scheme>:" is written as
+//   the bare scheme (the prefix without its trailing colon).
+// - Otherwise the (non-empty) prefix is written as a quoted string literal,
+//   followed by the pattern for the kind.
+// - For a translation the pattern is wrapped in a capture group and followed
+//   by "->", the (non-empty) reverse prefix as a quoted string literal, and
+//   "\1".
+OUString Regexp::getRegexp() const
+{
+    // Abbreviated scheme form.
+    if (!m_bTranslation && m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
+        return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
+
+    OUStringBuffer aResult;
+    if (!m_aPrefix.isEmpty())
+        appendStringLiteral(&aResult, m_aPrefix);
+
+    if (!m_bTranslation)
+    {
+        // Simple (non-translating) forms.
+        switch (m_eKind)
+        {
+            case KIND_PREFIX:
+                aResult.append(".*");
+                break;
+
+            case KIND_AUTHORITY:
+                aResult.append("([/?#].*)?");
+                break;
+
+            case KIND_DOMAIN:
+                aResult.append("[^/?#]" + OUStringChar( m_bEmptyDomain ? '*' : '+' ));
+                if (!m_aInfix.isEmpty())
+                    appendStringLiteral(&aResult, m_aInfix);
+                aResult.append("([/?#].*)?");
+                break;
+        }
+        return aResult.makeStringAndClear();
+    }
+
+    // Translating forms: the same patterns inside a capture group ...
+    switch (m_eKind)
+    {
+        case KIND_PREFIX:
+            aResult.append("(.*)");
+            break;
+
+        case KIND_AUTHORITY:
+            aResult.append("(([/?#].*)?)");
+            break;
+
+        case KIND_DOMAIN:
+            aResult.append("([^/?#]" + OUStringChar(sal_Unicode(m_bEmptyDomain ? '*' : '+')));
+            if (!m_aInfix.isEmpty())
+                appendStringLiteral(&aResult, m_aInfix);
+            aResult.append("([/?#].*)?)");
+            break;
+    }
+
+    // ... followed by the replacement part.
+    aResult.append("->");
+    if (!m_aReversePrefix.isEmpty())
+        appendStringLiteral(&aResult, m_aReversePrefix);
+    aResult.append("\\1");
+    return aResult.makeStringAndClear();
+}
+
+
+namespace {
+
+// Try to consume the nStringLength characters at pString at the front of the
+// range [*pBegin, pEnd).  The comparison is exact (case-sensitive); each
+// char of pString is interpreted as an unsigned char value.
+//
+// On success *pBegin is advanced past the matched text and true is returned.
+// On failure *pBegin is left unchanged and false is returned.
+bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
+                 char const * pString, size_t nStringLength)
+{
+    sal_Unicode const * pInput = *pBegin;
+
+    unsigned char const * pExpected
+        = reinterpret_cast< unsigned char const * >(pString);
+    unsigned char const * const pExpectedEnd = pExpected + nStringLength;
+
+    // Not enough input left to hold the whole expected string.
+    if (pEnd - pInput < pExpectedEnd - pExpected)
+        return false;
+
+    for (; pExpected != pExpectedEnd; ++pInput, ++pExpected)
+    {
+        sal_Unicode const cExpected = *pExpected;
+        if (*pInput != cExpected)
+            return false;
+    }
+
+    *pBegin = pInput;
+    return true;
+}
+
+// Try to read a double-quoted string literal from the front of the range
+// [*pBegin, pEnd).  Inside the quotes, a '\' must be followed by '"' or '\'
+// and stands for that following character.
+//
+// On success the unescaped content is stored in *pString, *pBegin is advanced
+// past the closing quote and true is returned.  On failure (no opening
+// quote, unterminated literal, or invalid escape) neither *pBegin nor
+// *pString is modified and false is returned.
+bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
+                       OUString * pString)
+{
+    sal_Unicode const * pCur = *pBegin;
+
+    if (pCur == pEnd || *pCur != '"')
+        return false;
+    ++pCur;
+
+    OUStringBuffer aContent;
+    bool bClosed = false;
+    while (pCur != pEnd)
+    {
+        sal_Unicode cChar = *pCur++;
+        if (cChar == '"')
+        {
+            bClosed = true;
+            break;
+        }
+        if (cChar == '\\')
+        {
+            // An escape needs a following '"' or '\'.
+            if (pCur == pEnd)
+                return false;
+            cChar = *pCur++;
+            if (cChar != '"' && cChar != '\\')
+                return false;
+        }
+        aContent.append(cChar);
+    }
+
+    // Ran out of input before the closing quote.
+    if (!bClosed)
+        return false;
+
+    *pBegin = pCur;
+    *pString = aContent.makeStringAndClear();
+    return true;
+}
+
+}
+
+// Parse the textual form rRegexp into a Regexp.
+//
+// Accepted inputs, where [string] is an optional quoted string literal:
+//   <scheme>                                           (see below)
+//   [string] ".*"
+//   [string] "(.*)->" [string] "\1"
+//   [string] "([/?#].*)?"
+//   [string] "(([/?#].*)?)->" string "\1"
+//   [string] "[^/?#]" ("*" / "+") [string] "([/?#].*)?"
+//   [string] "([^/?#]" ("*" / "+") [string] "([/?#].*)?)->" string "\1"
+//
+// Note that the reverse-prefix literal is optional only in the "(.*)->" form
+// (the scan result is ignored there); the other two translating forms
+// require it.
+//
+// Throws lang::IllegalArgumentException if rRegexp has none of these forms.
+Regexp Regexp::parse(OUString const & rRegexp)
+{
+    // An input of '<scheme>' is an abbreviation of '"<scheme>:".*', where
+    // <scheme> is as defined in RFC 2396:
+    if (isScheme(rRegexp, false))
+        return Regexp(Regexp::KIND_PREFIX,
+                      rRegexp + ":",
+                      false,
+                      OUString(),
+                      false,
+                      OUString());
+
+    sal_Unicode const * pCur = rRegexp.getStr();
+    sal_Unicode const * const pLimit = pCur + rRegexp.getLength();
+
+    // Optional leading prefix literal; on failure the cursor is not moved.
+    OUString aPrefix;
+    scanStringLiteral(&pCur, pLimit, &aPrefix);
+
+    // A pattern has to follow the prefix.
+    if (pCur == pLimit)
+        throw lang::IllegalArgumentException();
+
+    // The matchString() calls in this function are some of the few places
+    // where RTL_CONSTASCII_STRINGPARAM() should NOT be removed.
+    // (c.f. https://gerrit.libreoffice.org/3117)
+
+    // Simple prefix.
+    if (matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM(".*")))
+    {
+        if (pCur != pLimit)
+            throw lang::IllegalArgumentException();
+
+        return Regexp(Regexp::KIND_PREFIX, aPrefix, false, OUString(),
+                      false, OUString());
+    }
+
+    // Translating prefix.
+    if (matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
+    {
+        OUString aReversePrefix;
+        scanStringLiteral(&pCur, pLimit, &aReversePrefix);
+
+        bool const bBackReference
+            = matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("\\1"));
+        if (!bBackReference || pCur != pLimit)
+            throw lang::IllegalArgumentException();
+
+        return Regexp(Regexp::KIND_PREFIX, aPrefix, false, OUString(),
+                      true, aReversePrefix);
+    }
+
+    // Simple authority.
+    if (matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
+    {
+        if (pCur != pLimit)
+            throw lang::IllegalArgumentException();
+
+        return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, OUString(),
+                      false, OUString());
+    }
+
+    // Translating authority.
+    if (matchString(&pCur, pLimit,
+                    RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
+    {
+        OUString aReversePrefix;
+        bool const bTailOk
+            = scanStringLiteral(&pCur, pLimit, &aReversePrefix)
+              && matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("\\1"))
+              && pCur == pLimit;
+        if (!bTailOk)
+            throw lang::IllegalArgumentException();
+
+        return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, OUString(),
+                      true, aReversePrefix);
+    }
+
+    // Everything else has to be a domain pattern; a leading '(' marks the
+    // translating variant.
+    bool const bTranslation = pCur != pLimit && *pCur == '(';
+    if (bTranslation)
+        ++pCur;
+
+    if (!matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
+        throw lang::IllegalArgumentException();
+
+    // Quantifier: '*' permits an empty domain, '+' does not.
+    if (pCur == pLimit)
+        throw lang::IllegalArgumentException();
+    sal_Unicode const cQuantifier = *pCur;
+    if (cQuantifier != '*' && cQuantifier != '+')
+        throw lang::IllegalArgumentException();
+    ++pCur;
+    bool const bEmptyDomain = cQuantifier == '*';
+
+    // Optional infix literal.
+    OUString aInfix;
+    scanStringLiteral(&pCur, pLimit, &aInfix);
+
+    if (!matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
+        throw lang::IllegalArgumentException();
+
+    OUString aReversePrefix;
+    if (bTranslation)
+    {
+        bool const bTailOk
+            = matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM(")->"))
+              && scanStringLiteral(&pCur, pLimit, &aReversePrefix)
+              && matchString(&pCur, pLimit, RTL_CONSTASCII_STRINGPARAM("\\1"));
+        if (!bTailOk)
+            throw lang::IllegalArgumentException();
+    }
+
+    // Nothing may follow the pattern.
+    if (pCur != pLimit)
+        throw lang::IllegalArgumentException();
+
+    return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
+                  bTranslation, aReversePrefix);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-15 05:54:39 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-15 05:54:39 +0000
commit	267c6f2ac71f92999e969232431ba04678e7437e (patch)
tree	358c9467650e1d0a1d7227a21dac2e3d08b622b2 /ucb/source/regexp
parent	Initial commit. (diff)
download	libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip