1 files changed, 671 insertions, 0 deletions
diff --git a/src/VBox/Main/src-all/QMTranslatorImpl.cpp b/src/VBox/Main/src-all/QMTranslatorImpl.cpp
new file mode 100644
index 00000000..f1d19aab
--- /dev/null
+++ b/src/VBox/Main/src-all/QMTranslatorImpl.cpp
@@ -0,0 +1,671 @@
+/* $Id: QMTranslatorImpl.cpp $ */
+/** @file
+ * VirtualBox API translation handling class
+ */
+
+/*
+ * Copyright (C) 2014-2022 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <iprt/sanitized/iterator>
+#include <iprt/errcore.h>
+#include <iprt/file.h>
+#include <iprt/asm.h>
+#include <iprt/string.h>
+#include <iprt/strcache.h>
+#include <VBox/com/string.h>
+#include <VBox/log.h>
+#include <QMTranslator.h>
+
+/* QM file magic number: the 16 bytes that checkMagic() expects at the start of the data */
+static const uint8_t g_abMagic[] =
+{
+    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
+    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
+};
+static const size_t g_cbMagic = sizeof(g_abMagic);
+
+/* Exception used internally to report why a QM file could not be parsed */
+class QMException : public std::exception
+{
+    const char *m_pszReason; /* stored as passed in; neither copied nor freed here */
+public:
+    QMException(const char *str) : m_pszReason(str) {}
+    virtual const char *what() const throw() { return m_pszReason; }
+};
+
+/* Bytes stream. Used by the parser to iterate through the data */
+class QMBytesStream
+{
+    size_t         m_cbSize;            /* size of the data; 0 when the buffer is NULL */
+    const uint8_t * const m_dataStart;  /* first byte of the data */
+    const uint8_t *m_iter;              /* current read position */
+    const uint8_t *m_end;               /* current end limit, moved by setEnd() */
+
+public:
+
+    /** Wraps @a cbSize bytes at @a dataStart; a NULL buffer gives an already finished stream. */
+    QMBytesStream(const uint8_t *const dataStart, size_t cbSize)
+        : m_cbSize(dataStart ? cbSize : 0)
+        , m_dataStart(dataStart)
+        , m_iter(dataStart)
+        , m_end(dataStart + m_cbSize)
+    {}
+
+    /** Moves the end pointer to offset @a pos from the start of the data; 0 or an
+     * offset at or beyond the data size selects the real end of the data. */
+    inline void setEnd(size_t pos = 0)
+    {
+        m_end = m_dataStart + (pos == 0 || pos >= m_cbSize ? m_cbSize : pos);
+    }
+
+    inline uint8_t read8() /* returns the next byte; throws QMException if none is left */
+    {
+        checkSize(sizeof(uint8_t));
+        return *m_iter++;
+    }
+
+    inline uint32_t read32() /* big-endian value; m_iter may be unaligned, so no uint32_t * access */
+    {
+        checkSize(4);
+        const uint8_t * const pb = m_iter;
+        m_iter += 4;
+        return ((uint32_t)pb[0] << 24) | ((uint32_t)pb[1] << 16) | ((uint32_t)pb[2] << 8) | (uint32_t)pb[3];
+    }
+
+    /** Reads a 32-bit byte count followed by that many bytes of UTF-16, and returns
+     * the text converted by RTUtf16BigToUtf8Ex. Throws QMException on bad input. */
+    inline com::Utf8Str readUtf16String()
+    {
+        uint32_t const cbUtf16 = read32();
+        checkSize(cbUtf16);
+        if ((cbUtf16 % 2) != 0)
+            throw QMException("Incorrect string size");
+
+        /* Two output bytes per input byte plus the terminator is always enough
+           room: one UTF-16 unit expands to at most three UTF-8 bytes. */
+        com::Utf8Str strUtf8;
+        strUtf8.reserve(cbUtf16 * 2 + 1);
+        char *pszDst = strUtf8.mutableRaw();
+        int const vrc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, cbUtf16 / 2, &pszDst, strUtf8.capacity(), NULL);
+        if (!RT_SUCCESS(vrc))
+            throw QMException("Translation from UTF-16 to UTF-8 failed");
+
+        strUtf8.jolt();
+        m_iter += cbUtf16;
+        return strUtf8;
+    }
+
+    /**
+     *  Reads a 32-bit byte count followed by that many bytes of text and runs
+     *  RTStrPurgeEncoding over the copy before returning it.
+     */
+    inline com::Utf8Str readString()
+    {
+        uint32_t const cbString = read32();
+        checkSize(cbString);
+
+        com::Utf8Str strResult(reinterpret_cast<const char *>(m_iter), cbString);
+        m_iter += cbString;
+        if (cbString == 0)
+            return strResult;
+
+        RTStrPurgeEncoding(strResult.mutableRaw());
+        strResult.jolt();
+        return strResult;
+    }
+
+    /**
+     *  Copies up to cbSize bytes into bBuf and advances; returns the number copied.
+     */
+    inline uint32_t read(char *bBuf, uint32_t cbSize)
+    {
+        if (!bBuf || !cbSize)
+            return 0;
+        size_t const cbLeft = (size_t)(m_end - m_iter); /* kept in full width, no 32-bit cut */
+        cbSize = cbLeft < cbSize ? (uint32_t)cbLeft : cbSize;
+        memcpy(bBuf, m_iter, cbSize);
+        m_iter += cbSize;
+        return cbSize;
+    }
+
+    /** Verifies that the magic number is at the current position and skips it.
+     * Should be called when in the beginning of the data
+     * @throws QMException if too little data is left or the bytes differ */
+    inline void checkMagic()
+    {
+        checkSize(g_cbMagic);
+        if (memcmp(m_iter, g_abMagic, g_cbMagic) != 0)
+            throw QMException("Wrong magic number");
+
+        m_iter += g_cbMagic;
+    }
+
+    /** Returns true once the current position has reached the end pointer. */
+    inline bool hasFinished()
+    {
+        return m_end == m_iter;
+    }
+
+    /** Returns the current position as a byte offset from the start of the data. */
+    inline size_t tellPos()
+    {
+        return static_cast<size_t>(m_iter - m_dataStart);
+    }
+
+    /** Skips @a offSkip bytes forward from the current position.
+     * Stops at the end pointer when fewer bytes are left.
+     * @todo r=bird: Or throw exception via checkSize? */
+    inline void seek(uint32_t offSkip)
+    {
+        size_t const cbAvail = (size_t)(m_end - m_iter);
+        size_t const cbSkip  = offSkip <= cbAvail ? offSkip : cbAvail;
+        m_iter += cbSkip;
+    }
+
+    /** Throws QMException unless at least @a size bytes remain before the end pointer */
+    inline void checkSize(size_t size)
+    {
+        size_t const cbLeft = (size_t)(m_end - m_iter);
+        if (!RT_LIKELY(cbLeft >= size))
+            throw QMException("Incorrect item size");
+    }
+};
+
+/* Internal QMTranslator implementation */
+class QMTranslator_Impl
+{
+    /** Temporary holder for the strings of one message record while it is parsed */
+    struct QMMessageParse
+    {
+        /* Everything is in UTF-8 */
+        std::vector<com::Utf8Str> astrTranslations; /* one entry per Translation sub-record */
+        com::Utf8Str strContext;                    /* from the Context sub-record */
+        com::Utf8Str strComment;                    /* from the Comment sub-record */
+        com::Utf8Str strSource;                     /* from the SourceText sub-record */
+
+        QMMessageParse() {}
+    };
+
+    /** A message in its final form; the strings come from RTStrCacheEnterN() */
+    struct QMMessage
+    {
+        const char *pszContext;
+        const char *pszSource;
+        const char *pszComment;
+        std::vector<const char *> vecTranslations;
+        uint32_t    hash;   /* RTStrHash1() of pszSource */
+
+        QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0)
+        {}
+
+        /** Enters the strings of @a rSrc into @a hStrCache; empty ones become NULL */
+        QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc)
+            : pszContext(addStr(hStrCache, rSrc.strContext))
+            , pszSource(addStr(hStrCache, rSrc.strSource))
+            , pszComment(addStr(hStrCache, rSrc.strComment))
+            , hash(RTStrHash1(pszSource))
+        {
+            std::vector<com::Utf8Str>::const_iterator it = rSrc.astrTranslations.begin();
+            for (; it != rSrc.astrTranslations.end(); ++it)
+                vecTranslations.push_back(addStr(hStrCache, *it));
+        }
+
+        /** Returns the cached copy of @a rSrc (NULL if it is empty); throws std::bad_alloc on failure */
+        static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc)
+        {
+            if (!rSrc.isNotEmpty())
+                return NULL;
+            const char * const pszCached = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length());
+            if (!RT_LIKELY(pszCached))
+                throw std::bad_alloc();
+            return pszCached;
+        }
+    };
+
+    /** Key of the hash set: hash of a source string plus its m_messageArray index */
+    struct HashOffset
+    {
+        uint32_t hash;
+        uint32_t offset;
+        HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {}
+
+        /** Orders by hash first and by offset second */
+        bool operator<(const HashOffset &obj) const
+        {
+            return hash < obj.hash || (hash == obj.hash && offset < obj.offset);
+        }
+    };
+
+    typedef std::set<HashOffset> QMHashSet;
+    typedef QMHashSet::const_iterator QMHashSetConstIter;
+    typedef std::vector<QMMessage> QMMessageArray;
+    typedef std::vector<uint8_t> QMByteArray;
+
+    QMHashSet      m_hashSet;       /* (source hash, m_messageArray index) pairs, see translate() */
+    QMMessageArray m_messageArray;  /* messages accepted by parseMessages() */
+    QMByteArray    m_pluralRules;   /* NumerusRules section; left empty if checkPlural() rejects it */
+
+public:
+
+    QMTranslator_Impl() {} /* the containers start out empty until load() succeeds */
+
+    enum PluralOpCodes
+    {
+        Pl_Eq          = 0x01,  /* number == 2nd operand */
+        Pl_Lt          = 0x02,  /* number <  2nd operand */
+        Pl_Leq         = 0x03,  /* number <= 2nd operand */
+        Pl_Between     = 0x04,  /* 2nd operand <= number <= 3rd operand */
+
+        Pl_OpMask      = 0x07,  /* extracts the comparison from an opcode byte */
+
+        Pl_Not         = 0x08,  /* flag: invert the comparison result */
+        Pl_Mod10       = 0x10,  /* flag: compare number % 10 */
+        Pl_Mod100      = 0x20,  /* flag: compare number % 100 */
+        Pl_Lead1000    = 0x40,  /* flag: divide number by 1000 until it is below 1000 */
+
+        Pl_And         = 0xFD,  /* joins two conditions */
+        Pl_Or          = 0xFE,  /* joins two 'And' groups */
+        Pl_NewRule     = 0xFF,  /* starts the rule of the next plural form */
+
+        Pl_LMask       = 0x80,  /* checkPlural() rejects an opcode byte with this bit set */
+    };
+
+    /*
+     * Validates plural rules; plural() later reads them without any bounds checks.
+     * Returns true for an empty or well-formed rule array and false otherwise.
+     * Rules format:
+     * <O><2>[<3>][<&&><O><2>[<3>]]...[<||><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]...
+     * where:
+     *    <O> - OpCode
+     *    <2> - Second operand
+     *    <3> - Third operand
+     *    <&&> - 'And' operation
+     *    <||> - 'Or' operation
+     *    <New> - Start of rule for next plural form
+     * Rules are ordered by plural forms, i.e:
+     *   <rule for first form (i.e. single)><New><rule for next form>...
+     */
+    bool checkPlural(const QMByteArray &aRules) const
+    {
+        if (aRules.empty())
+            return true;
+
+        uint32_t iPos = 0;
+        do {
+            uint8_t const bOpCode = aRules[iPos];
+            uint8_t const bOp     = (uint8_t)(bOpCode & Pl_OpMask);
+
+            /* Invalid place of And/Or/NewRule */
+            if (bOpCode & Pl_LMask)
+                return false;
+
+            /* 2nd operand missing */
+            if (++iPos == aRules.size())
+                return false;
+
+            /* Invalid OpCode: only Pl_Eq...Pl_Between are comparisons, 0 and 5...7 are not */
+            if (bOp < Pl_Eq || bOp > Pl_Between)
+                return false;
+
+            if (bOp == Pl_Between)
+            {
+                /* 3rd operand */
+                iPos++;
+
+                /* 3rd operand missing */
+                if (iPos == aRules.size())
+                    return false;
+            }
+
+            /* And/Or/NewRule */
+            iPos++;
+
+            /* All rules checked */
+            if (iPos == aRules.size())
+                return true;
+
+        } while (   (   (aRules[iPos] == Pl_And)
+                     || (aRules[iPos] == Pl_Or)
+                     || (aRules[iPos] == Pl_NewRule))
+                 && ++iPos != aRules.size());
+
+        return false;
+    }
+    /** Returns the plural form index for aNum: 0 if aNum is ~(size_t)0 or no rules are loaded. */
+    size_t plural(size_t aNum) const
+    {
+        if (aNum == ~(size_t)0 || m_pluralRules.empty())
+            return 0;
+
+        size_t   uPluralNumber = 0;
+        uint32_t iPos = 0;
+
+        /* Rules loop: one pass per plural form, the first rule that holds wins */
+        for (;;)
+        {
+            bool fOr = false;
+            /* 'Or' loop: the rule holds if any of its 'And' groups holds */
+            for (;;)
+            {
+                bool fAnd = true;
+                /* 'And' loop: a group holds if all of its conditions hold */
+                for (;;)
+                {
+                    int    iOpCode = m_pluralRules[iPos++]; /* no bounds check: load() only keeps rules checkPlural() accepted */
+                    size_t iOpLeft = aNum;
+                    if (iOpCode & Pl_Mod10)
+                        iOpLeft %= 10;
+                    else if (iOpCode & Pl_Mod100)
+                        iOpLeft %= 100;
+                    else if (iOpCode & Pl_Lead1000)
+                    {
+                        while (iOpLeft >= 1000)
+                            iOpLeft /= 1000;
+                    }
+                    size_t iOpRight = m_pluralRules[iPos++];
+                    int    iOp = iOpCode & Pl_OpMask;
+                    size_t iOpRight1 = 0; /* 3rd operand, only present for Pl_Between */
+                    if (iOp == Pl_Between)
+                        iOpRight1 = m_pluralRules[iPos++];
+
+                    bool fResult =    (iOp == Pl_Eq      && iOpLeft == iOpRight)
+                                   || (iOp == Pl_Lt      && iOpLeft <  iOpRight)
+                                   || (iOp == Pl_Leq     && iOpLeft <= iOpRight)
+                                   || (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1);
+                    if (iOpCode & Pl_Not)
+                        fResult = !fResult;
+
+                    fAnd = fAnd && fResult;
+                    if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_And)
+                        break;
+                    iPos++; /* skip Pl_And */
+                }
+                fOr = fOr || fAnd;
+                if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_Or)
+                    break;
+                iPos++; /* skip Pl_Or */
+            }
+            if (fOr)
+                return uPluralNumber;
+
+            /* Qt returns the last plural number if none of the rules match. */
+            uPluralNumber++;
+
+            if (iPos >= m_pluralRules.size())
+                return uPluralNumber;
+
+            iPos++; // Skip Pl_NewRule
+        }
+    }
+
+    /** Looks up the translation of @a pszSource. On a match *ppszSafeSource is the cached source
+     * string and the result is never NULL; otherwise it is NULL and @a pszSource is returned. */
+    const char *translate(const char  *pszContext,
+                          const char  *pszSource,
+                          const char  *pszDisamb,
+                          const size_t aNum,
+                          const char **ppszSafeSource) const RT_NOEXCEPT
+    {
+        /* As turned out, comments (pszDisamb) are not kept always in result qm file
+         * Therefore, exclude them from the hash */
+        uint32_t const hash = RTStrHash1(pszSource);
+        QMHashSetConstIter const lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
+        QMHashSetConstIter const upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX));
+
+        /*
+         * Check different combinations with and without context and
+         * disambiguation. This can help us to find the translation even
+         * if context or disambiguation are not known or properly defined.
+         */
+        const char *apszCtx[]    = {pszContext, pszContext, NULL,      NULL};
+        const char *apszDisabm[] = {pszDisamb,  NULL,       pszDisamb, NULL};
+        AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm));
+
+        for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i)
+        {
+            for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter)
+            {
+                const QMMessage &message = m_messageArray[iter->offset];
+                if (   RTStrCmp(message.pszSource, pszSource) == 0
+                    && (!apszCtx[i]     || !*apszCtx[i]     || RTStrCmp(message.pszContext, apszCtx[i]) == 0)
+                    && (!apszDisabm[i]  || !*apszDisabm[i]  || RTStrCmp(message.pszComment, apszDisabm[i]) == 0 ))
+                {
+                    *ppszSafeSource = message.pszSource;
+                    size_t const idxPlural = plural(aNum);
+                    const char * const pszTr = message.vecTranslations[RT_MIN(idxPlural, message.vecTranslations.size() - 1)];
+                    return pszTr ? pszTr : message.pszSource; /* empty translations are stored as NULL */
+                }
+            }
+        }
+
+        *ppszSafeSource = NULL;
+        return pszSource;
+    }
+
+    /** Parses the QM data in @a stream and replaces the current content on success.
+     * Throws QMException on malformed data; the members are left untouched then. */
+    void load(QMBytesStream &stream, RTSTRCACHE hStrCache)
+    {
+        /* Load into local variables so that a failed load keeps the previous state. */
+        QMHashSet hashSet;
+        QMMessageArray messageArray;
+        QMByteArray pluralRules;
+
+        stream.checkMagic();
+
+        while (!stream.hasFinished())
+        {
+            uint32_t sectionCode = stream.read8();
+            uint32_t sLen = stream.read32();
+
+            /* Hashes and Context sections are ignored. They contain hash tables
+             * to speed-up search which is not useful since we recalculate all hashes
+             * and don't perform context search by hash */
+            switch (sectionCode)
+            {
+                case Messages:
+                    parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen);
+                    break;
+                case Hashes:
+                    /* Only get size information to speed-up vector filling
+                     * if Hashes section goes in the file before Message section */
+                    if (messageArray.empty())
+                        messageArray.reserve(sLen >> 3);
+                    stream.seek(sLen);
+                    break;
+                case NumerusRules:
+                {
+                    pluralRules.resize(sLen);   /* &pluralRules[0] is invalid if this is empty */
+                    uint32_t cbSize = sLen ? stream.read((char *)&pluralRules[0], sLen) : 0;
+                    if (cbSize < sLen)
+                        throw QMException("Incorrect section size");
+                    if (!checkPlural(pluralRules))
+                        pluralRules.clear();
+                    break;
+                }
+                case Contexts:
+                case Dependencies:
+                case Language:
+                    stream.seek(sLen);
+                    break;
+                default:
+                    throw QMException("Unkown section");
+            }
+        }
+
+        /* Store the data into member variables; swap() never throws. */
+        m_hashSet.swap(hashSet);
+        m_messageArray.swap(messageArray);
+        m_pluralRules.swap(pluralRules);
+    }
+
+private:
+
+    /* Section codes of a QM file as handled by load() */
+    enum SectionType
+    {
+        Contexts     = 0x2f,    /* skipped */
+        Hashes       = 0x42,    /* skipped; its size only serves as a reserve() hint */
+        Messages     = 0x69,    /* parsed by parseMessages() */
+        NumerusRules = 0x88,    /* plural rules, validated by checkPlural() */
+        Dependencies = 0x96,    /* skipped */
+        Language     = 0xa7     /* skipped */
+    };
+
+    enum MessageType /* sub-record tags inside a message, see parseMessageRecord() */
+    {
+        End          = 1,  /* ends the message record */
+        SourceText16 = 2,  /* skipped as obsolete */
+        Translation  = 3,  /* read with readUtf16String() */
+        Context16    = 4,  /* skipped as obsolete */
+        Obsolete1    = 5,  /**< was Hash */
+        SourceText   = 6,  /* read with readString() */
+        Context      = 7,  /* read with readString() */
+        Comment      = 8   /* read with readString() */
+    };
+
+    /* Reads the messages of one Messages section of cbSize bytes, appending them to
+       *messageArray and recording (source hash, array index) pairs in *hashSet. */
+    static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet,
+                              QMMessageArray * const messageArray, size_t cbSize)
+    {
+        stream.setEnd(stream.tellPos() + cbSize);
+        while (!stream.hasFinished())
+        {
+            /* Process the record. Skip anything that doesn't have a source
+               string or any valid translations.  Using C++ strings for temporary
+               storage here, as we don't want to pollute the cache with bogus strings
+               in case of duplicate sub-records or invalid records. */
+            QMMessageParse ParsedMsg;
+            parseMessageRecord(stream, &ParsedMsg);
+            if (   ParsedMsg.astrTranslations.size() > 0
+                && ParsedMsg.strSource.isNotEmpty())
+            {
+                /* Copy the strings over into the string cache and a hashed QMMessage.
+                   The offset stored with the hash is the index the message is about to
+                   get, which stays right when the array already holds messages. */
+                QMMessage HashedMsg(hStrCache, ParsedMsg);
+                hashSet->insert(HashOffset(HashedMsg.hash, (uint32_t)messageArray->size()));
+                messageArray->push_back(HashedMsg);
+            }
+            /*else: wtf? */
+        }
+        stream.setEnd();
+    }
+
+    /* Reads sub-records into *message until an End tag or the stream end pointer is reached */
+    static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message)
+    {
+        for (;;)
+        {
+            if (stream.hasFinished())
+                return;
+
+            uint8_t const type = stream.read8();
+            if (type == End)
+                return;
+
+            switch (type)
+            {
+                /* Ignored as obsolete */
+                case Context16:
+                case SourceText16:
+                    stream.seek(stream.read32());
+                    break;
+                case Translation:
+                    message->astrTranslations.push_back(stream.readUtf16String());
+                    break;
+                case SourceText:
+                    message->strSource = stream.readString();
+                    break;
+                case Context:
+                    message->strContext = stream.readString();
+                    break;
+                case Comment:
+                    message->strComment = stream.readString();
+                    break;
+                default:
+                    /* Ignore unknown/obsolete block */
+                    LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type));
+                    break;
+            }
+        }
+    }
+};
+
+/* Interface functions implementation */
+QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {} /* m_impl is deleted by the destructor */
+
+QMTranslator::~QMTranslator() { delete m_impl; } /* m_impl is the object allocated by the constructor */
+
+const char *QMTranslator::translate(const char *pszContext, const char *pszSource, const char **ppszSafeSource,
+                                    const char *pszDisamb /*= NULL*/, const size_t aNum /*= ~(size_t)0*/) const RT_NOEXCEPT
+{
+    /* Plain forwarder; note that the implementation takes the arguments in a different order. */
+    return m_impl->translate(pszContext, pszSource, pszDisamb, aNum, ppszSafeSource);
+}
+
+/* Reads and parses a QM file; returns the RTFileReadAll status or an error status if parsing threw. */
+int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT
+{
+    /* Owns the file content so that it is freed on every way out of the try block */
+    struct FileContent
+    {
+        uint8_t *pbData;
+        size_t   cbData;
+        int      vrc;
+        FileContent(const char *pszFname)
+        {
+            vrc = RTFileReadAll(pszFname, (void **)&pbData, &cbData);
+        }
+        ~FileContent()
+        {
+            if (isSuccess())
+                RTFileReadAllFree(pbData, cbData);
+        }
+        bool isSuccess() { return RT_SUCCESS(vrc); }
+    };
+
+    try
+    {
+        FileContent content(pszFilename);
+        if (!content.isSuccess())
+            return content.vrc;
+
+        QMBytesStream stream(content.pbData, content.cbData);
+        m_impl->load(stream, hStrCache);
+        return content.vrc;
+    }
+    catch (const std::exception &rXcpt)
+    {
+        LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, rXcpt.what()));
+        return VERR_INTERNAL_ERROR;
+    }
+    catch (...)
+    {
+        LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
+        return VERR_GENERAL_FAILURE;
+    }
+}