diff options
Diffstat (limited to '')
-rw-r--r-- | src/VBox/Main/src-all/QMTranslatorImpl.cpp | 671 |
1 files changed, 671 insertions, 0 deletions
diff --git a/src/VBox/Main/src-all/QMTranslatorImpl.cpp b/src/VBox/Main/src-all/QMTranslatorImpl.cpp new file mode 100644 index 00000000..f1d19aab --- /dev/null +++ b/src/VBox/Main/src-all/QMTranslatorImpl.cpp @@ -0,0 +1,671 @@ +/* $Id: QMTranslatorImpl.cpp $ */ +/** @file + * VirtualBox API translation handling class + */ + +/* + * Copyright (C) 2014-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include <vector> +#include <set> +#include <algorithm> +#include <iprt/sanitized/iterator> +#include <iprt/errcore.h> +#include <iprt/file.h> +#include <iprt/asm.h> +#include <iprt/string.h> +#include <iprt/strcache.h> +#include <VBox/com/string.h> +#include <VBox/log.h> +#include <QMTranslator.h> + +/* QM File Magic Number */ +static const size_t g_cbMagic = 16; +static const uint8_t g_abMagic[g_cbMagic] = +{ + 0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95, + 0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd +}; + +/* Used internally */ +class QMException : public std::exception +{ + const char *m_str; +public: + QMException(const char *str) : m_str(str) {} + virtual const char *what() const throw() { return m_str; } +}; + +/* Bytes stream. Used by the parser to iterate through the data */ +class QMBytesStream +{ + size_t m_cbSize; + const uint8_t * const m_dataStart; + const uint8_t *m_iter; + const uint8_t *m_end; + +public: + + QMBytesStream(const uint8_t *const dataStart, size_t cbSize) + : m_cbSize(dataStart ? cbSize : 0) + , m_dataStart(dataStart) + , m_iter(dataStart) + { + setEnd(); + } + + /** Sets end pointer. + * Used in message reader to detect the end of message block */ + inline void setEnd(size_t pos = 0) + { + m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize); + } + + inline uint8_t read8() + { + checkSize(1); + return *m_iter++; + } + + inline uint32_t read32() + { + checkSize(4); + uint32_t result = *reinterpret_cast<const uint32_t *>(m_iter); + m_iter += 4; + return RT_BE2H_U32(result); + } + + /** Reads string in UTF16 and converts it into a UTF8 string */ + inline com::Utf8Str readUtf16String() + { + uint32_t size = read32(); + checkSize(size); + if (size & 1) + throw QMException("Incorrect string size"); + + /* UTF-16 can encode up to codepoint U+10ffff, which UTF-8 needs 4 bytes + to encode, so reserve twice the size plus a terminator for the result. */ + com::Utf8Str result; + result.reserve(size * 2 + 1); + char *pszStr = result.mutableRaw(); + int vrc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, size >> 1, &pszStr, result.capacity(), NULL); + if (RT_SUCCESS(vrc)) + result.jolt(); + else + throw QMException("Translation from UTF-16 to UTF-8 failed"); + + m_iter += size; + return result; + } + + /** + * Reads a string, forcing UTF-8 encoding. + */ + inline com::Utf8Str readString() + { + uint32_t size = read32(); + checkSize(size); + + com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size); + if (size > 0) + { + RTStrPurgeEncoding(result.mutableRaw()); + result.jolt(); + } + + m_iter += size; + return result; + } + + /** + * Reads memory block + * Returns number of bytes read + */ + inline uint32_t read(char *bBuf, uint32_t cbSize) + { + if (!bBuf || !cbSize) + return 0; + cbSize = RT_MIN(cbSize, (uint32_t)(m_end - m_iter)); + memcpy(bBuf, m_iter, cbSize); + m_iter += cbSize; + return cbSize; + } + + /** Checks the magic number. + * Should be called when in the beginning of the data + * @throws exception on mismatch */ + inline void checkMagic() + { + checkSize(g_cbMagic); + if (RT_LIKELY(memcmp(&(*m_iter), g_abMagic, g_cbMagic) == 0)) + m_iter += g_cbMagic; + else + throw QMException("Wrong magic number"); + } + + /** Has we reached the end pointer? */ + inline bool hasFinished() + { + return m_iter == m_end; + } + + /** Returns current stream position */ + inline size_t tellPos() + { + return (size_t)(m_iter - m_dataStart); + } + + /** Moves current pointer to a desired position */ + inline void seek(uint32_t offSkip) + { + size_t cbLeft = (size_t)(m_end - m_iter); + if (cbLeft >= offSkip) + m_iter += offSkip; + else + m_iter = m_end; /** @todo r=bird: Or throw exception via checkSize? */ + } + + /** Checks whether stream has enough data to read size bytes */ + inline void checkSize(size_t size) + { + if (RT_LIKELY((size_t)(m_end - m_iter) >= size)) + return; + throw QMException("Incorrect item size"); + } +}; + +/* Internal QMTranslator implementation */ +class QMTranslator_Impl +{ + /** Used while parsing */ + struct QMMessageParse + { + /* Everything is in UTF-8 */ + std::vector<com::Utf8Str> astrTranslations; + com::Utf8Str strContext; + com::Utf8Str strComment; + com::Utf8Str strSource; + + QMMessageParse() {} + }; + + struct QMMessage + { + const char *pszContext; + const char *pszSource; + const char *pszComment; + std::vector<const char *> vecTranslations; + uint32_t hash; + + QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0) + {} + + QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc) + : pszContext(addStr(hStrCache, rSrc.strContext)) + , pszSource(addStr(hStrCache, rSrc.strSource)) + , pszComment(addStr(hStrCache, rSrc.strComment)) + , hash(RTStrHash1(pszSource)) + { + for (size_t i = 0; i < rSrc.astrTranslations.size(); i++) + vecTranslations.push_back(addStr(hStrCache, rSrc.astrTranslations[i])); + } + + /** Helper. */ + static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc) + { + if (rSrc.isNotEmpty()) + { + const char *psz = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length()); + if (RT_LIKELY(psz)) + return psz; + throw std::bad_alloc(); + } + return NULL; + } + + }; + + struct HashOffset + { + uint32_t hash; + uint32_t offset; + + HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {} + + bool operator<(const HashOffset &obj) const + { + return (hash != obj.hash ? hash < obj.hash : offset < obj.offset); + } + + }; + + typedef std::set<HashOffset> QMHashSet; + typedef QMHashSet::const_iterator QMHashSetConstIter; + typedef std::vector<QMMessage> QMMessageArray; + typedef std::vector<uint8_t> QMByteArray; + + QMHashSet m_hashSet; + QMMessageArray m_messageArray; + QMByteArray m_pluralRules; + +public: + + QMTranslator_Impl() {} + + enum PluralOpCodes + { + Pl_Eq = 0x01, + Pl_Lt = 0x02, + Pl_Leq = 0x03, + Pl_Between = 0x04, + + Pl_OpMask = 0x07, + + Pl_Not = 0x08, + Pl_Mod10 = 0x10, + Pl_Mod100 = 0x20, + Pl_Lead1000 = 0x40, + + Pl_And = 0xFD, + Pl_Or = 0xFE, + Pl_NewRule = 0xFF, + + Pl_LMask = 0x80, + }; + + /* + * Rules format: + * <O><2>[<3>][<&&><O><2>[<3>]]...[<||><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]... + * where: + * <O> - OpCode + * <2> - Second operand + * <3> - Third operand + * <&&> - 'And' operation + * <||> - 'Or' operation + * <New> - Start of rule for next plural form + * Rules are ordered by plural forms, i.e: + * <rule for first form (i.e. single)><New><rule for next form>... + */ + bool checkPlural(const QMByteArray &aRules) const + { + if (aRules.empty()) + return true; + + uint32_t iPos = 0; + do { + uint8_t bOpCode = aRules[iPos]; + + /* Invalid place of And/Or/NewRule */ + if (bOpCode & Pl_LMask) + return false; + + /* 2nd operand */ + iPos++; + + /* 2nd operand missing */ + if (iPos == aRules.size()) + return false; + + /* Invalid OpCode */ + if ((bOpCode & Pl_OpMask) == 0) + return false; + + if ((bOpCode & Pl_OpMask) == Pl_Between) + { + /* 3rd operand */ + iPos++; + + /* 3rd operand missing */ + if (iPos == aRules.size()) + return false; + } + + /* And/Or/NewRule */ + iPos++; + + /* All rules checked */ + if (iPos == aRules.size()) + return true; + + } while ( ( (aRules[iPos] == Pl_And) + || (aRules[iPos] == Pl_Or) + || (aRules[iPos] == Pl_NewRule)) + && ++iPos != aRules.size()); + + return false; + } + + size_t plural(size_t aNum) const + { + if (aNum == ~(size_t)0 || m_pluralRules.empty()) + return 0; + + size_t uPluralNumber = 0; + uint32_t iPos = 0; + + /* Rules loop */ + for (;;) + { + bool fOr = false; + /* 'Or' loop */ + for (;;) + { + bool fAnd = true; + /* 'And' loop */ + for (;;) + { + int iOpCode = m_pluralRules[iPos++]; + size_t iOpLeft = aNum; + if (iOpCode & Pl_Mod10) + iOpLeft %= 10; + else if (iOpCode & Pl_Mod100) + iOpLeft %= 100; + else if (iOpCode & Pl_Lead1000) + { + while (iOpLeft >= 1000) + iOpLeft /= 1000; + } + size_t iOpRight = m_pluralRules[iPos++]; + int iOp = iOpCode & Pl_OpMask; + size_t iOpRight1 = 0; + if (iOp == Pl_Between) + iOpRight1 = m_pluralRules[iPos++]; + + bool fResult = (iOp == Pl_Eq && iOpLeft == iOpRight) + || (iOp == Pl_Lt && iOpLeft < iOpRight) + || (iOp == Pl_Leq && iOpLeft <= iOpRight) + || (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1); + if (iOpCode & Pl_Not) + fResult = !fResult; + + fAnd = fAnd && fResult; + if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_And) + break; + iPos++; + } + fOr = fOr || fAnd; + if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_Or) + break; + iPos++; + } + if (fOr) + return uPluralNumber; + + /* Qt returns last plural number if none of rules are match. */ + uPluralNumber++; + + if (iPos >= m_pluralRules.size()) + return uPluralNumber; + + iPos++; // Skip Pl_NewRule + } + } + + const char *translate(const char *pszContext, + const char *pszSource, + const char *pszDisamb, + const size_t aNum, + const char **ppszSafeSource) const RT_NOEXCEPT + { + QMHashSetConstIter lowerIter, upperIter; + + /* As turned out, comments (pszDisamb) are not kept always in result qm file + * Therefore, exclude them from the hash */ + uint32_t hash = RTStrHash1(pszSource); + lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0)); + upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX)); + + /* + * Check different combinations with and without context and + * disambiguation. This can help us to find the translation even + * if context or disambiguation are not know or properly defined. + */ + const char *apszCtx[] = {pszContext, pszContext, NULL, NULL}; + const char *apszDisabm[] = {pszDisamb, NULL, pszDisamb, NULL}; + AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm)); + + for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i) + { + for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter) + { + const QMMessage &message = m_messageArray[iter->offset]; + if ( RTStrCmp(message.pszSource, pszSource) == 0 + && (!apszCtx[i] || !*apszCtx[i] || RTStrCmp(message.pszContext, apszCtx[i]) == 0) + && (!apszDisabm[i] || !*apszDisabm[i] || RTStrCmp(message.pszComment, apszDisabm[i]) == 0 )) + { + *ppszSafeSource = message.pszSource; + const std::vector<const char *> &vecTranslations = m_messageArray[iter->offset].vecTranslations; + size_t const idxPlural = plural(aNum); + return vecTranslations[RT_MIN(idxPlural, vecTranslations.size() - 1)]; + } + } + } + + *ppszSafeSource = NULL; + return pszSource; + } + + void load(QMBytesStream &stream, RTSTRCACHE hStrCache) + { + /* Load into local variables. If we failed during the load, + * it would allow us to keep the object in a valid (previous) state. */ + QMHashSet hashSet; + QMMessageArray messageArray; + QMByteArray pluralRules; + + stream.checkMagic(); + + while (!stream.hasFinished()) + { + uint32_t sectionCode = stream.read8(); + uint32_t sLen = stream.read32(); + + /* Hashes and Context sections are ignored. They contain hash tables + * to speed-up search which is not useful since we recalculate all hashes + * and don't perform context search by hash */ + switch (sectionCode) + { + case Messages: + parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen); + break; + case Hashes: + /* Only get size information to speed-up vector filling + * if Hashes section goes in the file before Message section */ + if (messageArray.empty()) + messageArray.reserve(sLen >> 3); + stream.seek(sLen); + break; + case NumerusRules: + { + pluralRules.resize(sLen); + uint32_t cbSize = stream.read((char *)&pluralRules[0], sLen); + if (cbSize < sLen) + throw QMException("Incorrect section size"); + if (!checkPlural(pluralRules)) + pluralRules.erase(pluralRules.begin(), pluralRules.end()); + break; + } + case Contexts: + case Dependencies: + case Language: + stream.seek(sLen); + break; + default: + throw QMException("Unkown section"); + } + } + + /* Store the data into member variables. + * The following functions never generate exceptions */ + m_hashSet.swap(hashSet); + m_messageArray.swap(messageArray); + m_pluralRules.swap(pluralRules); + } + +private: + + /* Some QM stuff */ + enum SectionType + { + Contexts = 0x2f, + Hashes = 0x42, + Messages = 0x69, + NumerusRules = 0x88, + Dependencies = 0x96, + Language = 0xa7 + }; + + enum MessageType + { + End = 1, + SourceText16 = 2, + Translation = 3, + Context16 = 4, + Obsolete1 = 5, /**< was Hash */ + SourceText = 6, + Context = 7, + Comment = 8 + }; + + /* Read messages from the stream. */ + static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet, + QMMessageArray * const messageArray, size_t cbSize) + { + stream.setEnd(stream.tellPos() + cbSize); + uint32_t cMessage = 0; + while (!stream.hasFinished()) + { + /* Process the record. Skip anything that doesn't have a source + string or any valid translations. Using C++ strings for temporary + storage here, as we don't want to pollute the cache we bogus strings + in case of duplicate sub-records or invalid records. */ + QMMessageParse ParsedMsg; + parseMessageRecord(stream, &ParsedMsg); + if ( ParsedMsg.astrTranslations.size() > 0 + && ParsedMsg.strSource.isNotEmpty()) + { + /* Copy the strings over into the string cache and a hashed QMMessage, + before adding it to the result. */ + QMMessage HashedMsg(hStrCache, ParsedMsg); + hashSet->insert(HashOffset(HashedMsg.hash, cMessage++)); + messageArray->push_back(HashedMsg); + + } + /*else: wtf? */ + } + stream.setEnd(); + } + + /* Parse one message from the stream */ + static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message) + { + while (!stream.hasFinished()) + { + uint8_t type = stream.read8(); + switch (type) + { + case End: + return; + /* Ignored as obsolete */ + case Context16: + case SourceText16: + stream.seek(stream.read32()); + break; + case Translation: + message->astrTranslations.push_back(stream.readUtf16String()); + break; + + case SourceText: + message->strSource = stream.readString(); + break; + + case Context: + message->strContext = stream.readString(); + break; + + case Comment: + message->strComment = stream.readString(); + break; + + default: + /* Ignore unknown/obsolete block */ + LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type)); + break; + } + } + } +}; + +/* Inteface functions implementation */ +QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {} + +QMTranslator::~QMTranslator() { delete m_impl; } + +const char *QMTranslator::translate(const char *pszContext, const char *pszSource, const char **ppszSafeSource, + const char *pszDisamb /*= NULL*/, const size_t aNum /*= ~(size_t)0*/) const RT_NOEXCEPT + +{ + return m_impl->translate(pszContext, pszSource, pszDisamb, aNum, ppszSafeSource); +} + +int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT +{ + /* To free safely the file in case of exception */ + struct FileLoader + { + uint8_t *data; + size_t cbSize; + int vrc; + FileLoader(const char *pszFname) + { + vrc = RTFileReadAll(pszFname, (void**) &data, &cbSize); + } + + ~FileLoader() + { + if (isSuccess()) + RTFileReadAllFree(data, cbSize); + } + bool isSuccess() { return RT_SUCCESS(vrc); } + }; + + try + { + FileLoader loader(pszFilename); + if (loader.isSuccess()) + { + QMBytesStream stream(loader.data, loader.cbSize); + m_impl->load(stream, hStrCache); + } + return loader.vrc; + } + catch(std::exception &e) + { + LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what())); + return VERR_INTERNAL_ERROR; + } + catch(...) + { + LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename)); + return VERR_GENERAL_FAILURE; + } +} |