diff options
Diffstat (limited to 'xbmc/utils/POUtils.cpp')
-rw-r--r-- | xbmc/utils/POUtils.cpp | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/xbmc/utils/POUtils.cpp b/xbmc/utils/POUtils.cpp new file mode 100644 index 0000000..830336d --- /dev/null +++ b/xbmc/utils/POUtils.cpp @@ -0,0 +1,313 @@ +/* + * Copyright (C) 2012-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "utils/POUtils.h" + +#include "URL.h" +#include "filesystem/File.h" +#include "utils/log.h" + +#include <stdlib.h> + +CPODocument::CPODocument() +{ + m_CursorPos = 0; + m_nextEntryPos = 0; + m_POfilelength = 0; + m_Entry.msgStrPlural.clear(); + m_Entry.msgStrPlural.resize(1); +} + +CPODocument::~CPODocument() = default; + +bool CPODocument::LoadFile(const std::string &pofilename) +{ + CURL poFileUrl(pofilename); + if (!XFILE::CFile::Exists(poFileUrl)) + return false; + + XFILE::CFile file; + std::vector<uint8_t> buf; + if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header + { + CLog::Log(LOGERROR, "{}: can't load file \"{}\" or file is too small", __FUNCTION__, + pofilename); + return false; + } + + m_strBuffer = '\n'; + m_strBuffer.append(reinterpret_cast<char*>(buf.data()), buf.size()); + buf.clear(); + + ConvertLineEnds(pofilename); + + // we make sure, to have an LF at the end of buffer + if (*m_strBuffer.rbegin() != '\n') + { + m_strBuffer += "\n"; + } + + m_POfilelength = m_strBuffer.size(); + + if (GetNextEntry() && m_Entry.Type == MSGID_FOUND) + return true; + + CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: {}", pofilename); + return false; +} + +bool CPODocument::GetNextEntry() +{ + do + { + // if we don't find LFLF, we reached the end of the buffer and the last entry to check + // we indicate this with setting m_nextEntryPos to the end of the buffer + if ((m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos)) == std::string::npos) + m_nextEntryPos = m_POfilelength-1; + + // now we read the actual entry into a temp string for further processing + m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos +1); + m_CursorPos = m_nextEntryPos+1; // jump cursor to the second LF character + + if (FindLineStart ("\nmsgid ", m_Entry.msgID.Pos)) + { + if (FindLineStart ("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID()) + { + m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id + return true; + } + + size_t plurPos; + if (FindLineStart ("\nmsgid_plural ", plurPos)) + { + m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry + return true; + } + + m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id + return true; + } + } + while (m_nextEntryPos != m_POfilelength-1); + // we reached the end of buffer AND we have not found a valid entry + + return false; +} + +void CPODocument::ParseEntry(bool bisSourceLang) +{ + if (bisSourceLang) + { + if (m_Entry.Type == ID_FOUND) + GetString(m_Entry.msgID); + else + m_Entry.msgID.Str.clear(); + return; + } + + if (m_Entry.Type != ID_FOUND) + { + GetString(m_Entry.msgID); + if (FindLineStart ("\nmsgctxt ", m_Entry.msgCtxt.Pos)) + GetString(m_Entry.msgCtxt); + else + m_Entry.msgCtxt.Str.clear(); + } + + if (m_Entry.Type != MSGID_PLURAL_FOUND) + { + if (FindLineStart ("\nmsgstr ", m_Entry.msgStr.Pos)) + { + GetString(m_Entry.msgStr); + GetString(m_Entry.msgID); + } + else + { + CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: {}", + m_Entry.Content); + m_Entry.msgStr.Str.clear(); + } + return; + } + + // We found a plural form entry. We read it into a vector of CStrEntry types + m_Entry.msgStrPlural.clear(); + std::string strPattern = "\nmsgstr[0] "; + CStrEntry strEntry; + + for (int n=0; n<7 ; n++) + { + strPattern[8] = static_cast<char>(n+'0'); + if (FindLineStart (strPattern, strEntry.Pos)) + { + GetString(strEntry); + if (strEntry.Str.empty()) + break; + m_Entry.msgStrPlural.push_back(strEntry); + } + else + break; + } + + if (m_Entry.msgStrPlural.empty()) + { + CLog::Log(LOGERROR, + "POParser: msgstr[] plural lines have zero valid strings. " + "Failed entry: {}", + m_Entry.Content); + m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector + } +} + +const std::string& CPODocument::GetPlurMsgstr(size_t plural) const +{ + if (m_Entry.msgStrPlural.size() < plural+1) + { + CLog::Log(LOGERROR, + "POParser: msgstr[{}] plural field requested, but not found in PO file. " + "Failed entry: {}", + static_cast<int>(plural), m_Entry.Content); + plural = m_Entry.msgStrPlural.size()-1; + } + return m_Entry.msgStrPlural[plural].Str; +} + +std::string CPODocument::UnescapeString(const std::string &strInput) +{ + std::string strOutput; + if (strInput.empty()) + return strOutput; + + char oescchar; + strOutput.reserve(strInput.size()); + std::string::const_iterator it = strInput.begin(); + while (it < strInput.end()) + { + oescchar = *it++; + if (oescchar == '\\') + { + if (it == strInput.end()) + { + CLog::Log(LOGERROR, + "POParser: warning, unhandled escape character " + "at line-end. Problematic entry: {}", + m_Entry.Content); + break; + } + switch (*it++) + { + case 'a': oescchar = '\a'; break; + case 'b': oescchar = '\b'; break; + case 'v': oescchar = '\v'; break; + case 'n': oescchar = '\n'; break; + case 't': oescchar = '\t'; break; + case 'r': oescchar = '\r'; break; + case '"': oescchar = '"' ; break; + case '0': oescchar = '\0'; break; + case 'f': oescchar = '\f'; break; + case '?': oescchar = '\?'; break; + case '\'': oescchar = '\''; break; + case '\\': oescchar = '\\'; break; + + default: + { + CLog::Log(LOGERROR, + "POParser: warning, unhandled escape character. Problematic entry: {}", + m_Entry.Content); + continue; + } + } + } + strOutput.push_back(oescchar); + } + return strOutput; +} + +bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos) +{ + + FoundPos = m_Entry.Content.find(strToFind); + + if (FoundPos == std::string::npos || FoundPos + strToFind.size() + 2 > m_Entry.Content.size()) + return false; // if we don't find the string or if we don't have at least one char after it + + FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data + return true; +} + +bool CPODocument::ParseNumID() +{ + if (isdigit(m_Entry.Content.at(m_Entry.xIDPos))) // verify if the first char is digit + { + // we check for the numeric id for the fist 10 chars (uint32) + m_Entry.xID = strtol(&m_Entry.Content[m_Entry.xIDPos], NULL, 10); + return true; + } + + CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, " + "entry was handled as normal msgid entry"); + CLog::Log(LOGERROR, "POParser: The problematic entry: {}", m_Entry.Content); + return false; +} + +void CPODocument::GetString(CStrEntry &strEntry) +{ + size_t nextLFPos; + size_t startPos = strEntry.Pos; + strEntry.Str.clear(); + + while (startPos < m_Entry.Content.size()) + { + nextLFPos = m_Entry.Content.find('\n', startPos); + if (nextLFPos == std::string::npos) + nextLFPos = m_Entry.Content.size(); + + // check syntax, if it really is a valid quoted string line + if (nextLFPos-startPos < 2 || m_Entry.Content[startPos] != '\"' || + m_Entry.Content[nextLFPos-1] != '\"') + break; + + strEntry.Str.append(m_Entry.Content, startPos+1, nextLFPos-2-startPos); + startPos = nextLFPos+1; + } + + strEntry.Str = UnescapeString(strEntry.Str); +} + +void CPODocument::ConvertLineEnds(const std::string &filename) +{ + size_t foundPos = m_strBuffer.find_first_of('\r'); + if (foundPos == std::string::npos) + return; // We have only Linux style line endings in the file, nothing to do + + if (foundPos+1 >= m_strBuffer.size() || m_strBuffer[foundPos+1] != '\n') + CLog::Log(LOGDEBUG, + "POParser: PO file has Mac Style Line Endings. " + "Converted in memory to Linux LF for file: {}", + filename); + else + CLog::Log(LOGDEBUG, + "POParser: PO file has Win Style Line Endings. " + "Converted in memory to Linux LF for file: {}", + filename); + + std::string strTemp; + strTemp.reserve(m_strBuffer.size()); + for (std::string::const_iterator it = m_strBuffer.begin(); it < m_strBuffer.end(); ++it) + { + if (*it == '\r') + { + if (it+1 == m_strBuffer.end() || *(it+1) != '\n') + strTemp.push_back('\n'); // convert Mac style line ending and continue + continue; // we have Win style line ending so we exclude this CR now + } + strTemp.push_back(*it); + } + m_strBuffer.swap(strTemp); + m_POfilelength = m_strBuffer.size(); +} |