/*
 *  Copyright (C) 2012-2018 Team Kodi
 *  This file is part of Kodi - https://kodi.tv
 *
 *  SPDX-License-Identifier: GPL-2.0-or-later
 *  See LICENSES/README.md for more information.
 */

#include "utils/POUtils.h"

#include "URL.h"
#include "filesystem/File.h"
#include "utils/log.h"

#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <vector>

/*!
 * \brief Create an empty document.
 *
 * The plural string vector is initialised with one empty element so that
 * GetPlurMsgstr() always has an element to fall back to.
 */
CPODocument::CPODocument()
{
  m_CursorPos = 0;
  m_nextEntryPos = 0;
  m_POfilelength = 0;
  m_Entry.msgStrPlural.clear();
  m_Entry.msgStrPlural.resize(1);
}

CPODocument::~CPODocument() = default;

/*!
 * \brief Load a PO file into memory and read its header entry.
 * \param pofilename path/URL of the PO file to load
 * \return true if the file was loaded and its first entry is a plain msgid
 *         entry (the PO header), false otherwise
 */
bool CPODocument::LoadFile(const std::string &pofilename)
{
  CURL poFileUrl(pofilename);
  if (!XFILE::CFile::Exists(poFileUrl))
    return false;

  XFILE::CFile file;
  std::vector<uint8_t> buf;
  if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header
  {
    CLog::Log(LOGERROR, "{}: can't load file \"{}\" or file is too small", __FUNCTION__,
              pofilename);
    return false;
  }

  // start parsing from the beginning of the new buffer, even if this
  // document object has already been used to parse another file
  m_CursorPos = 0;
  m_nextEntryPos = 0;

  // every pattern passed to FindLineStart() begins with an LF, so the buffer
  // is prefixed with one to let the very first line of the file match as well
  m_strBuffer = '\n';
  m_strBuffer.append(reinterpret_cast<char*>(buf.data()), buf.size());
  buf.clear();

  ConvertLineEnds(pofilename);

  // we make sure, to have an LF at the end of buffer
  if (m_strBuffer.back() != '\n')
    m_strBuffer += '\n';

  m_POfilelength = m_strBuffer.size();

  if (GetNextEntry() && m_Entry.Type == MSGID_FOUND)
    return true;

  CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: {}", pofilename);
  return false;
}

/*!
 * \brief Advance to the next entry of the buffer which contains a msgid line.
 *
 * Entries are separated by an empty line (LFLF). The raw text of the entry is
 * stored in m_Entry.Content and m_Entry.Type is set according to what was found.
 *
 * \return true if an entry was found, false if the end of the buffer was
 *         reached without finding one
 */
bool CPODocument::GetNextEntry()
{
  do
  {
    // if we don't find LFLF, we reached the end of the buffer and the last entry to check
    // we indicate this with setting m_nextEntryPos to the end of the buffer
    m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos);
    if (m_nextEntryPos == std::string::npos)
      m_nextEntryPos = m_POfilelength - 1;

    // now we read the actual entry into a temp string for further processing
    m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos + 1);
    m_CursorPos = m_nextEntryPos + 1; // jump cursor to the second LF character

    if (FindLineStart("\nmsgid ", m_Entry.msgID.Pos))
    {
      if (FindLineStart("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID())
      {
        m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id
        return true;
      }

      size_t plurPos;
      if (FindLineStart("\nmsgid_plural ", plurPos))
      {
        m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry
        return true;
      }

      m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id
      return true;
    }
  } while (m_nextEntryPos != m_POfilelength - 1);

  // we reached the end of buffer AND we have not found a valid entry
  return false;
}

/*!
 * \brief Extract the strings of the current entry (as selected by GetNextEntry()).
 * \param bisSourceLang if true only msgid is read, and only for entries with a
 *        numeric id; for any other entry type msgid is cleared
 *
 * For the non source language case msgid/msgctxt are read for entries without
 * a numeric id, then either msgstr or the msgstr[n] plural forms are read,
 * depending on the entry type.
 */
void CPODocument::ParseEntry(bool bisSourceLang)
{
  if (bisSourceLang)
  {
    if (m_Entry.Type == ID_FOUND)
      GetString(m_Entry.msgID);
    else
      m_Entry.msgID.Str.clear();
    return;
  }

  if (m_Entry.Type != ID_FOUND)
  {
    GetString(m_Entry.msgID);
    if (FindLineStart("\nmsgctxt ", m_Entry.msgCtxt.Pos))
      GetString(m_Entry.msgCtxt);
    else
      m_Entry.msgCtxt.Str.clear();
  }

  if (m_Entry.Type != MSGID_PLURAL_FOUND)
  {
    if (FindLineStart("\nmsgstr ", m_Entry.msgStr.Pos))
    {
      GetString(m_Entry.msgStr);
      GetString(m_Entry.msgID);
    }
    else
    {
      CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: {}",
                m_Entry.Content);
      m_Entry.msgStr.Str.clear();
    }
    return;
  }

  // We found a plural form entry. We read it into a vector of CStrEntry types
  m_Entry.msgStrPlural.clear();
  std::string strPattern = "\nmsgstr[0] ";
  CStrEntry strEntry;

  for (int n = 0; n < 7; n++)
  {
    // index 8 of the pattern is the digit between the square brackets
    strPattern[8] = static_cast<char>(n + '0');
    if (!FindLineStart(strPattern, strEntry.Pos))
      break;

    GetString(strEntry);
    if (strEntry.Str.empty())
      break;
    m_Entry.msgStrPlural.push_back(strEntry);
  }

  if (m_Entry.msgStrPlural.empty())
  {
    CLog::Log(LOGERROR,
              "POParser: msgstr[] plural lines have zero valid strings. "
              "Failed entry: {}",
              m_Entry.Content);
    m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector
  }
}

/*!
 * \brief Get one plural form of the current entry.
 * \param plural zero based index of the requested plural form
 * \return the requested string; if the index is out of range an error is
 *         logged and the last available plural form is returned instead
 *
 * Relies on the plural vector never being empty: the constructor and
 * ParseEntry() both leave at least one element in it.
 */
const std::string& CPODocument::GetPlurMsgstr(size_t plural) const
{
  // compare this way round so that a huge index can not wrap around to zero
  if (plural >= m_Entry.msgStrPlural.size())
  {
    CLog::Log(LOGERROR,
              "POParser: msgstr[{}] plural field requested, but not found in PO file. "
              "Failed entry: {}",
              static_cast<int>(plural), m_Entry.Content);
    plural = m_Entry.msgStrPlural.size() - 1;
  }
  return m_Entry.msgStrPlural[plural].Str;
}

/*!
 * \brief Replace backslash escape sequences by the characters they stand for.
 * \param strInput the escaped string
 * \return the unescaped string
 *
 * An unknown escape sequence is logged and dropped from the output. A lone
 * backslash at the very end of the input is logged and ends the processing.
 */
std::string CPODocument::UnescapeString(const std::string &strInput)
{
  std::string strOutput;
  if (strInput.empty())
    return strOutput;

  strOutput.reserve(strInput.size());

  std::string::const_iterator it = strInput.begin();
  while (it != strInput.end())
  {
    char oescchar = *it++;
    if (oescchar == '\\')
    {
      if (it == strInput.end())
      {
        CLog::Log(LOGERROR,
                  "POParser: warning, unhandled escape character "
                  "at line-end. Problematic entry: {}",
                  m_Entry.Content);
        break;
      }

      switch (*it++)
      {
        case 'a':  oescchar = '\a'; break;
        case 'b':  oescchar = '\b'; break;
        case 'v':  oescchar = '\v'; break;
        case 'n':  oescchar = '\n'; break;
        case 't':  oescchar = '\t'; break;
        case 'r':  oescchar = '\r'; break;
        case '"':  oescchar = '"' ; break;
        case '0':  oescchar = '\0'; break;
        case 'f':  oescchar = '\f'; break;
        case '?':  oescchar = '\?'; break;
        case '\'': oescchar = '\''; break;
        case '\\': oescchar = '\\'; break;

        default:
        {
          CLog::Log(LOGERROR,
                    "POParser: warning, unhandled escape character. Problematic entry: {}",
                    m_Entry.Content);
          continue; // skip the whole unknown sequence, nothing is appended
        }
      }
    }
    strOutput.push_back(oescchar);
  }
  return strOutput;
}

/*!
 * \brief Search the current entry for a line starting with the given pattern.
 * \param strToFind the pattern to look for
 * \param[out] FoundPos on success the position right after the pattern, i.e.
 *             the start of the real data; otherwise npos or the position of
 *             the pattern itself
 * \return true if the pattern was found and is followed by enough data
 */
bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos)
{
  FoundPos = m_Entry.Content.find(strToFind);

  if (FoundPos == std::string::npos ||
      FoundPos + strToFind.size() + 2 > m_Entry.Content.size())
    return false; // if we don't find the string or if we don't have at least one char after it

  FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data
  return true;
}

/*!
 * \brief Read the numeric id of the current entry, starting at m_Entry.xIDPos.
 * \return true if an id was read into m_Entry.xID, false (after logging) if
 *         the text at that position does not start with a digit
 */
bool CPODocument::ParseNumID()
{
  // isdigit() must not be called with a negative value, which a plain char
  // holding a non-ASCII byte may be, so convert to unsigned char first
  const unsigned char firstChar = static_cast<unsigned char>(m_Entry.Content.at(m_Entry.xIDPos));
  if (std::isdigit(firstChar)) // verify if the first char is digit
  {
    // we check for the numeric id for the first 10 chars (uint32)
    m_Entry.xID = std::strtol(m_Entry.Content.c_str() + m_Entry.xIDPos, nullptr, 10);
    return true;
  }

  CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, "
                      "entry was handled as normal msgid entry");
  CLog::Log(LOGERROR, "POParser: The problematic entry: {}", m_Entry.Content);
  return false;
}

/*!
 * \brief Read a (possibly multi line) quoted string of the current entry.
 * \param[in,out] strEntry Pos gives the position of the opening quote in
 *                m_Entry.Content, Str receives the unescaped result
 *
 * Consecutive lines are concatenated as long as each of them both starts and
 * ends with a double quote; the quotes themselves are not copied.
 */
void CPODocument::GetString(CStrEntry &strEntry)
{
  size_t startPos = strEntry.Pos;
  strEntry.Str.clear();

  while (startPos < m_Entry.Content.size())
  {
    size_t nextLFPos = m_Entry.Content.find('\n', startPos);
    if (nextLFPos == std::string::npos)
      nextLFPos = m_Entry.Content.size();

    // check syntax, if it really is a valid quoted string line
    if (nextLFPos - startPos < 2 || m_Entry.Content[startPos] != '\"' ||
        m_Entry.Content[nextLFPos - 1] != '\"')
      break;

    strEntry.Str.append(m_Entry.Content, startPos + 1, nextLFPos - 2 - startPos);
    startPos = nextLFPos + 1;
  }

  strEntry.Str = UnescapeString(strEntry.Str);
}

/*!
 * \brief Convert CR and CRLF line endings of the buffer to LF, in memory.
 * \param filename name of the file, only used for the log message
 *
 * The detected line ending style is decided (and logged) by looking at the
 * first CR of the buffer only.
 */
void CPODocument::ConvertLineEnds(const std::string &filename)
{
  const size_t foundPos = m_strBuffer.find_first_of('\r');
  if (foundPos == std::string::npos)
    return; // We have only Linux style line endings in the file, nothing to do

  if (foundPos + 1 >= m_strBuffer.size() || m_strBuffer[foundPos + 1] != '\n')
    CLog::Log(LOGDEBUG,
              "POParser: PO file has Mac Style Line Endings. "
              "Converted in memory to Linux LF for file: {}",
              filename);
  else
    CLog::Log(LOGDEBUG,
              "POParser: PO file has Win Style Line Endings. "
              "Converted in memory to Linux LF for file: {}",
              filename);

  std::string strTemp;
  strTemp.reserve(m_strBuffer.size());
  for (std::string::const_iterator it = m_strBuffer.begin(); it != m_strBuffer.end(); ++it)
  {
    if (*it == '\r')
    {
      if (it + 1 == m_strBuffer.end() || *(it + 1) != '\n')
        strTemp.push_back('\n'); // convert Mac style line ending and continue
      continue; // we have Win style line ending so we exclude this CR now
    }
    strTemp.push_back(*it);
  }
  m_strBuffer.swap(strTemp);
  m_POfilelength = m_strBuffer.size();
}