1 files changed, 313 insertions, 0 deletions
diff --git a/xbmc/utils/POUtils.cpp b/xbmc/utils/POUtils.cpp
new file mode 100644
index 0000000..830336d
--- /dev/null
+++ b/xbmc/utils/POUtils.cpp
@@ -0,0 +1,313 @@
+/*
+ *  Copyright (C) 2012-2018 Team Kodi
+ *  This file is part of Kodi - https://kodi.tv
+ *
+ *  SPDX-License-Identifier: GPL-2.0-or-later
+ *  See LICENSES/README.md for more information.
+ */
+
+#include "utils/POUtils.h"
+
+#include "URL.h"
+#include "filesystem/File.h"
+#include "utils/log.h"
+
+#include <stdlib.h>
+
+// Creates an empty document: no buffer loaded yet, every offset and the length at zero.
+CPODocument::CPODocument()
+{
+  m_POfilelength = 0;
+  m_CursorPos = m_nextEntryPos = 0;
+  // keep exactly one (empty) plural slot in the entry
+  m_Entry.msgStrPlural.assign(1, CStrEntry());
+}
+
+CPODocument::~CPODocument() = default; // compiler-generated destructor, no hand-written cleanup
+
+// Loads a PO file into memory, normalizes its line endings to LF and reads the header entry.
+// Returns false if the file is missing, shorter than a minimal header, or has no header entry.
+bool CPODocument::LoadFile(const std::string &pofilename)
+{
+  CURL poFileUrl(pofilename);
+  if (!XFILE::CFile::Exists(poFileUrl))
+    return false;
+
+  XFILE::CFile file;
+  std::vector<uint8_t> buf;
+  if (file.LoadFile(poFileUrl, buf) < 18) // at least a size of a minimalistic header
+  {
+    CLog::Log(LOGERROR, "{}: can't load file \"{}\" or file is too small", __FUNCTION__,
+              pofilename);
+    return false;
+  }
+
+  // restart parsing from the top, otherwise a reused document would continue at a stale offset
+  m_CursorPos = m_nextEntryPos = 0;
+
+  // the leading LF lets the "\n<keyword>" search patterns also match on the very first line
+  m_strBuffer.assign(1, '\n');
+  m_strBuffer.append(reinterpret_cast<const char*>(buf.data()), buf.size());
+  buf.clear();
+  ConvertLineEnds(pofilename);
+
+  if (m_strBuffer.back() != '\n') // make sure the buffer is terminated by an LF
+    m_strBuffer += '\n';
+  m_POfilelength = m_strBuffer.size();
+
+  if (GetNextEntry() && m_Entry.Type == MSGID_FOUND)
+    return true;
+  CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: {}", pofilename);
+  return false;
+}
+
+// Moves on to the next entry that contains a msgid line; entries are separated by an empty
+// line (LFLF). On success m_Entry.Content holds the raw text and m_Entry.Type its kind.
+// Returns false when the end of the buffer is reached without finding such an entry.
+bool CPODocument::GetNextEntry()
+{
+  // m_nextEntryPos is set to this value to flag that the entry just read was the last one
+  const auto lastPos = m_POfilelength - 1;
+
+  for (;;)
+  {
+    // an entry ends at the next LFLF; without one it runs to the end of the buffer
+    m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos);
+    if (m_nextEntryPos == std::string::npos)
+      m_nextEntryPos = lastPos;
+
+    // copy the raw entry into the work string, then park the cursor on the second LF
+    m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos + 1);
+    m_CursorPos = m_nextEntryPos + 1;
+
+    if (FindLineStart("\nmsgid ", m_Entry.msgID.Pos))
+      break; // usable entry, it is classified below
+
+    if (m_nextEntryPos == lastPos)
+      return false; // end of buffer reached AND no valid entry found
+  }
+
+  // a valid numeric id takes precedence over a plural form, a plural form over a plain msgid
+  size_t plurPos;
+  if (FindLineStart("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID())
+    m_Entry.Type = ID_FOUND; // entry with a valid numeric id
+  else if (FindLineStart("\nmsgid_plural ", plurPos))
+    m_Entry.Type = MSGID_PLURAL_FOUND; // pluralized entry
+  else
+    m_Entry.Type = MSGID_FOUND; // normal entry, with no numeric id
+
+  return true;
+}
+
+// Extracts the strings of the entry selected by the last GetNextEntry() call into m_Entry.
+// Source language: only msgid, and only for numeric-id entries. Otherwise msgid, msgctxt
+// (entries without numeric id) and msgstr, or the msgstr[n] list for plural entries.
+void CPODocument::ParseEntry(bool bisSourceLang)
+{
+  if (bisSourceLang)
+  {
+    if (m_Entry.Type == ID_FOUND)
+      GetString(m_Entry.msgID);
+    else
+      m_Entry.msgID.Str.clear();
+    return;
+  }
+
+  // Read msgid once, up front, for every entry type: a numeric-id entry without msgstr must
+  // not keep the msgid of the previous entry, and a bad escape in it is logged only once.
+  GetString(m_Entry.msgID);
+
+  if (m_Entry.Type != ID_FOUND)
+  {
+    if (FindLineStart("\nmsgctxt ", m_Entry.msgCtxt.Pos))
+      GetString(m_Entry.msgCtxt);
+    else
+      m_Entry.msgCtxt.Str.clear();
+  }
+
+  if (m_Entry.Type != MSGID_PLURAL_FOUND)
+  {
+    if (FindLineStart("\nmsgstr ", m_Entry.msgStr.Pos))
+      GetString(m_Entry.msgStr);
+    else
+    {
+      CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: {}",
+                m_Entry.Content);
+      m_Entry.msgStr.Str.clear();
+    }
+    return;
+  }
+
+  // Plural entry: gather msgstr[0..6], stopping at the first missing or empty form.
+  m_Entry.msgStrPlural.clear();
+  std::string strPattern = "\nmsgstr[0] ";
+  CStrEntry strEntry;
+
+  for (int n = 0; n < 7; n++)
+  {
+    strPattern[8] = static_cast<char>(n + '0'); // offset 8 is the digit inside the brackets
+    if (!FindLineStart(strPattern, strEntry.Pos))
+      break;
+    GetString(strEntry);
+    if (strEntry.Str.empty())
+      break;
+    m_Entry.msgStrPlural.push_back(strEntry);
+  }
+
+  if (m_Entry.msgStrPlural.empty())
+  {
+    CLog::Log(LOGERROR,
+              "POParser: msgstr[] plural lines have zero valid strings. "
+              "Failed entry: {}",
+              m_Entry.Content);
+    m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector
+  }
+}
+
+// Returns plural form number `plural` of the current entry. An index beyond the stored forms
+// is logged and the last stored form is returned instead.
+const std::string& CPODocument::GetPlurMsgstr(size_t plural) const
+{
+  if (plural >= m_Entry.msgStrPlural.size()) // unlike "size < plural+1" this cannot wrap around
+  {
+    CLog::Log(LOGERROR, "POParser: msgstr[{}] plural field requested, but not found in PO file. "
+              "Failed entry: {}", static_cast<int>(plural), m_Entry.Content);
+    plural = m_Entry.msgStrPlural.size() - 1; // constructor and ParseEntry keep >= 1 element
+  }
+  return m_Entry.msgStrPlural[plural].Str;
+}
+
+// Returns strInput with the C style escape sequences of a PO string replaced by the characters
+// they stand for. A backslash that ends the input stops the conversion; an unknown sequence
+// is skipped entirely. Both cases are logged together with the entry being parsed.
+std::string CPODocument::UnescapeString(const std::string &strInput)
+{
+  // escapeCodes[i] is the character behind the backslash, escapeValues[i] what it decodes to
+  static const std::string escapeCodes = "abvntr\"0f?'\\";
+  static const char escapeValues[] = {'\a', '\b', '\v', '\n', '\t', '\r',
+                                      '"',  '\0', '\f', '\?', '\'', '\\'};
+
+  std::string decoded;
+  decoded.reserve(strInput.size());
+
+  // walk the input by index; an escape sequence consumes two positions
+  const size_t length = strInput.size();
+  for (size_t pos = 0; pos < length; ++pos)
+  {
+    if (strInput[pos] != '\\')
+    {
+      decoded.push_back(strInput[pos]); // ordinary character, copied as is
+      continue;
+    }
+
+    ++pos; // step onto the character that selects the escape
+    if (pos == length)
+    {
+      // a lone backslash ends the input, there is nothing left to decode
+      CLog::Log(LOGERROR,
+                "POParser: warning, unhandled escape character "
+                "at line-end. Problematic entry: {}",
+                m_Entry.Content);
+      break;
+    }
+
+    const size_t code = escapeCodes.find(strInput[pos]);
+    if (code == std::string::npos)
+    {
+      // neither the backslash nor the character behind it reaches the output
+      CLog::Log(LOGERROR,
+                "POParser: warning, unhandled escape character. Problematic entry: {}",
+                m_Entry.Content);
+      continue;
+    }
+
+    // known sequence: emit the single character it stands for
+    decoded.push_back(escapeValues[code]);
+  }
+
+  return decoded;
+}
+
+// Finds strToFind in the current entry and, if at least two more characters follow the match,
+// moves FoundPos to the offset right behind the match and returns true.
+bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos)
+{
+  FoundPos = m_Entry.Content.find(strToFind);
+  const bool usable = FoundPos != std::string::npos &&
+                      FoundPos + strToFind.size() + 2 <= m_Entry.Content.size();
+  if (usable)
+    FoundPos += strToFind.size(); // step over the pattern to the exact start of the real data
+  return usable;
+}
+
+// Reads the decimal id that follows `msgctxt "#` into m_Entry.xID; false if no digit is there.
+bool CPODocument::ParseNumID()
+{
+  const char first = m_Entry.Content.at(m_Entry.xIDPos);
+  if (first >= '0' && first <= '9') // isdigit() is undefined for negative (UTF-8) char values
+  {
+    m_Entry.xID = strtoul(&m_Entry.Content[m_Entry.xIDPos], nullptr, 10); // no clamp at 2^31-1
+    return true;
+  }
+  CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, "
+                      "entry was handled as normal msgid entry");
+  CLog::Log(LOGERROR, "POParser: The problematic entry: {}", m_Entry.Content);
+  return false;
+}
+
+// Collects the quoted text starting at strEntry.Pos into strEntry.Str: each consecutive line
+// of the form "..." contributes what is between its quotes; the result is then unescaped.
+void CPODocument::GetString(CStrEntry &strEntry)
+{
+  const std::string& text = m_Entry.Content;
+  std::string joined;
+
+  for (size_t lineStart = strEntry.Pos; lineStart < text.size();)
+  {
+    size_t lineEnd = text.find('\n', lineStart);
+    if (lineEnd == std::string::npos)
+      lineEnd = text.size();
+
+    // a valid line holds at least the two quotes and both starts and ends with one
+    const size_t lineLen = lineEnd - lineStart;
+    if (lineLen < 2 || text[lineStart] != '\"' || text[lineEnd - 1] != '\"')
+      break;
+
+    joined.append(text, lineStart + 1, lineLen - 2);
+    lineStart = lineEnd + 1;
+  }
+  strEntry.Str = UnescapeString(joined);
+}
+
+// Converts CRLF (Win) and lone CR (Mac) line ends in m_strBuffer to LF and refreshes
+// m_POfilelength. The style named in the debug log is decided by the first CR in the buffer.
+void CPODocument::ConvertLineEnds(const std::string &filename)
+{
+  const size_t firstCR = m_strBuffer.find('\r');
+  if (firstCR == std::string::npos)
+    return; // We have only Linux style line endings in the file, nothing to do
+
+  const size_t total = m_strBuffer.size();
+  if (firstCR + 1 < total && m_strBuffer[firstCR + 1] == '\n')
+    CLog::Log(LOGDEBUG,
+              "POParser: PO file has Win Style Line Endings. "
+              "Converted in memory to Linux LF for file: {}",
+              filename);
+  else
+    CLog::Log(LOGDEBUG,
+              "POParser: PO file has Mac Style Line Endings. "
+              "Converted in memory to Linux LF for file: {}",
+              filename);
+
+  std::string converted;
+  converted.reserve(total);
+  for (size_t i = 0; i < total; ++i)
+  {
+    if (m_strBuffer[i] != '\r')
+      converted.push_back(m_strBuffer[i]);
+    else if (i + 1 == total || m_strBuffer[i + 1] != '\n')
+      converted.push_back('\n'); // lone CR becomes LF; the CR of a CRLF pair is dropped
+  }
+  m_strBuffer.swap(converted);
+  m_POfilelength = m_strBuffer.size();
+}