summaryrefslogtreecommitdiffstats
path: root/xbmc/utils/CharsetConverter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'xbmc/utils/CharsetConverter.cpp')
-rw-r--r--xbmc/utils/CharsetConverter.cpp910
1 files changed, 910 insertions, 0 deletions
diff --git a/xbmc/utils/CharsetConverter.cpp b/xbmc/utils/CharsetConverter.cpp
new file mode 100644
index 0000000..89976ee
--- /dev/null
+++ b/xbmc/utils/CharsetConverter.cpp
@@ -0,0 +1,910 @@
+/*
+ * Copyright (C) 2005-2018 Team Kodi
+ * This file is part of Kodi - https://kodi.tv
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * See LICENSES/README.md for more information.
+ */
+
+#include "CharsetConverter.h"
+
+#include "LangInfo.h"
+#include "guilib/LocalizeStrings.h"
+#include "log.h"
+#include "settings/Settings.h"
+#include "settings/lib/Setting.h"
+#include "settings/lib/SettingDefinitions.h"
+#include "utils/StringUtils.h"
+#include "utils/Utf8Utils.h"
+
+#include <algorithm>
+#include <mutex>
+
+#include <fribidi.h>
+#include <iconv.h>
+
+#ifdef WORDS_BIGENDIAN
+ #define ENDIAN_SUFFIX "BE"
+#else
+ #define ENDIAN_SUFFIX "LE"
+#endif
+
+#if defined(TARGET_DARWIN)
+ #define WCHAR_IS_UCS_4 1
+ #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
+ #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
+ #define UTF8_SOURCE "UTF-8-MAC"
+ #define WCHAR_CHARSET UTF32_CHARSET
+#elif defined(TARGET_WINDOWS)
+ #define WCHAR_IS_UTF16 1
+ #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
+ #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
+ #define UTF8_SOURCE "UTF-8"
+ #define WCHAR_CHARSET UTF16_CHARSET
+#elif defined(TARGET_FREEBSD)
+ #define WCHAR_IS_UCS_4 1
+ #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
+ #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
+ #define UTF8_SOURCE "UTF-8"
+ #define WCHAR_CHARSET UTF32_CHARSET
+#elif defined(TARGET_ANDROID)
+ #define WCHAR_IS_UCS_4 1
+ #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
+ #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
+ #define UTF8_SOURCE "UTF-8"
+ #define WCHAR_CHARSET UTF32_CHARSET
+#else
+ #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
+ #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
+ #define UTF8_SOURCE "UTF-8"
+ #define WCHAR_CHARSET "WCHAR_T"
+ #if __STDC_ISO_10646__
+ #ifdef SIZEOF_WCHAR_T
+ #if SIZEOF_WCHAR_T == 4
+ #define WCHAR_IS_UCS_4 1
+ #elif SIZEOF_WCHAR_T == 2
+ #define WCHAR_IS_UCS_2 1
+ #endif
+ #endif
+ #endif
+#endif
+
+#define NO_ICONV ((iconv_t)-1)
+
+enum SpecialCharset
+{
+ NotSpecialCharset = 0,
+ SystemCharset,
+ UserCharset /* locale.charset */,
+ SubtitleCharset /* subtitles.charset */,
+};
+
+class CConverterType : public CCriticalSection
+{
+public:
+ CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
+ CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
+ CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1);
+ CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1);
+ CConverterType(const CConverterType& other);
+ ~CConverterType();
+
+ iconv_t GetConverter(std::unique_lock<CCriticalSection>& converterLock);
+
+ void Reset(void);
+ void ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
+ std::string GetSourceCharset(void) const { return m_sourceCharset; }
+ std::string GetTargetCharset(void) const { return m_targetCharset; }
+ unsigned int GetTargetSingleCharMaxLen(void) const { return m_targetSingleCharMaxLen; }
+
+private:
+ static std::string ResolveSpecialCharset(enum SpecialCharset charset);
+
+ enum SpecialCharset m_sourceSpecialCharset;
+ std::string m_sourceCharset;
+ enum SpecialCharset m_targetSpecialCharset;
+ std::string m_targetCharset;
+ iconv_t m_iconv;
+ unsigned int m_targetSingleCharMaxLen;
+};
+
+CConverterType::CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
+ m_sourceSpecialCharset(NotSpecialCharset),
+ m_sourceCharset(sourceCharset),
+ m_targetSpecialCharset(NotSpecialCharset),
+ m_targetCharset(targetCharset),
+ m_iconv(NO_ICONV),
+ m_targetSingleCharMaxLen(targetSingleCharMaxLen)
+{
+}
+
+CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
+ m_sourceSpecialCharset(sourceSpecialCharset),
+ m_sourceCharset(),
+ m_targetSpecialCharset(NotSpecialCharset),
+ m_targetCharset(targetCharset),
+ m_iconv(NO_ICONV),
+ m_targetSingleCharMaxLen(targetSingleCharMaxLen)
+{
+}
+
+CConverterType::CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
+ m_sourceSpecialCharset(NotSpecialCharset),
+ m_sourceCharset(sourceCharset),
+ m_targetSpecialCharset(targetSpecialCharset),
+ m_targetCharset(),
+ m_iconv(NO_ICONV),
+ m_targetSingleCharMaxLen(targetSingleCharMaxLen)
+{
+}
+
+CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
+ m_sourceSpecialCharset(sourceSpecialCharset),
+ m_sourceCharset(),
+ m_targetSpecialCharset(targetSpecialCharset),
+ m_targetCharset(),
+ m_iconv(NO_ICONV),
+ m_targetSingleCharMaxLen(targetSingleCharMaxLen)
+{
+}
+
+CConverterType::CConverterType(const CConverterType& other) : CCriticalSection(),
+ m_sourceSpecialCharset(other.m_sourceSpecialCharset),
+ m_sourceCharset(other.m_sourceCharset),
+ m_targetSpecialCharset(other.m_targetSpecialCharset),
+ m_targetCharset(other.m_targetCharset),
+ m_iconv(NO_ICONV),
+ m_targetSingleCharMaxLen(other.m_targetSingleCharMaxLen)
+{
+}
+
+CConverterType::~CConverterType()
+{
+ std::unique_lock<CCriticalSection> lock(*this);
+ if (m_iconv != NO_ICONV)
+ iconv_close(m_iconv);
+ lock.unlock(); // ensure unlocking before final destruction
+}
+
+iconv_t CConverterType::GetConverter(std::unique_lock<CCriticalSection>& converterLock)
+{
+ // ensure that this unique instance is locked externally
+ if (converterLock.mutex() != this)
+ return NO_ICONV;
+
+ if (m_iconv == NO_ICONV)
+ {
+ if (m_sourceSpecialCharset)
+ m_sourceCharset = ResolveSpecialCharset(m_sourceSpecialCharset);
+ if (m_targetSpecialCharset)
+ m_targetCharset = ResolveSpecialCharset(m_targetSpecialCharset);
+
+ m_iconv = iconv_open(m_targetCharset.c_str(), m_sourceCharset.c_str());
+
+ if (m_iconv == NO_ICONV)
+ CLog::Log(LOGERROR, "{}: iconv_open() for \"{}\" -> \"{}\" failed, errno = {} ({})",
+ __FUNCTION__, m_sourceCharset, m_targetCharset, errno, strerror(errno));
+ }
+
+ return m_iconv;
+}
+
+void CConverterType::Reset(void)
+{
+ std::unique_lock<CCriticalSection> lock(*this);
+ if (m_iconv != NO_ICONV)
+ {
+ iconv_close(m_iconv);
+ m_iconv = NO_ICONV;
+ }
+
+ if (m_sourceSpecialCharset)
+ m_sourceCharset.clear();
+ if (m_targetSpecialCharset)
+ m_targetCharset.clear();
+
+}
+
+void CConverterType::ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/)
+{
+ std::unique_lock<CCriticalSection> lock(*this);
+ if (sourceCharset != m_sourceCharset || targetCharset != m_targetCharset)
+ {
+ if (m_iconv != NO_ICONV)
+ {
+ iconv_close(m_iconv);
+ m_iconv = NO_ICONV;
+ }
+
+ m_sourceSpecialCharset = NotSpecialCharset;
+ m_sourceCharset = sourceCharset;
+ m_targetSpecialCharset = NotSpecialCharset;
+ m_targetCharset = targetCharset;
+ m_targetSingleCharMaxLen = targetSingleCharMaxLen;
+ }
+}
+
+std::string CConverterType::ResolveSpecialCharset(enum SpecialCharset charset)
+{
+ switch (charset)
+ {
+ case SystemCharset:
+ return "";
+ case UserCharset:
+ return g_langInfo.GetGuiCharSet();
+ case SubtitleCharset:
+ return g_langInfo.GetSubtitleCharSet();
+ case NotSpecialCharset:
+ default:
+ return "UTF-8"; /* dummy value */
+ }
+}
+
+enum StdConversionType /* Keep it in sync with CCharsetConverter::CInnerConverter::m_stdConversion */
+{
+ NoConversion = -1,
+ Utf8ToUtf32 = 0,
+ Utf32ToUtf8,
+ Utf32ToW,
+ WToUtf32,
+ SubtitleCharsetToUtf8,
+ Utf8ToUserCharset,
+ UserCharsetToUtf8,
+ Utf32ToUserCharset,
+ WtoUtf8,
+ Utf16LEtoW,
+ Utf16BEtoUtf8,
+ Utf16LEtoUtf8,
+ Utf8toW,
+ Utf8ToSystem,
+ SystemToUtf8,
+ Ucs2CharsetToUtf8,
+ MacintoshToUtf8,
+ NumberOfStdConversionTypes /* Dummy sentinel entry */
+};
+
+/* We don't want to pollute header file with many additional includes and definitions, so put
+ here all staff that require usage of types defined in this file or in additional headers */
+class CCharsetConverter::CInnerConverter
+{
+public:
+ static bool logicalToVisualBiDi(const std::u32string& stringSrc,
+ std::u32string& stringDst,
+ FriBidiCharType base = FRIBIDI_TYPE_LTR,
+ const bool failOnBadString = false,
+ int* visualToLogicalMap = nullptr);
+ static bool isBidiDirectionRTL(const std::string& stringSrc);
+
+ template<class INPUT,class OUTPUT>
+ static bool stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
+ template<class INPUT,class OUTPUT>
+ static bool customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
+
+ template<class INPUT,class OUTPUT>
+ static bool convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
+
+ static CConverterType m_stdConversion[NumberOfStdConversionTypes];
+ static CCriticalSection m_critSectionFriBiDi;
+};
+
+/* single symbol sizes in chars */
+const int CCharsetConverter::m_Utf8CharMinSize = 1;
+const int CCharsetConverter::m_Utf8CharMaxSize = 4;
+
+// clang-format off
+CConverterType CCharsetConverter::CInnerConverter::m_stdConversion[NumberOfStdConversionTypes] = /* keep it in sync with enum StdConversionType */
+{
+ /* Utf8ToUtf32 */ CConverterType(UTF8_SOURCE, UTF32_CHARSET),
+ /* Utf32ToUtf8 */ CConverterType(UTF32_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* Utf32ToW */ CConverterType(UTF32_CHARSET, WCHAR_CHARSET),
+ /* WToUtf32 */ CConverterType(WCHAR_CHARSET, UTF32_CHARSET),
+ /* SubtitleCharsetToUtf8*/CConverterType(SubtitleCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* Utf8ToUserCharset */ CConverterType(UTF8_SOURCE, UserCharset),
+ /* UserCharsetToUtf8 */ CConverterType(UserCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* Utf32ToUserCharset */ CConverterType(UTF32_CHARSET, UserCharset),
+ /* WtoUtf8 */ CConverterType(WCHAR_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* Utf16LEtoW */ CConverterType("UTF-16LE", WCHAR_CHARSET),
+ /* Utf16BEtoUtf8 */ CConverterType("UTF-16BE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* Utf16LEtoUtf8 */ CConverterType("UTF-16LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* Utf8toW */ CConverterType(UTF8_SOURCE, WCHAR_CHARSET),
+ /* Utf8ToSystem */ CConverterType(UTF8_SOURCE, SystemCharset),
+ /* SystemToUtf8 */ CConverterType(SystemCharset, UTF8_SOURCE),
+ /* Ucs2CharsetToUtf8 */ CConverterType("UCS-2LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
+ /* MacintoshToUtf8 */ CConverterType("macintosh", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize)
+};
+// clang-format on
+
+CCriticalSection CCharsetConverter::CInnerConverter::m_critSectionFriBiDi;
+
+template<class INPUT,class OUTPUT>
+bool CCharsetConverter::CInnerConverter::stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
+{
+ strDest.clear();
+ if (strSource.empty())
+ return true;
+
+ if (convertType < 0 || convertType >= NumberOfStdConversionTypes)
+ return false;
+
+ CConverterType& convType = m_stdConversion[convertType];
+ std::unique_lock<CCriticalSection> converterLock(convType);
+
+ return convert(convType.GetConverter(converterLock), convType.GetTargetSingleCharMaxLen(), strSource, strDest, failOnInvalidChar);
+}
+
+template<class INPUT,class OUTPUT>
+bool CCharsetConverter::CInnerConverter::customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
+{
+ strDest.clear();
+ if (strSource.empty())
+ return true;
+
+ iconv_t conv = iconv_open(targetCharset.c_str(), sourceCharset.c_str());
+ if (conv == NO_ICONV)
+ {
+ CLog::Log(LOGERROR, "{}: iconv_open() for \"{}\" -> \"{}\" failed, errno = {} ({})",
+ __FUNCTION__, sourceCharset, targetCharset, errno, strerror(errno));
+ return false;
+ }
+ const int dstMultp = (targetCharset.compare(0, 5, "UTF-8") == 0) ? CCharsetConverter::m_Utf8CharMaxSize : 1;
+ const bool result = convert(conv, dstMultp, strSource, strDest, failOnInvalidChar);
+ iconv_close(conv);
+
+ return result;
+}
+
+/* iconv may declare inbuf to be char** rather than const char** depending on platform and version,
+ so provide a wrapper that handles both */
+struct charPtrPtrAdapter
+{
+ const char** pointer;
+ explicit charPtrPtrAdapter(const char** p) :
+ pointer(p) { }
+ operator char**()
+ { return const_cast<char**>(pointer); }
+ operator const char**()
+ { return pointer; }
+};
+
+template<class INPUT,class OUTPUT>
+bool CCharsetConverter::CInnerConverter::convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
+{
+ if (type == NO_ICONV)
+ return false;
+
+ //input buffer for iconv() is the buffer from strSource
+ size_t inBufSize = (strSource.length() + 1) * sizeof(typename INPUT::value_type);
+ const char* inBuf = (const char*)strSource.c_str();
+
+ //allocate output buffer for iconv()
+ size_t outBufSize = (strSource.length() + 1) * sizeof(typename OUTPUT::value_type) * multiplier;
+ char* outBuf = (char*)malloc(outBufSize);
+ if (outBuf == NULL)
+ {
+ CLog::Log(LOGFATAL, "{}: malloc failed", __FUNCTION__);
+ return false;
+ }
+
+ size_t inBytesAvail = inBufSize; //how many bytes iconv() can read
+ size_t outBytesAvail = outBufSize; //how many bytes iconv() can write
+ const char* inBufStart = inBuf; //where in our input buffer iconv() should start reading
+ char* outBufStart = outBuf; //where in out output buffer iconv() should start writing
+
+ size_t returnV;
+ while(true)
+ {
+ //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail
+ returnV = iconv(type, charPtrPtrAdapter(&inBufStart), &inBytesAvail, &outBufStart, &outBytesAvail);
+
+ if (returnV == (size_t)-1)
+ {
+ if (errno == E2BIG) //output buffer is not big enough
+ {
+ //save where iconv() ended converting, realloc might make outBufStart invalid
+ size_t bytesConverted = outBufSize - outBytesAvail;
+
+ //make buffer twice as big
+ outBufSize *= 2;
+ char* newBuf = (char*)realloc(outBuf, outBufSize);
+ if (!newBuf)
+ {
+ CLog::Log(LOGFATAL, "{} realloc failed with errno={}({})", __FUNCTION__, errno,
+ strerror(errno));
+ break;
+ }
+ outBuf = newBuf;
+
+ //update the buffer pointer and counter
+ outBufStart = outBuf + bytesConverted;
+ outBytesAvail = outBufSize - bytesConverted;
+
+ //continue in the loop and convert the rest
+ continue;
+ }
+ else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input
+ {
+ if (failOnInvalidChar)
+ break;
+
+ //skip invalid byte
+ inBufStart++;
+ inBytesAvail--;
+ //continue in the loop and convert the rest
+ continue;
+ }
+ else if (errno == EINVAL) /* Invalid sequence at the end of input buffer */
+ {
+ if (!failOnInvalidChar)
+ returnV = 0; /* reset error status to use converted part */
+
+ break;
+ }
+ else //iconv() had some other error
+ {
+ CLog::Log(LOGERROR, "{}: iconv() failed, errno={} ({})", __FUNCTION__, errno,
+ strerror(errno));
+ }
+ }
+ break;
+ }
+
+ //complete the conversion (reset buffers), otherwise the current data will prefix the data on the next call
+ if (iconv(type, NULL, NULL, &outBufStart, &outBytesAvail) == (size_t)-1)
+ CLog::Log(LOGERROR, "{} failed cleanup errno={}({})", __FUNCTION__, errno, strerror(errno));
+
+ if (returnV == (size_t)-1)
+ {
+ free(outBuf);
+ return false;
+ }
+ //we're done
+
+ const typename OUTPUT::size_type sizeInChars = (typename OUTPUT::size_type) (outBufSize - outBytesAvail) / sizeof(typename OUTPUT::value_type);
+ typename OUTPUT::const_pointer strPtr = (typename OUTPUT::const_pointer) outBuf;
+ /* Make sure that all buffer is assigned and string is stopped at end of buffer */
+ if (strPtr[sizeInChars-1] == 0 && strSource[strSource.length()-1] != 0)
+ strDest.assign(strPtr, sizeInChars-1);
+ else
+ strDest.assign(strPtr, sizeInChars);
+
+ free(outBuf);
+
+ return true;
+}
+
+bool CCharsetConverter::CInnerConverter::logicalToVisualBiDi(
+ const std::u32string& stringSrc,
+ std::u32string& stringDst,
+ FriBidiCharType base /*= FRIBIDI_TYPE_LTR*/,
+ const bool failOnBadString /*= false*/,
+ int* visualToLogicalMap /*= nullptr*/)
+{
+ stringDst.clear();
+
+ const size_t srcLen = stringSrc.length();
+ if (srcLen == 0)
+ return true;
+
+ stringDst.reserve(srcLen);
+ size_t lineStart = 0;
+
+ // libfribidi is not threadsafe, so make sure we make it so
+ std::unique_lock<CCriticalSection> lock(m_critSectionFriBiDi);
+ do
+ {
+ size_t lineEnd = stringSrc.find('\n', lineStart);
+ if (lineEnd >= srcLen) // equal to 'lineEnd == std::string::npos'
+ lineEnd = srcLen;
+ else
+ lineEnd++; // include '\n'
+
+ const size_t lineLen = lineEnd - lineStart;
+
+ FriBidiChar* visual = (FriBidiChar*) malloc((lineLen + 1) * sizeof(FriBidiChar));
+ if (visual == NULL)
+ {
+ free(visual);
+ CLog::Log(LOGFATAL, "{}: can't allocate memory", __FUNCTION__);
+ return false;
+ }
+
+ bool bidiFailed = false;
+ FriBidiCharType baseCopy = base; // preserve same value for all lines, required because fribidi_log2vis will modify parameter value
+ if (fribidi_log2vis(reinterpret_cast<const FriBidiChar*>(stringSrc.c_str() + lineStart),
+ lineLen, &baseCopy, visual, nullptr,
+ !visualToLogicalMap ? nullptr : visualToLogicalMap + lineStart, nullptr))
+ {
+ // Removes bidirectional marks
+ const int newLen = fribidi_remove_bidi_marks(
+ visual, lineLen, nullptr, !visualToLogicalMap ? nullptr : visualToLogicalMap + lineStart,
+ nullptr);
+ if (newLen > 0)
+ stringDst.append((const char32_t*)visual, (size_t)newLen);
+ else if (newLen < 0)
+ bidiFailed = failOnBadString;
+ }
+ else
+ bidiFailed = failOnBadString;
+
+ free(visual);
+
+ if (bidiFailed)
+ return false;
+
+ lineStart = lineEnd;
+ } while (lineStart < srcLen);
+
+ return !stringDst.empty();
+}
+
+bool CCharsetConverter::CInnerConverter::isBidiDirectionRTL(const std::string& str)
+{
+ std::u32string converted;
+ if (!CInnerConverter::stdConvert(Utf8ToUtf32, str, converted, true))
+ return false;
+
+ int lineLen = static_cast<int>(str.size());
+ FriBidiCharType* charTypes = new FriBidiCharType[lineLen];
+ fribidi_get_bidi_types(reinterpret_cast<const FriBidiChar*>(converted.c_str()),
+ (FriBidiStrIndex)lineLen, charTypes);
+ FriBidiCharType charType = fribidi_get_par_direction(charTypes, (FriBidiStrIndex)lineLen);
+ delete[] charTypes;
+ return charType == FRIBIDI_PAR_RTL;
+}
+
+static struct SCharsetMapping
+{
+ const char* charset;
+ const char* caption;
+} g_charsets[] = {
+ { "ISO-8859-1", "Western Europe (ISO)" }
+ , { "ISO-8859-2", "Central Europe (ISO)" }
+ , { "ISO-8859-3", "South Europe (ISO)" }
+ , { "ISO-8859-4", "Baltic (ISO)" }
+ , { "ISO-8859-5", "Cyrillic (ISO)" }
+ , { "ISO-8859-6", "Arabic (ISO)" }
+ , { "ISO-8859-7", "Greek (ISO)" }
+ , { "ISO-8859-8", "Hebrew (ISO)" }
+ , { "ISO-8859-9", "Turkish (ISO)" }
+ , { "CP1250", "Central Europe (Windows)" }
+ , { "CP1251", "Cyrillic (Windows)" }
+ , { "CP1252", "Western Europe (Windows)" }
+ , { "CP1253", "Greek (Windows)" }
+ , { "CP1254", "Turkish (Windows)" }
+ , { "CP1255", "Hebrew (Windows)" }
+ , { "CP1256", "Arabic (Windows)" }
+ , { "CP1257", "Baltic (Windows)" }
+ , { "CP1258", "Vietnamese (Windows)" }
+ , { "CP874", "Thai (Windows)" }
+ , { "BIG5", "Chinese Traditional (Big5)" }
+ , { "GBK", "Chinese Simplified (GBK)" }
+ , { "SHIFT_JIS", "Japanese (Shift-JIS)" }
+ , { "CP949", "Korean" }
+ , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" }
+ , { NULL, NULL }
+};
+
+CCharsetConverter::CCharsetConverter() = default;
+
+void CCharsetConverter::OnSettingChanged(const std::shared_ptr<const CSetting>& setting)
+{
+ if (setting == NULL)
+ return;
+
+ const std::string& settingId = setting->GetId();
+ if (settingId == CSettings::SETTING_LOCALE_CHARSET)
+ resetUserCharset();
+ else if (settingId == CSettings::SETTING_SUBTITLES_CHARSET)
+ resetSubtitleCharset();
+}
+
+void CCharsetConverter::clear()
+{
+}
+
+std::vector<std::string> CCharsetConverter::getCharsetLabels()
+{
+ std::vector<std::string> lab;
+ for(SCharsetMapping* c = g_charsets; c->charset; c++)
+ lab.emplace_back(c->caption);
+
+ return lab;
+}
+
+std::string CCharsetConverter::getCharsetLabelByName(const std::string& charsetName)
+{
+ for(SCharsetMapping* c = g_charsets; c->charset; c++)
+ {
+ if (StringUtils::EqualsNoCase(charsetName,c->charset))
+ return c->caption;
+ }
+
+ return "";
+}
+
+std::string CCharsetConverter::getCharsetNameByLabel(const std::string& charsetLabel)
+{
+ for(SCharsetMapping* c = g_charsets; c->charset; c++)
+ {
+ if (StringUtils::EqualsNoCase(charsetLabel, c->caption))
+ return c->charset;
+ }
+
+ return "";
+}
+
+void CCharsetConverter::reset(void)
+{
+ for (CConverterType& conversion : CInnerConverter::m_stdConversion)
+ conversion.Reset();
+}
+
+void CCharsetConverter::resetSystemCharset(void)
+{
+ CInnerConverter::m_stdConversion[Utf8ToSystem].Reset();
+ CInnerConverter::m_stdConversion[SystemToUtf8].Reset();
+}
+
+void CCharsetConverter::resetUserCharset(void)
+{
+ CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset();
+ CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset();
+ CInnerConverter::m_stdConversion[Utf32ToUserCharset].Reset();
+ resetSubtitleCharset();
+}
+
+void CCharsetConverter::resetSubtitleCharset(void)
+{
+ CInnerConverter::m_stdConversion[SubtitleCharsetToUtf8].Reset();
+}
+
+void CCharsetConverter::reinitCharsetsFromSettings(void)
+{
+ resetUserCharset(); // this will also reinit Subtitle charsets
+}
+
+bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
+{
+ return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar);
+}
+
+std::u32string CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar /*= true*/)
+{
+ std::u32string converted;
+ utf8ToUtf32(utf8StringSrc, converted, failOnBadChar);
+ return converted;
+}
+
+bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
+{
+ if (bVisualBiDiFlip)
+ {
+ std::u32string converted;
+ if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, converted, failOnBadChar))
+ return false;
+
+ return CInnerConverter::logicalToVisualBiDi(converted, utf32StringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar);
+ }
+ return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= true*/)
+{
+ return CInnerConverter::stdConvert(Utf32ToUtf8, utf32StringSrc, utf8StringDst, failOnBadChar);
+}
+
+std::string CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar /*= false*/)
+{
+ std::string converted;
+ utf32ToUtf8(utf32StringSrc, converted, failOnBadChar);
+ return converted;
+}
+
+bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= true*/)
+{
+#ifdef WCHAR_IS_UCS_4
+ wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length());
+ return true;
+#else // !WCHAR_IS_UCS_4
+ return CInnerConverter::stdConvert(Utf32ToW, utf32StringSrc, wStringDst, failOnBadChar);
+#endif // !WCHAR_IS_UCS_4
+}
+
+bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc,
+ std::u32string& visualStringDst,
+ bool forceLTRReadingOrder /*= false*/,
+ bool failOnBadString /*= false*/,
+ int* visualToLogicalMap /*= nullptr*/)
+{
+ return CInnerConverter::logicalToVisualBiDi(
+ logicalStringSrc, visualStringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF,
+ failOnBadString, visualToLogicalMap);
+}
+
+bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
+{
+#ifdef WCHAR_IS_UCS_4
+ /* UCS-4 is almost equal to UTF-32, but UTF-32 has strict limits on possible values, while UCS-4 is usually unchecked.
+ * With this "conversion" we ensure that output will be valid UTF-32 string. */
+#endif
+ return CInnerConverter::stdConvert(WToUtf32, wStringSrc, utf32StringDst, failOnBadChar);
+}
+
+// The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
+// of the string is already made or the string is not displayed in the GUI
+bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/,
+ bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
+{
+ // Try to flip hebrew/arabic characters, if any
+ if (bVisualBiDiFlip)
+ {
+ wStringDst.clear();
+ std::u32string utf32str;
+ if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32str, failOnBadChar))
+ return false;
+
+ std::u32string utf32flipped;
+ const bool bidiResult = CInnerConverter::logicalToVisualBiDi(utf32str, utf32flipped, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar);
+
+ return CInnerConverter::stdConvert(Utf32ToW, utf32flipped, wStringDst, failOnBadChar) && bidiResult;
+ }
+
+ return CInnerConverter::stdConvert(Utf8toW, utf8StringSrc, wStringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::subtitleCharsetToUtf8(const std::string& stringSrc, std::string& utf8StringDst)
+{
+ return CInnerConverter::stdConvert(SubtitleCharsetToUtf8, stringSrc, utf8StringDst, false);
+}
+
+bool CCharsetConverter::fromW(const std::wstring& wStringSrc,
+ std::string& stringDst, const std::string& enc)
+{
+ return CInnerConverter::customConvert(WCHAR_CHARSET, enc, wStringSrc, stringDst);
+}
+
+bool CCharsetConverter::toW(const std::string& stringSrc,
+ std::wstring& wStringDst, const std::string& enc)
+{
+ return CInnerConverter::customConvert(enc, WCHAR_CHARSET, stringSrc, wStringDst);
+}
+
+bool CCharsetConverter::utf8ToStringCharset(const std::string& utf8StringSrc, std::string& stringDst)
+{
+ return CInnerConverter::stdConvert(Utf8ToUserCharset, utf8StringSrc, stringDst);
+}
+
+bool CCharsetConverter::utf8ToStringCharset(std::string& stringSrcDst)
+{
+ std::string strSrc(stringSrcDst);
+ return utf8ToStringCharset(strSrc, stringSrcDst);
+}
+
+bool CCharsetConverter::ToUtf8(const std::string& strSourceCharset, const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
+{
+ if (strSourceCharset == "UTF-8")
+ { // simple case - no conversion necessary
+ utf8StringDst = stringSrc;
+ return true;
+ }
+
+ return CInnerConverter::customConvert(strSourceCharset, "UTF-8", stringSrc, utf8StringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::string& stringDst)
+{
+ if (strDestCharset == "UTF-8")
+ { // simple case - no conversion necessary
+ stringDst = utf8StringSrc;
+ return true;
+ }
+
+ return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, stringDst);
+}
+
+bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u16string& utf16StringDst)
+{
+ return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf16StringDst);
+}
+
+bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u32string& utf32StringDst)
+{
+ return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf32StringDst);
+}
+
+bool CCharsetConverter::unknownToUTF8(std::string& stringSrcDst)
+{
+ std::string source(stringSrcDst);
+ return unknownToUTF8(source, stringSrcDst);
+}
+
+bool CCharsetConverter::unknownToUTF8(const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
+{
+ // checks whether it's utf8 already, and if not converts using the sourceCharset if given, else the string charset
+ if (CUtf8Utils::isValidUtf8(stringSrc))
+ {
+ utf8StringDst = stringSrc;
+ return true;
+ }
+ return CInnerConverter::stdConvert(UserCharsetToUtf8, stringSrc, utf8StringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::wToUTF8(const std::wstring& wStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
+{
+ return CInnerConverter::stdConvert(WtoUtf8, wStringSrc, utf8StringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::utf16BEtoUTF8(const std::u16string& utf16StringSrc, std::string& utf8StringDst)
+{
+ return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst);
+}
+
+bool CCharsetConverter::utf16BEtoUTF8(const std::string& utf16StringSrc, std::string& utf8StringDst)
+{
+ return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst);
+}
+
+bool CCharsetConverter::utf16LEtoUTF8(const std::u16string& utf16StringSrc,
+ std::string& utf8StringDst)
+{
+ return CInnerConverter::stdConvert(Utf16LEtoUtf8, utf16StringSrc, utf8StringDst);
+}
+
+bool CCharsetConverter::ucs2ToUTF8(const std::u16string& ucs2StringSrc, std::string& utf8StringDst)
+{
+ return CInnerConverter::stdConvert(Ucs2CharsetToUtf8, ucs2StringSrc,utf8StringDst);
+}
+
+bool CCharsetConverter::utf16LEtoW(const std::u16string& utf16String, std::wstring& wString)
+{
+ return CInnerConverter::stdConvert(Utf16LEtoW, utf16String, wString);
+}
+
+bool CCharsetConverter::utf32ToStringCharset(const std::u32string& utf32StringSrc, std::string& stringDst)
+{
+ return CInnerConverter::stdConvert(Utf32ToUserCharset, utf32StringSrc, stringDst);
+}
+
+bool CCharsetConverter::utf8ToSystem(std::string& stringSrcDst, bool failOnBadChar /*= false*/)
+{
+ std::string strSrc(stringSrcDst);
+ return CInnerConverter::stdConvert(Utf8ToSystem, strSrc, stringSrcDst, failOnBadChar);
+}
+
+bool CCharsetConverter::systemToUtf8(const std::string& sysStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
+{
+ return CInnerConverter::stdConvert(SystemToUtf8, sysStringSrc, utf8StringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::MacintoshToUTF8(const std::string& macStringSrc, std::string& utf8StringDst)
+{
+ return CInnerConverter::stdConvert(MacintoshToUtf8, macStringSrc, utf8StringDst);
+}
+
+bool CCharsetConverter::utf8logicalToVisualBiDi(const std::string& utf8StringSrc, std::string& utf8StringDst, bool failOnBadString /*= false*/)
+{
+ utf8StringDst.clear();
+ std::u32string utf32flipped;
+ if (!utf8ToUtf32Visual(utf8StringSrc, utf32flipped, true, true, failOnBadString))
+ return false;
+
+ return CInnerConverter::stdConvert(Utf32ToUtf8, utf32flipped, utf8StringDst, failOnBadString);
+}
+
+bool CCharsetConverter::utf8IsRTLBidiDirection(const std::string& utf8String)
+{
+ return CInnerConverter::isBidiDirectionRTL(utf8String);
+}
+
+void CCharsetConverter::SettingOptionsCharsetsFiller(const SettingConstPtr& setting,
+ std::vector<StringSettingOption>& list,
+ std::string& current,
+ void* data)
+{
+ std::vector<std::string> vecCharsets = g_charsetConverter.getCharsetLabels();
+ sort(vecCharsets.begin(), vecCharsets.end(), sortstringbyname());
+
+ list.emplace_back(g_localizeStrings.Get(13278), "DEFAULT"); // "Default"
+ for (int i = 0; i < (int) vecCharsets.size(); ++i)
+ list.emplace_back(vecCharsets[i], g_charsetConverter.getCharsetNameByLabel(vecCharsets[i]));
+}