From c04dcc2e7d834218ef2d4194331e383402495ae1 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 20:07:22 +0200 Subject: Adding upstream version 2:20.4+dfsg. Signed-off-by: Daniel Baumann --- xbmc/utils/CharsetConverter.cpp | 910 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 910 insertions(+) create mode 100644 xbmc/utils/CharsetConverter.cpp (limited to 'xbmc/utils/CharsetConverter.cpp') diff --git a/xbmc/utils/CharsetConverter.cpp b/xbmc/utils/CharsetConverter.cpp new file mode 100644 index 0000000..89976ee --- /dev/null +++ b/xbmc/utils/CharsetConverter.cpp @@ -0,0 +1,910 @@ +/* + * Copyright (C) 2005-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "CharsetConverter.h" + +#include "LangInfo.h" +#include "guilib/LocalizeStrings.h" +#include "log.h" +#include "settings/Settings.h" +#include "settings/lib/Setting.h" +#include "settings/lib/SettingDefinitions.h" +#include "utils/StringUtils.h" +#include "utils/Utf8Utils.h" + +#include +#include + +#include +#include + +#ifdef WORDS_BIGENDIAN + #define ENDIAN_SUFFIX "BE" +#else + #define ENDIAN_SUFFIX "LE" +#endif + +#if defined(TARGET_DARWIN) + #define WCHAR_IS_UCS_4 1 + #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX + #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX + #define UTF8_SOURCE "UTF-8-MAC" + #define WCHAR_CHARSET UTF32_CHARSET +#elif defined(TARGET_WINDOWS) + #define WCHAR_IS_UTF16 1 + #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX + #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX + #define UTF8_SOURCE "UTF-8" + #define WCHAR_CHARSET UTF16_CHARSET +#elif defined(TARGET_FREEBSD) + #define WCHAR_IS_UCS_4 1 + #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX + #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX + #define UTF8_SOURCE "UTF-8" + #define WCHAR_CHARSET UTF32_CHARSET +#elif defined(TARGET_ANDROID) + #define WCHAR_IS_UCS_4 1 + #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX + #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX + #define UTF8_SOURCE "UTF-8" + #define WCHAR_CHARSET UTF32_CHARSET +#else + #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX + #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX + #define UTF8_SOURCE "UTF-8" + #define WCHAR_CHARSET "WCHAR_T" + #if __STDC_ISO_10646__ + #ifdef SIZEOF_WCHAR_T + #if SIZEOF_WCHAR_T == 4 + #define WCHAR_IS_UCS_4 1 + #elif SIZEOF_WCHAR_T == 2 + #define WCHAR_IS_UCS_2 1 + #endif + #endif + #endif +#endif + +#define NO_ICONV ((iconv_t)-1) + +enum SpecialCharset +{ + NotSpecialCharset = 0, + SystemCharset, + UserCharset /* locale.charset */, + SubtitleCharset /* subtitles.charset */, +}; + +class CConverterType : public CCriticalSection +{ +public: + CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1); + CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1); + CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1); + CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1); + CConverterType(const CConverterType& other); + ~CConverterType(); + + iconv_t GetConverter(std::unique_lock& converterLock); + + void Reset(void); + void ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1); + std::string GetSourceCharset(void) const { return m_sourceCharset; } + std::string GetTargetCharset(void) const { return m_targetCharset; } + unsigned int GetTargetSingleCharMaxLen(void) const { return m_targetSingleCharMaxLen; } + +private: + static std::string ResolveSpecialCharset(enum SpecialCharset charset); + + enum SpecialCharset m_sourceSpecialCharset; + std::string m_sourceCharset; + enum SpecialCharset m_targetSpecialCharset; + std::string m_targetCharset; + iconv_t m_iconv; + unsigned int m_targetSingleCharMaxLen; +}; + +CConverterType::CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), + m_sourceSpecialCharset(NotSpecialCharset), + m_sourceCharset(sourceCharset), + m_targetSpecialCharset(NotSpecialCharset), + m_targetCharset(targetCharset), + m_iconv(NO_ICONV), + m_targetSingleCharMaxLen(targetSingleCharMaxLen) +{ +} + +CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), + m_sourceSpecialCharset(sourceSpecialCharset), + m_sourceCharset(), + m_targetSpecialCharset(NotSpecialCharset), + m_targetCharset(targetCharset), + m_iconv(NO_ICONV), + m_targetSingleCharMaxLen(targetSingleCharMaxLen) +{ +} + +CConverterType::CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), + m_sourceSpecialCharset(NotSpecialCharset), + m_sourceCharset(sourceCharset), + m_targetSpecialCharset(targetSpecialCharset), + m_targetCharset(), + m_iconv(NO_ICONV), + m_targetSingleCharMaxLen(targetSingleCharMaxLen) +{ +} + +CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(), + m_sourceSpecialCharset(sourceSpecialCharset), + m_sourceCharset(), + m_targetSpecialCharset(targetSpecialCharset), + m_targetCharset(), + m_iconv(NO_ICONV), + m_targetSingleCharMaxLen(targetSingleCharMaxLen) +{ +} + +CConverterType::CConverterType(const CConverterType& other) : CCriticalSection(), + m_sourceSpecialCharset(other.m_sourceSpecialCharset), + m_sourceCharset(other.m_sourceCharset), + m_targetSpecialCharset(other.m_targetSpecialCharset), + m_targetCharset(other.m_targetCharset), + m_iconv(NO_ICONV), + m_targetSingleCharMaxLen(other.m_targetSingleCharMaxLen) +{ +} + +CConverterType::~CConverterType() +{ + std::unique_lock lock(*this); + if (m_iconv != NO_ICONV) + iconv_close(m_iconv); + lock.unlock(); // ensure unlocking before final destruction +} + +iconv_t CConverterType::GetConverter(std::unique_lock& converterLock) +{ + // ensure that this unique instance is locked externally + if (converterLock.mutex() != this) + return NO_ICONV; + + if (m_iconv == NO_ICONV) + { + if (m_sourceSpecialCharset) + m_sourceCharset = ResolveSpecialCharset(m_sourceSpecialCharset); + if (m_targetSpecialCharset) + m_targetCharset = ResolveSpecialCharset(m_targetSpecialCharset); + + m_iconv = iconv_open(m_targetCharset.c_str(), m_sourceCharset.c_str()); + + if (m_iconv == NO_ICONV) + CLog::Log(LOGERROR, "{}: iconv_open() for \"{}\" -> \"{}\" failed, errno = {} ({})", + __FUNCTION__, m_sourceCharset, m_targetCharset, errno, strerror(errno)); + } + + return m_iconv; +} + +void CConverterType::Reset(void) +{ + std::unique_lock lock(*this); + if (m_iconv != NO_ICONV) + { + iconv_close(m_iconv); + m_iconv = NO_ICONV; + } + + if (m_sourceSpecialCharset) + m_sourceCharset.clear(); + if (m_targetSpecialCharset) + m_targetCharset.clear(); + +} + +void CConverterType::ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) +{ + std::unique_lock lock(*this); + if (sourceCharset != m_sourceCharset || targetCharset != m_targetCharset) + { + if (m_iconv != NO_ICONV) + { + iconv_close(m_iconv); + m_iconv = NO_ICONV; + } + + m_sourceSpecialCharset = NotSpecialCharset; + m_sourceCharset = sourceCharset; + m_targetSpecialCharset = NotSpecialCharset; + m_targetCharset = targetCharset; + m_targetSingleCharMaxLen = targetSingleCharMaxLen; + } +} + +std::string CConverterType::ResolveSpecialCharset(enum SpecialCharset charset) +{ + switch (charset) + { + case SystemCharset: + return ""; + case UserCharset: + return g_langInfo.GetGuiCharSet(); + case SubtitleCharset: + return g_langInfo.GetSubtitleCharSet(); + case NotSpecialCharset: + default: + return "UTF-8"; /* dummy value */ + } +} + +enum StdConversionType /* Keep it in sync with CCharsetConverter::CInnerConverter::m_stdConversion */ +{ + NoConversion = -1, + Utf8ToUtf32 = 0, + Utf32ToUtf8, + Utf32ToW, + WToUtf32, + SubtitleCharsetToUtf8, + Utf8ToUserCharset, + UserCharsetToUtf8, + Utf32ToUserCharset, + WtoUtf8, + Utf16LEtoW, + Utf16BEtoUtf8, + Utf16LEtoUtf8, + Utf8toW, + Utf8ToSystem, + SystemToUtf8, + Ucs2CharsetToUtf8, + MacintoshToUtf8, + NumberOfStdConversionTypes /* Dummy sentinel entry */ +}; + +/* We don't want to pollute header file with many additional includes and definitions, so put + here all staff that require usage of types defined in this file or in additional headers */ +class CCharsetConverter::CInnerConverter +{ +public: + static bool logicalToVisualBiDi(const std::u32string& stringSrc, + std::u32string& stringDst, + FriBidiCharType base = FRIBIDI_TYPE_LTR, + const bool failOnBadString = false, + int* visualToLogicalMap = nullptr); + static bool isBidiDirectionRTL(const std::string& stringSrc); + + template + static bool stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false); + template + static bool customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false); + + template + static bool convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false); + + static CConverterType m_stdConversion[NumberOfStdConversionTypes]; + static CCriticalSection m_critSectionFriBiDi; +}; + +/* single symbol sizes in chars */ +const int CCharsetConverter::m_Utf8CharMinSize = 1; +const int CCharsetConverter::m_Utf8CharMaxSize = 4; + +// clang-format off +CConverterType CCharsetConverter::CInnerConverter::m_stdConversion[NumberOfStdConversionTypes] = /* keep it in sync with enum StdConversionType */ +{ + /* Utf8ToUtf32 */ CConverterType(UTF8_SOURCE, UTF32_CHARSET), + /* Utf32ToUtf8 */ CConverterType(UTF32_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* Utf32ToW */ CConverterType(UTF32_CHARSET, WCHAR_CHARSET), + /* WToUtf32 */ CConverterType(WCHAR_CHARSET, UTF32_CHARSET), + /* SubtitleCharsetToUtf8*/CConverterType(SubtitleCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* Utf8ToUserCharset */ CConverterType(UTF8_SOURCE, UserCharset), + /* UserCharsetToUtf8 */ CConverterType(UserCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* Utf32ToUserCharset */ CConverterType(UTF32_CHARSET, UserCharset), + /* WtoUtf8 */ CConverterType(WCHAR_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* Utf16LEtoW */ CConverterType("UTF-16LE", WCHAR_CHARSET), + /* Utf16BEtoUtf8 */ CConverterType("UTF-16BE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* Utf16LEtoUtf8 */ CConverterType("UTF-16LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* Utf8toW */ CConverterType(UTF8_SOURCE, WCHAR_CHARSET), + /* Utf8ToSystem */ CConverterType(UTF8_SOURCE, SystemCharset), + /* SystemToUtf8 */ CConverterType(SystemCharset, UTF8_SOURCE), + /* Ucs2CharsetToUtf8 */ CConverterType("UCS-2LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize), + /* MacintoshToUtf8 */ CConverterType("macintosh", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize) +}; +// clang-format on + +CCriticalSection CCharsetConverter::CInnerConverter::m_critSectionFriBiDi; + +template +bool CCharsetConverter::CInnerConverter::stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/) +{ + strDest.clear(); + if (strSource.empty()) + return true; + + if (convertType < 0 || convertType >= NumberOfStdConversionTypes) + return false; + + CConverterType& convType = m_stdConversion[convertType]; + std::unique_lock converterLock(convType); + + return convert(convType.GetConverter(converterLock), convType.GetTargetSingleCharMaxLen(), strSource, strDest, failOnInvalidChar); +} + +template +bool CCharsetConverter::CInnerConverter::customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/) +{ + strDest.clear(); + if (strSource.empty()) + return true; + + iconv_t conv = iconv_open(targetCharset.c_str(), sourceCharset.c_str()); + if (conv == NO_ICONV) + { + CLog::Log(LOGERROR, "{}: iconv_open() for \"{}\" -> \"{}\" failed, errno = {} ({})", + __FUNCTION__, sourceCharset, targetCharset, errno, strerror(errno)); + return false; + } + const int dstMultp = (targetCharset.compare(0, 5, "UTF-8") == 0) ? CCharsetConverter::m_Utf8CharMaxSize : 1; + const bool result = convert(conv, dstMultp, strSource, strDest, failOnInvalidChar); + iconv_close(conv); + + return result; +} + +/* iconv may declare inbuf to be char** rather than const char** depending on platform and version, + so provide a wrapper that handles both */ +struct charPtrPtrAdapter +{ + const char** pointer; + explicit charPtrPtrAdapter(const char** p) : + pointer(p) { } + operator char**() + { return const_cast(pointer); } + operator const char**() + { return pointer; } +}; + +template +bool CCharsetConverter::CInnerConverter::convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/) +{ + if (type == NO_ICONV) + return false; + + //input buffer for iconv() is the buffer from strSource + size_t inBufSize = (strSource.length() + 1) * sizeof(typename INPUT::value_type); + const char* inBuf = (const char*)strSource.c_str(); + + //allocate output buffer for iconv() + size_t outBufSize = (strSource.length() + 1) * sizeof(typename OUTPUT::value_type) * multiplier; + char* outBuf = (char*)malloc(outBufSize); + if (outBuf == NULL) + { + CLog::Log(LOGFATAL, "{}: malloc failed", __FUNCTION__); + return false; + } + + size_t inBytesAvail = inBufSize; //how many bytes iconv() can read + size_t outBytesAvail = outBufSize; //how many bytes iconv() can write + const char* inBufStart = inBuf; //where in our input buffer iconv() should start reading + char* outBufStart = outBuf; //where in out output buffer iconv() should start writing + + size_t returnV; + while(true) + { + //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail + returnV = iconv(type, charPtrPtrAdapter(&inBufStart), &inBytesAvail, &outBufStart, &outBytesAvail); + + if (returnV == (size_t)-1) + { + if (errno == E2BIG) //output buffer is not big enough + { + //save where iconv() ended converting, realloc might make outBufStart invalid + size_t bytesConverted = outBufSize - outBytesAvail; + + //make buffer twice as big + outBufSize *= 2; + char* newBuf = (char*)realloc(outBuf, outBufSize); + if (!newBuf) + { + CLog::Log(LOGFATAL, "{} realloc failed with errno={}({})", __FUNCTION__, errno, + strerror(errno)); + break; + } + outBuf = newBuf; + + //update the buffer pointer and counter + outBufStart = outBuf + bytesConverted; + outBytesAvail = outBufSize - bytesConverted; + + //continue in the loop and convert the rest + continue; + } + else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input + { + if (failOnInvalidChar) + break; + + //skip invalid byte + inBufStart++; + inBytesAvail--; + //continue in the loop and convert the rest + continue; + } + else if (errno == EINVAL) /* Invalid sequence at the end of input buffer */ + { + if (!failOnInvalidChar) + returnV = 0; /* reset error status to use converted part */ + + break; + } + else //iconv() had some other error + { + CLog::Log(LOGERROR, "{}: iconv() failed, errno={} ({})", __FUNCTION__, errno, + strerror(errno)); + } + } + break; + } + + //complete the conversion (reset buffers), otherwise the current data will prefix the data on the next call + if (iconv(type, NULL, NULL, &outBufStart, &outBytesAvail) == (size_t)-1) + CLog::Log(LOGERROR, "{} failed cleanup errno={}({})", __FUNCTION__, errno, strerror(errno)); + + if (returnV == (size_t)-1) + { + free(outBuf); + return false; + } + //we're done + + const typename OUTPUT::size_type sizeInChars = (typename OUTPUT::size_type) (outBufSize - outBytesAvail) / sizeof(typename OUTPUT::value_type); + typename OUTPUT::const_pointer strPtr = (typename OUTPUT::const_pointer) outBuf; + /* Make sure that all buffer is assigned and string is stopped at end of buffer */ + if (strPtr[sizeInChars-1] == 0 && strSource[strSource.length()-1] != 0) + strDest.assign(strPtr, sizeInChars-1); + else + strDest.assign(strPtr, sizeInChars); + + free(outBuf); + + return true; +} + +bool CCharsetConverter::CInnerConverter::logicalToVisualBiDi( + const std::u32string& stringSrc, + std::u32string& stringDst, + FriBidiCharType base /*= FRIBIDI_TYPE_LTR*/, + const bool failOnBadString /*= false*/, + int* visualToLogicalMap /*= nullptr*/) +{ + stringDst.clear(); + + const size_t srcLen = stringSrc.length(); + if (srcLen == 0) + return true; + + stringDst.reserve(srcLen); + size_t lineStart = 0; + + // libfribidi is not threadsafe, so make sure we make it so + std::unique_lock lock(m_critSectionFriBiDi); + do + { + size_t lineEnd = stringSrc.find('\n', lineStart); + if (lineEnd >= srcLen) // equal to 'lineEnd == std::string::npos' + lineEnd = srcLen; + else + lineEnd++; // include '\n' + + const size_t lineLen = lineEnd - lineStart; + + FriBidiChar* visual = (FriBidiChar*) malloc((lineLen + 1) * sizeof(FriBidiChar)); + if (visual == NULL) + { + free(visual); + CLog::Log(LOGFATAL, "{}: can't allocate memory", __FUNCTION__); + return false; + } + + bool bidiFailed = false; + FriBidiCharType baseCopy = base; // preserve same value for all lines, required because fribidi_log2vis will modify parameter value + if (fribidi_log2vis(reinterpret_cast(stringSrc.c_str() + lineStart), + lineLen, &baseCopy, visual, nullptr, + !visualToLogicalMap ? nullptr : visualToLogicalMap + lineStart, nullptr)) + { + // Removes bidirectional marks + const int newLen = fribidi_remove_bidi_marks( + visual, lineLen, nullptr, !visualToLogicalMap ? nullptr : visualToLogicalMap + lineStart, + nullptr); + if (newLen > 0) + stringDst.append((const char32_t*)visual, (size_t)newLen); + else if (newLen < 0) + bidiFailed = failOnBadString; + } + else + bidiFailed = failOnBadString; + + free(visual); + + if (bidiFailed) + return false; + + lineStart = lineEnd; + } while (lineStart < srcLen); + + return !stringDst.empty(); +} + +bool CCharsetConverter::CInnerConverter::isBidiDirectionRTL(const std::string& str) +{ + std::u32string converted; + if (!CInnerConverter::stdConvert(Utf8ToUtf32, str, converted, true)) + return false; + + int lineLen = static_cast(str.size()); + FriBidiCharType* charTypes = new FriBidiCharType[lineLen]; + fribidi_get_bidi_types(reinterpret_cast(converted.c_str()), + (FriBidiStrIndex)lineLen, charTypes); + FriBidiCharType charType = fribidi_get_par_direction(charTypes, (FriBidiStrIndex)lineLen); + delete[] charTypes; + return charType == FRIBIDI_PAR_RTL; +} + +static struct SCharsetMapping +{ + const char* charset; + const char* caption; +} g_charsets[] = { + { "ISO-8859-1", "Western Europe (ISO)" } + , { "ISO-8859-2", "Central Europe (ISO)" } + , { "ISO-8859-3", "South Europe (ISO)" } + , { "ISO-8859-4", "Baltic (ISO)" } + , { "ISO-8859-5", "Cyrillic (ISO)" } + , { "ISO-8859-6", "Arabic (ISO)" } + , { "ISO-8859-7", "Greek (ISO)" } + , { "ISO-8859-8", "Hebrew (ISO)" } + , { "ISO-8859-9", "Turkish (ISO)" } + , { "CP1250", "Central Europe (Windows)" } + , { "CP1251", "Cyrillic (Windows)" } + , { "CP1252", "Western Europe (Windows)" } + , { "CP1253", "Greek (Windows)" } + , { "CP1254", "Turkish (Windows)" } + , { "CP1255", "Hebrew (Windows)" } + , { "CP1256", "Arabic (Windows)" } + , { "CP1257", "Baltic (Windows)" } + , { "CP1258", "Vietnamese (Windows)" } + , { "CP874", "Thai (Windows)" } + , { "BIG5", "Chinese Traditional (Big5)" } + , { "GBK", "Chinese Simplified (GBK)" } + , { "SHIFT_JIS", "Japanese (Shift-JIS)" } + , { "CP949", "Korean" } + , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" } + , { NULL, NULL } +}; + +CCharsetConverter::CCharsetConverter() = default; + +void CCharsetConverter::OnSettingChanged(const std::shared_ptr& setting) +{ + if (setting == NULL) + return; + + const std::string& settingId = setting->GetId(); + if (settingId == CSettings::SETTING_LOCALE_CHARSET) + resetUserCharset(); + else if (settingId == CSettings::SETTING_SUBTITLES_CHARSET) + resetSubtitleCharset(); +} + +void CCharsetConverter::clear() +{ +} + +std::vector CCharsetConverter::getCharsetLabels() +{ + std::vector lab; + for(SCharsetMapping* c = g_charsets; c->charset; c++) + lab.emplace_back(c->caption); + + return lab; +} + +std::string CCharsetConverter::getCharsetLabelByName(const std::string& charsetName) +{ + for(SCharsetMapping* c = g_charsets; c->charset; c++) + { + if (StringUtils::EqualsNoCase(charsetName,c->charset)) + return c->caption; + } + + return ""; +} + +std::string CCharsetConverter::getCharsetNameByLabel(const std::string& charsetLabel) +{ + for(SCharsetMapping* c = g_charsets; c->charset; c++) + { + if (StringUtils::EqualsNoCase(charsetLabel, c->caption)) + return c->charset; + } + + return ""; +} + +void CCharsetConverter::reset(void) +{ + for (CConverterType& conversion : CInnerConverter::m_stdConversion) + conversion.Reset(); +} + +void CCharsetConverter::resetSystemCharset(void) +{ + CInnerConverter::m_stdConversion[Utf8ToSystem].Reset(); + CInnerConverter::m_stdConversion[SystemToUtf8].Reset(); +} + +void CCharsetConverter::resetUserCharset(void) +{ + CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset(); + CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset(); + CInnerConverter::m_stdConversion[Utf32ToUserCharset].Reset(); + resetSubtitleCharset(); +} + +void CCharsetConverter::resetSubtitleCharset(void) +{ + CInnerConverter::m_stdConversion[SubtitleCharsetToUtf8].Reset(); +} + +void CCharsetConverter::reinitCharsetsFromSettings(void) +{ + resetUserCharset(); // this will also reinit Subtitle charsets +} + +bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) +{ + return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar); +} + +std::u32string CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar /*= true*/) +{ + std::u32string converted; + utf8ToUtf32(utf8StringSrc, converted, failOnBadChar); + return converted; +} + +bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/) +{ + if (bVisualBiDiFlip) + { + std::u32string converted; + if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, converted, failOnBadChar)) + return false; + + return CInnerConverter::logicalToVisualBiDi(converted, utf32StringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar); + } + return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar); +} + +bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= true*/) +{ + return CInnerConverter::stdConvert(Utf32ToUtf8, utf32StringSrc, utf8StringDst, failOnBadChar); +} + +std::string CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar /*= false*/) +{ + std::string converted; + utf32ToUtf8(utf32StringSrc, converted, failOnBadChar); + return converted; +} + +bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= true*/) +{ +#ifdef WCHAR_IS_UCS_4 + wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length()); + return true; +#else // !WCHAR_IS_UCS_4 + return CInnerConverter::stdConvert(Utf32ToW, utf32StringSrc, wStringDst, failOnBadChar); +#endif // !WCHAR_IS_UCS_4 +} + +bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, + std::u32string& visualStringDst, + bool forceLTRReadingOrder /*= false*/, + bool failOnBadString /*= false*/, + int* visualToLogicalMap /*= nullptr*/) +{ + return CInnerConverter::logicalToVisualBiDi( + logicalStringSrc, visualStringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, + failOnBadString, visualToLogicalMap); +} + +bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) +{ +#ifdef WCHAR_IS_UCS_4 + /* UCS-4 is almost equal to UTF-32, but UTF-32 has strict limits on possible values, while UCS-4 is usually unchecked. + * With this "conversion" we ensure that output will be valid UTF-32 string. */ +#endif + return CInnerConverter::stdConvert(WToUtf32, wStringSrc, utf32StringDst, failOnBadChar); +} + +// The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping +// of the string is already made or the string is not displayed in the GUI +bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/, + bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/) +{ + // Try to flip hebrew/arabic characters, if any + if (bVisualBiDiFlip) + { + wStringDst.clear(); + std::u32string utf32str; + if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32str, failOnBadChar)) + return false; + + std::u32string utf32flipped; + const bool bidiResult = CInnerConverter::logicalToVisualBiDi(utf32str, utf32flipped, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar); + + return CInnerConverter::stdConvert(Utf32ToW, utf32flipped, wStringDst, failOnBadChar) && bidiResult; + } + + return CInnerConverter::stdConvert(Utf8toW, utf8StringSrc, wStringDst, failOnBadChar); +} + +bool CCharsetConverter::subtitleCharsetToUtf8(const std::string& stringSrc, std::string& utf8StringDst) +{ + return CInnerConverter::stdConvert(SubtitleCharsetToUtf8, stringSrc, utf8StringDst, false); +} + +bool CCharsetConverter::fromW(const std::wstring& wStringSrc, + std::string& stringDst, const std::string& enc) +{ + return CInnerConverter::customConvert(WCHAR_CHARSET, enc, wStringSrc, stringDst); +} + +bool CCharsetConverter::toW(const std::string& stringSrc, + std::wstring& wStringDst, const std::string& enc) +{ + return CInnerConverter::customConvert(enc, WCHAR_CHARSET, stringSrc, wStringDst); +} + +bool CCharsetConverter::utf8ToStringCharset(const std::string& utf8StringSrc, std::string& stringDst) +{ + return CInnerConverter::stdConvert(Utf8ToUserCharset, utf8StringSrc, stringDst); +} + +bool CCharsetConverter::utf8ToStringCharset(std::string& stringSrcDst) +{ + std::string strSrc(stringSrcDst); + return utf8ToStringCharset(strSrc, stringSrcDst); +} + +bool CCharsetConverter::ToUtf8(const std::string& strSourceCharset, const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) +{ + if (strSourceCharset == "UTF-8") + { // simple case - no conversion necessary + utf8StringDst = stringSrc; + return true; + } + + return CInnerConverter::customConvert(strSourceCharset, "UTF-8", stringSrc, utf8StringDst, failOnBadChar); +} + +bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::string& stringDst) +{ + if (strDestCharset == "UTF-8") + { // simple case - no conversion necessary + stringDst = utf8StringSrc; + return true; + } + + return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, stringDst); +} + +bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u16string& utf16StringDst) +{ + return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf16StringDst); +} + +bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u32string& utf32StringDst) +{ + return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf32StringDst); +} + +bool CCharsetConverter::unknownToUTF8(std::string& stringSrcDst) +{ + std::string source(stringSrcDst); + return unknownToUTF8(source, stringSrcDst); +} + +bool CCharsetConverter::unknownToUTF8(const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) +{ + // checks whether it's utf8 already, and if not converts using the sourceCharset if given, else the string charset + if (CUtf8Utils::isValidUtf8(stringSrc)) + { + utf8StringDst = stringSrc; + return true; + } + return CInnerConverter::stdConvert(UserCharsetToUtf8, stringSrc, utf8StringDst, failOnBadChar); +} + +bool CCharsetConverter::wToUTF8(const std::wstring& wStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) +{ + return CInnerConverter::stdConvert(WtoUtf8, wStringSrc, utf8StringDst, failOnBadChar); +} + +bool CCharsetConverter::utf16BEtoUTF8(const std::u16string& utf16StringSrc, std::string& utf8StringDst) +{ + return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst); +} + +bool CCharsetConverter::utf16BEtoUTF8(const std::string& utf16StringSrc, std::string& utf8StringDst) +{ + return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst); +} + +bool CCharsetConverter::utf16LEtoUTF8(const std::u16string& utf16StringSrc, + std::string& utf8StringDst) +{ + return CInnerConverter::stdConvert(Utf16LEtoUtf8, utf16StringSrc, utf8StringDst); +} + +bool CCharsetConverter::ucs2ToUTF8(const std::u16string& ucs2StringSrc, std::string& utf8StringDst) +{ + return CInnerConverter::stdConvert(Ucs2CharsetToUtf8, ucs2StringSrc,utf8StringDst); +} + +bool CCharsetConverter::utf16LEtoW(const std::u16string& utf16String, std::wstring& wString) +{ + return CInnerConverter::stdConvert(Utf16LEtoW, utf16String, wString); +} + +bool CCharsetConverter::utf32ToStringCharset(const std::u32string& utf32StringSrc, std::string& stringDst) +{ + return CInnerConverter::stdConvert(Utf32ToUserCharset, utf32StringSrc, stringDst); +} + +bool CCharsetConverter::utf8ToSystem(std::string& stringSrcDst, bool failOnBadChar /*= false*/) +{ + std::string strSrc(stringSrcDst); + return CInnerConverter::stdConvert(Utf8ToSystem, strSrc, stringSrcDst, failOnBadChar); +} + +bool CCharsetConverter::systemToUtf8(const std::string& sysStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/) +{ + return CInnerConverter::stdConvert(SystemToUtf8, sysStringSrc, utf8StringDst, failOnBadChar); +} + +bool CCharsetConverter::MacintoshToUTF8(const std::string& macStringSrc, std::string& utf8StringDst) +{ + return CInnerConverter::stdConvert(MacintoshToUtf8, macStringSrc, utf8StringDst); +} + +bool CCharsetConverter::utf8logicalToVisualBiDi(const std::string& utf8StringSrc, std::string& utf8StringDst, bool failOnBadString /*= false*/) +{ + utf8StringDst.clear(); + std::u32string utf32flipped; + if (!utf8ToUtf32Visual(utf8StringSrc, utf32flipped, true, true, failOnBadString)) + return false; + + return CInnerConverter::stdConvert(Utf32ToUtf8, utf32flipped, utf8StringDst, failOnBadString); +} + +bool CCharsetConverter::utf8IsRTLBidiDirection(const std::string& utf8String) +{ + return CInnerConverter::isBidiDirectionRTL(utf8String); +} + +void CCharsetConverter::SettingOptionsCharsetsFiller(const SettingConstPtr& setting, + std::vector& list, + std::string& current, + void* data) +{ + std::vector vecCharsets = g_charsetConverter.getCharsetLabels(); + sort(vecCharsets.begin(), vecCharsets.end(), sortstringbyname()); + + list.emplace_back(g_localizeStrings.Get(13278), "DEFAULT"); // "Default" + for (int i = 0; i < (int) vecCharsets.size(); ++i) + list.emplace_back(vecCharsets[i], g_charsetConverter.getCharsetNameByLabel(vecCharsets[i])); +} -- cgit v1.2.3