diff options
Diffstat (limited to 'xbmc/utils/test/TestCharsetConverter.cpp')
-rw-r--r-- | xbmc/utils/test/TestCharsetConverter.cpp | 401 |
1 files changed, 401 insertions, 0 deletions
diff --git a/xbmc/utils/test/TestCharsetConverter.cpp b/xbmc/utils/test/TestCharsetConverter.cpp new file mode 100644 index 0000000..f8736b7 --- /dev/null +++ b/xbmc/utils/test/TestCharsetConverter.cpp @@ -0,0 +1,401 @@ +/* + * Copyright (C) 2005-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "ServiceBroker.h" +#include "settings/Settings.h" +#include "settings/SettingsComponent.h" +#include "utils/CharsetConverter.h" +#include "utils/Utf8Utils.h" + +#include <gtest/gtest.h> + +#if 0 +static const uint16_t refutf16LE1[] = { 0xff54, 0xff45, 0xff53, 0xff54, + 0xff3f, 0xff55, 0xff54, 0xff46, + 0xff11, 0xff16, 0xff2c, 0xff25, + 0xff54, 0xff4f, 0xff57, 0x0 }; + +static const uint16_t refutf16LE2[] = { 0xff54, 0xff45, 0xff53, 0xff54, + 0xff3f, 0xff55, 0xff54, 0xff46, + 0xff18, 0xff34, 0xff4f, 0xff1a, + 0xff3f, 0xff43, 0xff48, 0xff41, + 0xff52, 0xff53, 0xff45, 0xff54, + 0xff3f, 0xff35, 0xff34, 0xff26, + 0xff0d, 0xff11, 0xff16, 0xff2c, + 0xff25, 0xff0c, 0xff3f, 0xff23, + 0xff33, 0xff54, 0xff44, 0xff33, + 0xff54, 0xff52, 0xff49, 0xff4e, + 0xff47, 0xff11, 0xff16, 0x0 }; +#endif + +static const char refutf16LE3[] = "T\377E\377S\377T\377?\377S\377T\377" + "R\377I\377N\377G\377#\377H\377A\377" + "R\377S\377E\377T\377\064\377O\377\065" + "\377T\377F\377\030\377"; + +#if 0 +static const uint16_t refutf16LE4[] = { 0xff54, 0xff45, 0xff53, 0xff54, + 0xff3f, 0xff55, 0xff54, 0xff46, + 0xff11, 0xff16, 0xff2c, 0xff25, + 0xff54, 0xff4f, 0xff35, 0xff34, + 0xff26, 0xff18, 0x0 }; + +static const uint32_t refutf32LE1[] = { 0xff54, 0xff45, 0xff53, 0xff54, + 0xff3f, 0xff55, 0xff54, 0xff46, + 0xff18, 0xff34, 0xff4f, 0xff1a, + 0xff3f, 0xff43, 0xff48, 0xff41, + 0xff52, 0xff53, 0xff45, 0xff54, + 0xff3f, 0xff35, 0xff34, 0xff26, + 0xff0d, 0xff13, 0xff12, 0xff2c, + 0xff25, 0xff0c, 0xff3f, 0xff23, + 0xff33, 0xff54, 0xff44, 0xff33, + 0xff54, 0xff52, 0xff49, 0xff4e, + 0xff47, 0xff13, 0xff12, 0xff3f, +#ifdef TARGET_DARWIN + 0x0 }; +#else + 0x1f42d, 0x1f42e, 0x0 }; +#endif + +static const uint16_t refutf16BE[] = { 0x54ff, 0x45ff, 0x53ff, 0x54ff, + 0x3fff, 0x55ff, 0x54ff, 0x46ff, + 0x11ff, 0x16ff, 0x22ff, 0x25ff, + 0x54ff, 0x4fff, 0x35ff, 0x34ff, + 0x26ff, 0x18ff, 0x0}; + +static const uint16_t refucs2[] = { 0xff54, 0xff45, 0xff53, 0xff54, + 0xff3f, 0xff55, 0xff43, 0xff53, + 0xff12, 0xff54, 0xff4f, 0xff35, + 0xff34, 0xff26, 0xff18, 0x0 }; +#endif + +class TestCharsetConverter : public testing::Test +{ +protected: + TestCharsetConverter() + { + /* Add default settings for locale. + * Settings here are taken from CGUISettings::Initialize() + */ + /* + //! @todo implement + CSettingsCategory *loc = CServiceBroker::GetSettingsComponent()->GetSettings()->AddCategory(7, "locale", 14090); + CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(loc, CSettings::SETTING_LOCALE_LANGUAGE,248,"english", + SPIN_CONTROL_TEXT); + CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(loc, CSettings::SETTING_LOCALE_COUNTRY, 20026, "USA", + SPIN_CONTROL_TEXT); + CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(loc, CSettings::SETTING_LOCALE_CHARSET, 14091, "DEFAULT", + SPIN_CONTROL_TEXT); // charset is set by the + // language file + + // Add default settings for subtitles + CSettingsCategory *sub = CServiceBroker::GetSettingsComponent()->GetSettings()->AddCategory(5, "subtitles", 287); + CServiceBroker::GetSettingsComponent()->GetSettings()->AddString(sub, CSettings::SETTING_SUBTITLES_CHARSET, 735, "DEFAULT", + SPIN_CONTROL_TEXT); + */ + g_charsetConverter.reset(); + g_charsetConverter.clear(); + } + + ~TestCharsetConverter() override + { + CServiceBroker::GetSettingsComponent()->GetSettings()->Unload(); + } + + std::string refstra1, refstra2, varstra1; + std::wstring refstrw1, varstrw1; + std::string refstr1; +}; + +TEST_F(TestCharsetConverter, utf8ToW) +{ + refstra1 = "test utf8ToW"; + refstrw1 = L"test utf8ToW"; + varstrw1.clear(); + g_charsetConverter.utf8ToW(refstra1, varstrw1, true, false, false); + EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str()); +} + + +//TEST_F(TestCharsetConverter, utf16LEtoW) +//{ +// refstrw1 = L"test_utf16LEtow"; +// //! @todo Should be able to use '=' operator instead of assign() +// std::wstring refstr16_1; +// refstr16_1.assign(refutf16LE1); +// varstrw1.clear(); +// g_charsetConverter.utf16LEtoW(refstr16_1, varstrw1); +// EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str()); +//} + +TEST_F(TestCharsetConverter, subtitleCharsetToUtf8) +{ + refstra1 = "test subtitleCharsetToW"; + varstra1.clear(); + g_charsetConverter.subtitleCharsetToUtf8(refstra1, varstra1); + + /* Assign refstra1 to refstrw1 so that we can compare */ + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, utf8ToStringCharset_1) +{ + refstra1 = "test utf8ToStringCharset"; + varstra1.clear(); + g_charsetConverter.utf8ToStringCharset(refstra1, varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, utf8ToStringCharset_2) +{ + refstra1 = "test utf8ToStringCharset"; + varstra1 = "test utf8ToStringCharset"; + g_charsetConverter.utf8ToStringCharset(varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, utf8ToSystem) +{ + refstra1 = "test utf8ToSystem"; + varstra1 = "test utf8ToSystem"; + g_charsetConverter.utf8ToSystem(varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, utf8To_ASCII) +{ + refstra1 = "test utf8To: charset ASCII, std::string"; + varstra1.clear(); + g_charsetConverter.utf8To("ASCII", refstra1, varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +/* +TEST_F(TestCharsetConverter, utf8To_UTF16LE) +{ + refstra1 = "test_utf8To:_charset_UTF-16LE,_" + "CStdString16"; + refstr16_1.assign(refutf16LE2); + varstr16_1.clear(); + g_charsetConverter.utf8To("UTF-16LE", refstra1, varstr16_1); + EXPECT_TRUE(!memcmp(refstr16_1.c_str(), varstr16_1.c_str(), + refstr16_1.length() * sizeof(uint16_t))); +} +*/ + +//TEST_F(TestCharsetConverter, utf8To_UTF32LE) +//{ +// refstra1 = "test_utf8To:_charset_UTF-32LE,_" +//#ifdef TARGET_DARWIN +///* OSX has its own 'special' utf-8 charset which we use (see UTF8_SOURCE in CharsetConverter.cpp) +// which is basically NFD (decomposed) utf-8. The trouble is, it fails on the COW FACE and MOUSE FACE +// characters for some reason (possibly anything over 0x100000, or maybe there's a decomposed form of these +// that I couldn't find???) If UTF8_SOURCE is switched to UTF-8 then this test would pass as-is, but then +// some filenames stored in utf8-mac wouldn't display correctly in the UI. */ +// "CStdString32_"; +//#else +// "CStdString32_🐭🐮"; +//#endif +// refstr32_1.assign(refutf32LE1); +// varstr32_1.clear(); +// g_charsetConverter.utf8To("UTF-32LE", refstra1, varstr32_1); +// EXPECT_TRUE(!memcmp(refstr32_1.c_str(), varstr32_1.c_str(), +// sizeof(refutf32LE1))); +//} + +TEST_F(TestCharsetConverter, stringCharsetToUtf8) +{ + refstra1 = "test_stringCharsetToUtf8"; + varstra1.clear(); + g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, isValidUtf8_1) +{ + varstra1.clear(); + g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1); + EXPECT_TRUE(CUtf8Utils::isValidUtf8(varstra1.c_str())); +} + +TEST_F(TestCharsetConverter, isValidUtf8_2) +{ + refstr1 = refutf16LE3; + EXPECT_FALSE(CUtf8Utils::isValidUtf8(refstr1)); +} + +TEST_F(TestCharsetConverter, isValidUtf8_3) +{ + varstra1.clear(); + g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1); + EXPECT_TRUE(CUtf8Utils::isValidUtf8(varstra1.c_str())); +} + +TEST_F(TestCharsetConverter, isValidUtf8_4) +{ + EXPECT_FALSE(CUtf8Utils::isValidUtf8(refutf16LE3)); +} + +//! @todo Resolve correct input/output for this function +// TEST_F(TestCharsetConverter, ucs2CharsetToStringCharset) +// { +// void ucs2CharsetToStringCharset(const std::wstring& strSource, +// std::string& strDest, bool swap = false); +// } + +TEST_F(TestCharsetConverter, wToUTF8) +{ + refstrw1 = L"test_wToUTF8"; + refstra1 = u8"test_wToUTF8"; + varstra1.clear(); + g_charsetConverter.wToUTF8(refstrw1, varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +//TEST_F(TestCharsetConverter, utf16BEtoUTF8) +//{ +// refstr16_1.assign(refutf16BE); +// refstra1 = "test_utf16BEtoUTF8"; +// varstra1.clear(); +// g_charsetConverter.utf16BEtoUTF8(refstr16_1, varstra1); +// EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +//} + +//TEST_F(TestCharsetConverter, utf16LEtoUTF8) +//{ +// refstr16_1.assign(refutf16LE4); +// refstra1 = "test_utf16LEtoUTF8"; +// varstra1.clear(); +// g_charsetConverter.utf16LEtoUTF8(refstr16_1, varstra1); +// EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +//} + +//TEST_F(TestCharsetConverter, ucs2ToUTF8) +//{ +// refstr16_1.assign(refucs2); +// refstra1 = "test_ucs2toUTF8"; +// varstra1.clear(); +// g_charsetConverter.ucs2ToUTF8(refstr16_1, varstra1); +// EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +//} + +TEST_F(TestCharsetConverter, utf8logicalToVisualBiDi) +{ + refstra1 = "test_utf8logicalToVisualBiDi"; + refstra2 = "test_utf8logicalToVisualBiDi"; + varstra1.clear(); + g_charsetConverter.utf8logicalToVisualBiDi(refstra1, varstra1); + EXPECT_STREQ(refstra2.c_str(), varstra1.c_str()); +} + +//! @todo Resolve correct input/output for this function +// TEST_F(TestCharsetConverter, utf32ToStringCharset) +// { +// void utf32ToStringCharset(const unsigned long* strSource, std::string& strDest); +// } + +TEST_F(TestCharsetConverter, getCharsetLabels) +{ + std::vector<std::string> reflabels; + reflabels.emplace_back("Western Europe (ISO)"); + reflabels.emplace_back("Central Europe (ISO)"); + reflabels.emplace_back("South Europe (ISO)"); + reflabels.emplace_back("Baltic (ISO)"); + reflabels.emplace_back("Cyrillic (ISO)"); + reflabels.emplace_back("Arabic (ISO)"); + reflabels.emplace_back("Greek (ISO)"); + reflabels.emplace_back("Hebrew (ISO)"); + reflabels.emplace_back("Turkish (ISO)"); + reflabels.emplace_back("Central Europe (Windows)"); + reflabels.emplace_back("Cyrillic (Windows)"); + reflabels.emplace_back("Western Europe (Windows)"); + reflabels.emplace_back("Greek (Windows)"); + reflabels.emplace_back("Turkish (Windows)"); + reflabels.emplace_back("Hebrew (Windows)"); + reflabels.emplace_back("Arabic (Windows)"); + reflabels.emplace_back("Baltic (Windows)"); + reflabels.emplace_back("Vietnamese (Windows)"); + reflabels.emplace_back("Thai (Windows)"); + reflabels.emplace_back("Chinese Traditional (Big5)"); + reflabels.emplace_back("Chinese Simplified (GBK)"); + reflabels.emplace_back("Japanese (Shift-JIS)"); + reflabels.emplace_back("Korean"); + reflabels.emplace_back("Hong Kong (Big5-HKSCS)"); + + std::vector<std::string> varlabels = g_charsetConverter.getCharsetLabels(); + ASSERT_EQ(reflabels.size(), varlabels.size()); + + size_t pos = 0; + for (const auto& it : varlabels) + { + EXPECT_STREQ((reflabels.at(pos++)).c_str(), it.c_str()); + } +} + +TEST_F(TestCharsetConverter, getCharsetLabelByName) +{ + std::string varstr = + g_charsetConverter.getCharsetLabelByName("ISO-8859-1"); + EXPECT_STREQ("Western Europe (ISO)", varstr.c_str()); + varstr.clear(); + varstr = g_charsetConverter.getCharsetLabelByName("Bogus"); + EXPECT_STREQ("", varstr.c_str()); +} + +TEST_F(TestCharsetConverter, getCharsetNameByLabel) +{ + std::string varstr = + g_charsetConverter.getCharsetNameByLabel("Western Europe (ISO)"); + EXPECT_STREQ("ISO-8859-1", varstr.c_str()); + varstr.clear(); + varstr = g_charsetConverter.getCharsetNameByLabel("Bogus"); + EXPECT_STREQ("", varstr.c_str()); +} + +TEST_F(TestCharsetConverter, unknownToUTF8_1) +{ + refstra1 = "test_unknownToUTF8"; + varstra1 = "test_unknownToUTF8"; + g_charsetConverter.unknownToUTF8(varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, unknownToUTF8_2) +{ + refstra1 = "test_unknownToUTF8"; + varstra1.clear(); + g_charsetConverter.unknownToUTF8(refstra1, varstra1); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} + +TEST_F(TestCharsetConverter, toW) +{ + refstra1 = "test_toW:_charset_UTF-16LE"; + refstrw1 = L"\xBDEF\xEF94\x85BD\xBDEF\xEF93\x94BD\xBCEF\xEFBF" + L"\x94BD\xBDEF\xEF8F\xB7BC\xBCEF\xEF9A\xBFBC\xBDEF" + L"\xEF83\x88BD\xBDEF\xEF81\x92BD\xBDEF\xEF93\x85BD" + L"\xBDEF\xEF94\xBFBC\xBCEF\xEFB5\xB4BC\xBCEF\xEFA6" + L"\x8DBC\xBCEF\xEF91\x96BC\xBCEF\xEFAC\xA5BC"; + varstrw1.clear(); + g_charsetConverter.toW(refstra1, varstrw1, "UTF-16LE"); + EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str()); +} + +TEST_F(TestCharsetConverter, fromW) +{ + refstrw1 = L"\xBDEF\xEF94\x85BD\xBDEF\xEF93\x94BD\xBCEF\xEFBF" + L"\x86BD\xBDEF\xEF92\x8FBD\xBDEF\xEF8D\xB7BC\xBCEF" + L"\xEF9A\xBFBC\xBDEF\xEF83\x88BD\xBDEF\xEF81\x92BD" + L"\xBDEF\xEF93\x85BD\xBDEF\xEF94\xBFBC\xBCEF\xEFB5" + L"\xB4BC\xBCEF\xEFA6\x8DBC\xBCEF\xEF91\x96BC\xBCEF" + L"\xEFAC\xA5BC"; + refstra1 = "test_fromW:_charset_UTF-16LE"; + varstra1.clear(); + g_charsetConverter.fromW(refstrw1, varstra1, "UTF-16LE"); + EXPECT_STREQ(refstra1.c_str(), varstra1.c_str()); +} |