/* * Copyright (C) 2005-2018 Team Kodi * This file is part of Kodi - https://kodi.tv * * SPDX-License-Identifier: GPL-2.0-or-later * See LICENSES/README.md for more information. */ #include "HTMLUtil.h" #include "utils/StringUtils.h" #include using namespace HTML; CHTMLUtil::CHTMLUtil(void) = default; CHTMLUtil::~CHTMLUtil(void) = default; void CHTMLUtil::RemoveTags(std::string& strHTML) { int iNested = 0; std::string strReturn = ""; for (int i = 0; i < (int) strHTML.size(); ++i) { if (strHTML[i] == '<') iNested++; else if (strHTML[i] == '>') iNested--; else { if (!iNested) { strReturn += strHTML[i]; } } } strHTML = strReturn; } typedef struct { const wchar_t* html; const wchar_t w; } HTMLMapping; static const HTMLMapping mappings[] = {{L"&", 0x0026}, {L"'", 0x0027}, {L"´", 0x00B4}, {L"à", 0x00E0}, {L"á", 0x00E1}, {L"â", 0x00E2}, {L"ã", 0x00E3}, {L"ä", 0x00E4}, {L"å", 0x00E5}, {L"æ", 0x00E6}, {L"À", 0x00C0}, {L"Á", 0x00C1}, {L"Â", 0x00C2}, {L"Ã", 0x00C3}, {L"Ä", 0x00C4}, {L"Å", 0x00C5}, {L"Æ", 0x00C6}, {L"„", 0x201E}, {L"¦", 0x00A6}, {L"•", 0x2022}, {L"•", 0x2022}, {L"¢", 0x00A2}, {L"ˆ", 0x02C6}, {L"¤", 0x00A4}, {L"©", 0x00A9}, {L"¸", 0x00B8}, {L"Ç", 0x00C7}, {L"ç", 0x00E7}, {L"†", 0x2020}, {L"°", 0x00B0}, {L"÷", 0x00F7}, {L"‡", 0x2021}, {L"è", 0x00E8}, {L"é", 0x00E9}, {L"ê", 0x00EA}, {L" ", 0x2003}, {L" ", 0x2002}, {L"ë", 0x00EB}, {L"ð", 0x00F0}, {L"€", 0x20AC}, {L"È", 0x00C8}, {L"É", 0x00C9}, {L"Ê", 0x00CA}, {L"Ë", 0x00CB}, {L"Ð", 0x00D0}, {L""", 0x0022}, {L"⁄", 0x2044}, {L"¼", 0x00BC}, {L"½", 0x00BD}, {L"¾", 0x00BE}, {L">", 0x003E}, {L"…", 0x2026}, {L"¡", 0x00A1}, {L"¿", 0x00BF}, {L"ì", 0x00EC}, {L"í", 0x00ED}, {L"î", 0x00EE}, {L"ï", 0x00EF}, {L"Ì", 0x00CC}, {L"Í", 0x00CD}, {L"Î", 0x00CE}, {L"Ï", 0x00CF}, {L"‎", 0x200E}, {L"<", 0x003C}, {L"«", 0x00AB}, {L"“", 0x201C}, {L"‹", 0x2039}, {L"‘", 0x2018}, {L"¯", 0x00AF}, {L"µ", 0x00B5}, {L"·", 0x00B7}, {L"—", 0x2014}, {L" ", 0x00A0}, {L"–", 0x2013}, {L"ñ", 0x00F1}, {L"¬", 0x00AC}, {L"Ñ", 0x00D1}, {L"ª", 0x00AA}, {L"º", 0x00BA}, {L"œ", 0x0153}, {L"ò", 0x00F2}, {L"ó", 0x00F3}, {L"ô", 0x00F4}, {L"õ", 0x00F5}, {L"ö", 0x00F6}, {L"ø", 0x00F8}, {L"Œ", 0x0152}, {L"Ò", 0x00D2}, {L"Ó", 0x00D3}, {L"Ô", 0x00D4}, {L"Õ", 0x00D5}, {L"Ö", 0x00D6}, {L"Ø", 0x00D8}, {L"¶", 0x00B6}, {L"‰", 0x2030}, {L"±", 0x00B1}, {L"£", 0x00A3}, {L"»", 0x00BB}, {L"”", 0x201D}, {L"®", 0x00AE}, {L"‏", 0x200F}, {L"›", 0x203A}, {L"’", 0x2019}, {L"‚", 0x201A}, {L"š", 0x0161}, {L"§", 0x00A7}, {L"­", 0x00AD}, {L"¹", 0x00B9}, {L"²", 0x00B2}, {L"³", 0x00B3}, {L"ß", 0x00DF}, {L"Š", 0x0160}, {L" ", 0x2009}, {L"þ", 0x00FE}, {L"˜", 0x02DC}, {L"×", 0x00D7}, {L"™", 0x2122}, {L"Þ", 0x00DE}, {L"¨", 0x00A8}, {L"ù", 0x00F9}, {L"ú", 0x00FA}, {L"û", 0x00FB}, {L"ü", 0x00FC}, {L"Ù", 0x00D9}, {L"Ú", 0x00DA}, {L"Û", 0x00DB}, {L"Ü", 0x00DC}, {L"¥", 0x00A5}, {L"ÿ", 0x00FF}, {L"ý", 0x00FD}, {L"Ý", 0x00DD}, {L"Ÿ", 0x0178}, {L"‍", 0x200D}, {L"‌", 0x200C}, {NULL, L'\0'}}; void CHTMLUtil::ConvertHTMLToW(const std::wstring& strHTML, std::wstring& strStripped) { //! @todo STRING_CLEANUP if (strHTML.empty()) { strStripped.clear(); return ; } size_t iPos = 0; strStripped = strHTML; while (mappings[iPos].html) { StringUtils::Replace(strStripped, mappings[iPos].html,std::wstring(1, mappings[iPos].w)); iPos++; } iPos = strStripped.find(L"&#"); while (iPos > 0 && iPos < strStripped.size() - 4) { size_t iStart = iPos + 1; iPos += 2; std::wstring num; int base = 10; if (strStripped[iPos] == L'x') { base = 16; iPos++; } size_t i = iPos; while (iPos < strStripped.size() && (base == 16 ? iswxdigit(strStripped[iPos]) : iswdigit(strStripped[iPos]))) iPos++; num = strStripped.substr(i, iPos-i); wchar_t val = (wchar_t)wcstol(num.c_str(),NULL,base); if (base == 10) num = StringUtils::Format(L"&#{};", num); else num = StringUtils::Format(L"&#x{};", num); StringUtils::Replace(strStripped, num,std::wstring(1,val)); iPos = strStripped.find(L"&#", iStart); } }