From c04dcc2e7d834218ef2d4194331e383402495ae1 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 20:07:22 +0200 Subject: Adding upstream version 2:20.4+dfsg. Signed-off-by: Daniel Baumann --- xbmc/utils/HTMLUtil.cpp | 229 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 xbmc/utils/HTMLUtil.cpp (limited to 'xbmc/utils/HTMLUtil.cpp') diff --git a/xbmc/utils/HTMLUtil.cpp b/xbmc/utils/HTMLUtil.cpp new file mode 100644 index 0000000..8687ffe --- /dev/null +++ b/xbmc/utils/HTMLUtil.cpp @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2005-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "HTMLUtil.h" + +#include "utils/StringUtils.h" + +#include + +using namespace HTML; + +CHTMLUtil::CHTMLUtil(void) = default; + +CHTMLUtil::~CHTMLUtil(void) = default; + +void CHTMLUtil::RemoveTags(std::string& strHTML) +{ + int iNested = 0; + std::string strReturn = ""; + for (int i = 0; i < (int) strHTML.size(); ++i) + { + if (strHTML[i] == '<') iNested++; + else if (strHTML[i] == '>') iNested--; + else + { + if (!iNested) + { + strReturn += strHTML[i]; + } + } + } + + strHTML = strReturn; +} + +typedef struct +{ + const wchar_t* html; + const wchar_t w; +} HTMLMapping; + +static const HTMLMapping mappings[] = + {{L"&", 0x0026}, + {L"'", 0x0027}, + {L"´", 0x00B4}, + {L"à", 0x00E0}, + {L"á", 0x00E1}, + {L"â", 0x00E2}, + {L"ã", 0x00E3}, + {L"ä", 0x00E4}, + {L"å", 0x00E5}, + {L"æ", 0x00E6}, + {L"À", 0x00C0}, + {L"Á", 0x00C1}, + {L"Â", 0x00C2}, + {L"Ã", 0x00C3}, + {L"Ä", 0x00C4}, + {L"Å", 0x00C5}, + {L"Æ", 0x00C6}, + {L"„", 0x201E}, + {L"¦", 0x00A6}, + {L"•", 0x2022}, + {L"•", 0x2022}, + {L"¢", 0x00A2}, + {L"ˆ", 0x02C6}, + {L"¤", 0x00A4}, + {L"©", 0x00A9}, + {L"¸", 0x00B8}, + {L"Ç", 0x00C7}, + {L"ç", 0x00E7}, + {L"†", 0x2020}, + {L"°", 0x00B0}, + {L"÷", 0x00F7}, + {L"‡", 0x2021}, + {L"è", 0x00E8}, + {L"é", 0x00E9}, + {L"ê", 0x00EA}, + {L" ", 0x2003}, + {L" ", 0x2002}, + {L"ë", 0x00EB}, + {L"ð", 0x00F0}, + {L"€", 0x20AC}, + {L"È", 0x00C8}, + {L"É", 0x00C9}, + {L"Ê", 0x00CA}, + {L"Ë", 0x00CB}, + {L"Ð", 0x00D0}, + {L""", 0x0022}, + {L"⁄", 0x2044}, + {L"¼", 0x00BC}, + {L"½", 0x00BD}, + {L"¾", 0x00BE}, + {L">", 0x003E}, + {L"…", 0x2026}, + {L"¡", 0x00A1}, + {L"¿", 0x00BF}, + {L"ì", 0x00EC}, + {L"í", 0x00ED}, + {L"î", 0x00EE}, + {L"ï", 0x00EF}, + {L"Ì", 0x00CC}, + {L"Í", 0x00CD}, + {L"Î", 0x00CE}, + {L"Ï", 0x00CF}, + {L"‎", 0x200E}, + {L"<", 0x003C}, + {L"«", 0x00AB}, + {L"“", 0x201C}, + {L"‹", 0x2039}, + {L"‘", 0x2018}, + {L"¯", 0x00AF}, + {L"µ", 0x00B5}, + {L"·", 0x00B7}, + {L"—", 0x2014}, + {L" ", 0x00A0}, + {L"–", 0x2013}, + {L"ñ", 0x00F1}, + {L"¬", 0x00AC}, + {L"Ñ", 0x00D1}, + {L"ª", 0x00AA}, + {L"º", 0x00BA}, + {L"œ", 0x0153}, + {L"ò", 0x00F2}, + {L"ó", 0x00F3}, + {L"ô", 0x00F4}, + {L"õ", 0x00F5}, + {L"ö", 0x00F6}, + {L"ø", 0x00F8}, + {L"Œ", 0x0152}, + {L"Ò", 0x00D2}, + {L"Ó", 0x00D3}, + {L"Ô", 0x00D4}, + {L"Õ", 0x00D5}, + {L"Ö", 0x00D6}, + {L"Ø", 0x00D8}, + {L"¶", 0x00B6}, + {L"‰", 0x2030}, + {L"±", 0x00B1}, + {L"£", 0x00A3}, + {L"»", 0x00BB}, + {L"”", 0x201D}, + {L"®", 0x00AE}, + {L"‏", 0x200F}, + {L"›", 0x203A}, + {L"’", 0x2019}, + {L"‚", 0x201A}, + {L"š", 0x0161}, + {L"§", 0x00A7}, + {L"­", 0x00AD}, + {L"¹", 0x00B9}, + {L"²", 0x00B2}, + {L"³", 0x00B3}, + {L"ß", 0x00DF}, + {L"Š", 0x0160}, + {L" ", 0x2009}, + {L"þ", 0x00FE}, + {L"˜", 0x02DC}, + {L"×", 0x00D7}, + {L"™", 0x2122}, + {L"Þ", 0x00DE}, + {L"¨", 0x00A8}, + {L"ù", 0x00F9}, + {L"ú", 0x00FA}, + {L"û", 0x00FB}, + {L"ü", 0x00FC}, + {L"Ù", 0x00D9}, + {L"Ú", 0x00DA}, + {L"Û", 0x00DB}, + {L"Ü", 0x00DC}, + {L"¥", 0x00A5}, + {L"ÿ", 0x00FF}, + {L"ý", 0x00FD}, + {L"Ý", 0x00DD}, + {L"Ÿ", 0x0178}, + {L"‍", 0x200D}, + {L"‌", 0x200C}, + {NULL, L'\0'}}; + +void CHTMLUtil::ConvertHTMLToW(const std::wstring& strHTML, std::wstring& strStripped) +{ + //! @todo STRING_CLEANUP + if (strHTML.empty()) + { + strStripped.clear(); + return ; + } + size_t iPos = 0; + strStripped = strHTML; + while (mappings[iPos].html) + { + StringUtils::Replace(strStripped, mappings[iPos].html,std::wstring(1, mappings[iPos].w)); + iPos++; + } + + iPos = strStripped.find(L"&#"); + while (iPos > 0 && iPos < strStripped.size() - 4) + { + size_t iStart = iPos + 1; + iPos += 2; + std::wstring num; + int base = 10; + if (strStripped[iPos] == L'x') + { + base = 16; + iPos++; + } + + size_t i = iPos; + while (iPos < strStripped.size() && + (base == 16 ? iswxdigit(strStripped[iPos]) : iswdigit(strStripped[iPos]))) + iPos++; + + num = strStripped.substr(i, iPos-i); + wchar_t val = (wchar_t)wcstol(num.c_str(),NULL,base); + if (base == 10) + num = StringUtils::Format(L"&#{};", num); + else + num = StringUtils::Format(L"&#x{};", num); + + StringUtils::Replace(strStripped, num,std::wstring(1,val)); + iPos = strStripped.find(L"&#", iStart); + } +} + -- cgit v1.2.3