diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 18:07:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 18:07:22 +0000 |
commit | c04dcc2e7d834218ef2d4194331e383402495ae1 (patch) | |
tree | 7333e38d10d75386e60f336b80c2443c1166031d /xbmc/filesystem/HTTPDirectory.cpp | |
parent | Initial commit. (diff) | |
download | kodi-c04dcc2e7d834218ef2d4194331e383402495ae1.tar.xz kodi-c04dcc2e7d834218ef2d4194331e383402495ae1.zip |
Adding upstream version 2:20.4+dfsg.upstream/2%20.4+dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'xbmc/filesystem/HTTPDirectory.cpp')
-rw-r--r-- | xbmc/filesystem/HTTPDirectory.cpp | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/xbmc/filesystem/HTTPDirectory.cpp b/xbmc/filesystem/HTTPDirectory.cpp new file mode 100644 index 0000000..0097b7f --- /dev/null +++ b/xbmc/filesystem/HTTPDirectory.cpp @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2005-2018 Team Kodi + * This file is part of Kodi - https://kodi.tv + * + * SPDX-License-Identifier: GPL-2.0-or-later + * See LICENSES/README.md for more information. + */ + +#include "HTTPDirectory.h" + +#include "CurlFile.h" +#include "FileItem.h" +#include "ServiceBroker.h" +#include "URL.h" +#include "settings/AdvancedSettings.h" +#include "settings/SettingsComponent.h" +#include "utils/CharsetConverter.h" +#include "utils/HTMLUtil.h" +#include "utils/RegExp.h" +#include "utils/StringUtils.h" +#include "utils/URIUtils.h" +#include "utils/log.h" + +#include <climits> + +using namespace XFILE; + +CHTTPDirectory::CHTTPDirectory(void) = default; +CHTTPDirectory::~CHTTPDirectory(void) = default; + +bool CHTTPDirectory::GetDirectory(const CURL& url, CFileItemList &items) +{ + CCurlFile http; + + const std::string& strBasePath = url.GetFileName(); + + if(!http.Open(url)) + { + CLog::Log(LOGERROR, "{} - Unable to get http directory ({})", __FUNCTION__, url.GetRedacted()); + return false; + } + + CRegExp reItem(true); // HTML is case-insensitive + reItem.RegComp("<a href=\"([^\"]*)\"[^>]*>\\s*(.*?)\\s*</a>(.+?)(?=<a|</tr|$)"); + + CRegExp reDateTimeHtml(true); + reDateTimeHtml.RegComp( + "<td align=\"right\">([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}) +</td>"); + + CRegExp reDateTimeLighttp(true); + reDateTimeLighttp.RegComp( + "<td class=\"m\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})</td>"); + + CRegExp reDateTimeNginx(true); + reDateTimeNginx.RegComp("([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2})"); + + CRegExp reDateTimeNginxFancy(true); + reDateTimeNginxFancy.RegComp( + "<td class=\"date\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2})</td>"); + + CRegExp reDateTimeApacheNewFormat(true); + reDateTimeApacheNewFormat.RegComp( + "<td align=\"right\">([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}) +</td>"); + + CRegExp reDateTime(true); + reDateTime.RegComp("([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2})"); + + CRegExp reSizeHtml(true); + reSizeHtml.RegComp("> *([0-9.]+) *(B|K|M|G| )(iB)?</td>"); + + CRegExp reSize(true); + reSize.RegComp(" +([0-9]+)(B|K|M|G)?(?=\\s|<|$)"); + + /* read response from server into string buffer */ + std::string strBuffer; + if (http.ReadData(strBuffer) && strBuffer.length() > 0) + { + /* if Content-Length is found and its not text/html, URL is pointing to file so don't treat URL as HTTPDirectory */ + if (!http.GetHttpHeader().GetValue("Content-Length").empty() && + !StringUtils::StartsWithNoCase(http.GetHttpHeader().GetValue("Content-type"), "text/html")) + { + return false; + } + + std::string fileCharset(http.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET)); + if (!fileCharset.empty() && fileCharset != "UTF-8") + { + std::string converted; + if (g_charsetConverter.ToUtf8(fileCharset, strBuffer, converted) && !converted.empty()) + strBuffer = converted; + } + + unsigned int bufferOffset = 0; + while (bufferOffset < strBuffer.length()) + { + int matchOffset = reItem.RegFind(strBuffer.c_str(), bufferOffset); + if (matchOffset < 0) + break; + + bufferOffset = matchOffset + reItem.GetSubLength(0); + + std::string strLink = reItem.GetMatch(1); + std::string strName = reItem.GetMatch(2); + std::string strMetadata = reItem.GetMatch(3); + StringUtils::Trim(strMetadata); + + if(strLink[0] == '/') + strLink = strLink.substr(1); + + std::string strNameTemp = StringUtils::Trim(strName); + + std::wstring wName, wLink, wConverted; + if (fileCharset.empty()) + g_charsetConverter.unknownToUTF8(strNameTemp); + g_charsetConverter.utf8ToW(strNameTemp, wName, false); + HTML::CHTMLUtil::ConvertHTMLToW(wName, wConverted); + g_charsetConverter.wToUTF8(wConverted, strNameTemp); + URIUtils::RemoveSlashAtEnd(strNameTemp); + + std::string strLinkBase = strLink; + std::string strLinkOptions; + + // split link with url options + size_t pos = strLinkBase.find('?'); + if (pos != std::string::npos) + { + strLinkOptions = strLinkBase.substr(pos); + strLinkBase.erase(pos); + } + + // strip url fragment from the link + pos = strLinkBase.find('#'); + if (pos != std::string::npos) + { + strLinkBase.erase(pos); + } + + // Convert any HTTP character entities (e.g.: "&") to percentage encoding + // (e.g.: "%xx") as some web servers (Apache) put these in HTTP Directory Indexes + // this is also needed as CURL objects interpret them incorrectly due to the ; + // also being allowed as URL option separator + if (fileCharset.empty()) + g_charsetConverter.unknownToUTF8(strLinkBase); + g_charsetConverter.utf8ToW(strLinkBase, wLink, false); + HTML::CHTMLUtil::ConvertHTMLToW(wLink, wConverted); + g_charsetConverter.wToUTF8(wConverted, strLinkBase); + + // encoding + and ; to URL encode if it is not already encoded by http server used on the remote server (example: Apache) + // more characters may be added here when required when required by certain http servers + pos = strLinkBase.find_first_of("+;"); + while (pos != std::string::npos) + { + std::stringstream convert; + convert << '%' << std::hex << int(strLinkBase.at(pos)); + strLinkBase.replace(pos, 1, convert.str()); + pos = strLinkBase.find_first_of("+;"); + } + + std::string strLinkTemp = strLinkBase; + + URIUtils::RemoveSlashAtEnd(strLinkTemp); + strLinkTemp = CURL::Decode(strLinkTemp); + + if (StringUtils::EndsWith(strNameTemp, "..>") && + StringUtils::StartsWith(strLinkTemp, strNameTemp.substr(0, strNameTemp.length() - 3))) + strName = strNameTemp = strLinkTemp; + + /* Per RFC 1808 ยง 5.3, relative paths containing a colon ":" should be either prefixed with + * "./" or escaped (as "%3A"). This handles the prefix case, the escaping should be handled by + * the CURL::Decode above + * - https://tools.ietf.org/html/rfc1808#section-5.3 + */ + auto NameMatchesLink([](const std::string& name, const std::string& link) -> bool + { + return (name == link) || + ((std::string::npos != name.find(':')) && (std::string{"./"}.append(name) == link)); + }); + + // we detect http directory items by its display name and its stripped link + // if same, we consider it as a valid item. + if (strLinkTemp != ".." && strLinkTemp != "" && NameMatchesLink(strNameTemp, strLinkTemp)) + { + CFileItemPtr pItem(new CFileItem(strNameTemp)); + pItem->SetProperty("IsHTTPDirectory", true); + CURL url2(url); + + url2.SetFileName(strBasePath + strLinkBase); + url2.SetOptions(strLinkOptions); + pItem->SetURL(url2); + + if(URIUtils::HasSlashAtEnd(pItem->GetPath(), true)) + pItem->m_bIsFolder = true; + + std::string day, month, year, hour, minute; + int monthNum = 0; + + if (reDateTimeHtml.RegFind(strMetadata.c_str()) >= 0) + { + day = reDateTimeHtml.GetMatch(1); + month = reDateTimeHtml.GetMatch(2); + year = reDateTimeHtml.GetMatch(3); + hour = reDateTimeHtml.GetMatch(4); + minute = reDateTimeHtml.GetMatch(5); + } + else if (reDateTimeNginxFancy.RegFind(strMetadata.c_str()) >= 0) + { + day = reDateTimeNginxFancy.GetMatch(3); + month = reDateTimeNginxFancy.GetMatch(2); + year = reDateTimeNginxFancy.GetMatch(1); + hour = reDateTimeNginxFancy.GetMatch(4); + minute = reDateTimeNginxFancy.GetMatch(5); + } + else if (reDateTimeNginx.RegFind(strMetadata.c_str()) >= 0) + { + day = reDateTimeNginx.GetMatch(1); + month = reDateTimeNginx.GetMatch(2); + year = reDateTimeNginx.GetMatch(3); + hour = reDateTimeNginx.GetMatch(4); + minute = reDateTimeNginx.GetMatch(5); + } + else if (reDateTimeLighttp.RegFind(strMetadata.c_str()) >= 0) + { + day = reDateTimeLighttp.GetMatch(3); + month = reDateTimeLighttp.GetMatch(2); + year = reDateTimeLighttp.GetMatch(1); + hour = reDateTimeLighttp.GetMatch(4); + minute = reDateTimeLighttp.GetMatch(5); + } + else if (reDateTimeApacheNewFormat.RegFind(strMetadata.c_str()) >= 0) + { + day = reDateTimeApacheNewFormat.GetMatch(3); + monthNum = atoi(reDateTimeApacheNewFormat.GetMatch(2).c_str()); + year = reDateTimeApacheNewFormat.GetMatch(1); + hour = reDateTimeApacheNewFormat.GetMatch(4); + minute = reDateTimeApacheNewFormat.GetMatch(5); + } + else if (reDateTime.RegFind(strMetadata.c_str()) >= 0) + { + day = reDateTime.GetMatch(3); + monthNum = atoi(reDateTime.GetMatch(2).c_str()); + year = reDateTime.GetMatch(1); + hour = reDateTime.GetMatch(4); + minute = reDateTime.GetMatch(5); + } + + if (month.length() > 0) + monthNum = CDateTime::MonthStringToMonthNum(month); + + if (day.length() > 0 && monthNum > 0 && year.length() > 0) + { + pItem->m_dateTime = CDateTime(atoi(year.c_str()), monthNum, atoi(day.c_str()), atoi(hour.c_str()), atoi(minute.c_str()), 0); + } + + if (!pItem->m_bIsFolder) + { + if (reSizeHtml.RegFind(strMetadata.c_str()) >= 0) + { + double Size = atof(reSizeHtml.GetMatch(1).c_str()); + std::string strUnit(reSizeHtml.GetMatch(2)); + + if (strUnit == "K") + Size = Size * 1024; + else if (strUnit == "M") + Size = Size * 1024 * 1024; + else if (strUnit == "G") + Size = Size * 1024 * 1024 * 1024; + + pItem->m_dwSize = (int64_t)Size; + } + else if (reSize.RegFind(strMetadata.c_str()) >= 0) + { + double Size = atof(reSize.GetMatch(1).c_str()); + std::string strUnit(reSize.GetMatch(2)); + + if (strUnit == "K") + Size = Size * 1024; + else if (strUnit == "M") + Size = Size * 1024 * 1024; + else if (strUnit == "G") + Size = Size * 1024 * 1024 * 1024; + + pItem->m_dwSize = (int64_t)Size; + } + else + if (CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_bHTTPDirectoryStatFilesize) // As a fallback get the size by stat-ing the file (slow) + { + CCurlFile file; + file.Open(url); + pItem->m_dwSize=file.GetLength(); + file.Close(); + } + } + items.Add(pItem); + } + } + } + http.Close(); + + items.SetProperty("IsHTTPDirectory", true); + + return true; +} + +bool CHTTPDirectory::Exists(const CURL &url) +{ + CCurlFile http; + struct __stat64 buffer; + + if( http.Stat(url, &buffer) != 0 ) + { + return false; + } + + if (buffer.st_mode == _S_IFDIR) + return true; + + return false; +} |