/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* This parsing code originally lived in xpfe/components/directory/ - bbaetz */ #include "nsDirIndexParser.h" #include "mozilla/ArrayUtils.h" #include "mozilla/ClearOnShutdown.h" #include "mozilla/Encoding.h" #include "mozilla/StaticPtr.h" #include "prprf.h" #include "nsCRT.h" #include "nsDirIndex.h" #include "nsEscape.h" #include "nsIDirIndex.h" #include "nsIInputStream.h" #include "nsITextToSubURI.h" #include "nsServiceManagerUtils.h" #include "mozilla/intl/LocaleService.h" using namespace mozilla; struct EncodingProp { const char* const mKey; NotNull mValue; }; static StaticRefPtr gTextToSubURI; static const EncodingProp localesFallbacks[] = { {"ar", WINDOWS_1256_ENCODING}, {"ba", WINDOWS_1251_ENCODING}, {"be", WINDOWS_1251_ENCODING}, {"bg", WINDOWS_1251_ENCODING}, {"cs", WINDOWS_1250_ENCODING}, {"el", ISO_8859_7_ENCODING}, {"et", WINDOWS_1257_ENCODING}, {"fa", WINDOWS_1256_ENCODING}, {"he", WINDOWS_1255_ENCODING}, {"hr", WINDOWS_1250_ENCODING}, {"hu", ISO_8859_2_ENCODING}, {"ja", SHIFT_JIS_ENCODING}, {"kk", WINDOWS_1251_ENCODING}, {"ko", EUC_KR_ENCODING}, {"ku", WINDOWS_1254_ENCODING}, {"ky", WINDOWS_1251_ENCODING}, {"lt", WINDOWS_1257_ENCODING}, {"lv", WINDOWS_1257_ENCODING}, {"mk", WINDOWS_1251_ENCODING}, {"pl", ISO_8859_2_ENCODING}, {"ru", WINDOWS_1251_ENCODING}, {"sah", WINDOWS_1251_ENCODING}, {"sk", WINDOWS_1250_ENCODING}, {"sl", ISO_8859_2_ENCODING}, {"sr", WINDOWS_1251_ENCODING}, {"tg", WINDOWS_1251_ENCODING}, {"th", WINDOWS_874_ENCODING}, {"tr", WINDOWS_1254_ENCODING}, {"tt", WINDOWS_1251_ENCODING}, {"uk", WINDOWS_1251_ENCODING}, {"vi", WINDOWS_1258_ENCODING}, {"zh", GBK_ENCODING}}; static NotNull GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction() { nsAutoCString locale; mozilla::intl::LocaleService::GetInstance()->GetAppLocaleAsBCP47(locale); // Let's lower case the string just in case unofficial language packs // don't stick to conventions. ToLowerCase(locale); // ASCII lowercasing with CString input! // Special case Traditional Chinese before throwing away stuff after the // language itself. Today we only ship zh-TW, but be defensive about // possible future values. if (locale.EqualsLiteral("zh-tw") || locale.EqualsLiteral("zh-hk") || locale.EqualsLiteral("zh-mo") || locale.EqualsLiteral("zh-hant")) { return BIG5_ENCODING; } // Throw away regions and other variants to accommodate weird stuff seen // in telemetry--apparently unofficial language packs. int32_t hyphenIndex = locale.FindChar('-'); if (hyphenIndex >= 0) { locale.Truncate(hyphenIndex); } size_t index; if (BinarySearchIf( localesFallbacks, 0, ArrayLength(localesFallbacks), [&locale](const EncodingProp& aProperty) { return Compare(locale, nsDependentCString(aProperty.mKey)); }, &index)) { return localesFallbacks[index].mValue; } return WINDOWS_1252_ENCODING; } NS_IMPL_ISUPPORTS(nsDirIndexParser, nsIRequestObserver, nsIStreamListener, nsIDirIndexParser) nsresult nsDirIndexParser::Init() { mLineStart = 0; mHasDescription = false; mFormat[0] = -1; auto encoding = GetFTPFallbackEncodingDoNotAddNewCallersToThisFunction(); encoding->Name(mEncoding); nsresult rv = NS_OK; if (!gTextToSubURI) { nsCOMPtr service = do_GetService(NS_ITEXTTOSUBURI_CONTRACTID, &rv); if (NS_SUCCEEDED(rv)) { gTextToSubURI = service; ClearOnShutdown(&gTextToSubURI); } } return rv; } NS_IMETHODIMP nsDirIndexParser::SetListener(nsIDirIndexListener* aListener) { mListener = aListener; return NS_OK; } NS_IMETHODIMP nsDirIndexParser::GetListener(nsIDirIndexListener** aListener) { *aListener = do_AddRef(mListener).take(); return NS_OK; } NS_IMETHODIMP nsDirIndexParser::GetComment(char** aComment) { *aComment = ToNewCString(mComment, mozilla::fallible); if (!*aComment) return NS_ERROR_OUT_OF_MEMORY; return NS_OK; } NS_IMETHODIMP nsDirIndexParser::SetEncoding(const char* aEncoding) { mEncoding.Assign(aEncoding); return NS_OK; } NS_IMETHODIMP nsDirIndexParser::GetEncoding(char** aEncoding) { *aEncoding = ToNewCString(mEncoding, mozilla::fallible); if (!*aEncoding) return NS_ERROR_OUT_OF_MEMORY; return NS_OK; } NS_IMETHODIMP nsDirIndexParser::OnStartRequest(nsIRequest* aRequest) { return NS_OK; } NS_IMETHODIMP nsDirIndexParser::OnStopRequest(nsIRequest* aRequest, nsresult aStatusCode) { // Finish up if (mBuf.Length() > (uint32_t)mLineStart) { ProcessData(aRequest); } return NS_OK; } nsDirIndexParser::Field nsDirIndexParser::gFieldTable[] = { {"Filename", FIELD_FILENAME}, {"Description", FIELD_DESCRIPTION}, {"Content-Length", FIELD_CONTENTLENGTH}, {"Last-Modified", FIELD_LASTMODIFIED}, {"Content-Type", FIELD_CONTENTTYPE}, {"File-Type", FIELD_FILETYPE}, {nullptr, FIELD_UNKNOWN}}; void nsDirIndexParser::ParseFormat(const char* aFormatStr) { // Parse a "200" format line, and remember the fields and their // ordering in mFormat. Multiple 200 lines stomp on each other. unsigned int formatNum = 0; mFormat[0] = -1; do { while (*aFormatStr && nsCRT::IsAsciiSpace(char16_t(*aFormatStr))) { ++aFormatStr; } if (!*aFormatStr) break; nsAutoCString name; int32_t len = 0; while (aFormatStr[len] && !nsCRT::IsAsciiSpace(char16_t(aFormatStr[len]))) { ++len; } name.Append(aFormatStr, len); aFormatStr += len; // Okay, we're gonna monkey with the nsStr. Bold! name.SetLength(nsUnescapeCount(name.BeginWriting())); // All tokens are case-insensitive - // http://www.mozilla.org/projects/netlib/dirindexformat.html if (name.LowerCaseEqualsLiteral("description")) mHasDescription = true; for (Field* i = gFieldTable; i->mName; ++i) { if (name.EqualsIgnoreCase(i->mName)) { mFormat[formatNum] = i->mType; mFormat[++formatNum] = -1; break; } } } while (*aFormatStr && (formatNum < (ArrayLength(mFormat) - 1))); } void nsDirIndexParser::ParseData(nsIDirIndex* aIdx, char* aDataStr, int32_t aLineLen) { // Parse a "201" data line, using the field ordering specified in // mFormat. if (mFormat[0] == -1) { // Ignore if we haven't seen a format yet. return; } nsAutoCString filename; int32_t lineLen = aLineLen; for (int32_t i = 0; mFormat[i] != -1; ++i) { // If we've exhausted the data before we run out of fields, just bail. if (!*aDataStr || (lineLen < 1)) { return; } while ((lineLen > 0) && nsCRT::IsAsciiSpace(*aDataStr)) { ++aDataStr; --lineLen; } if (lineLen < 1) { // invalid format, bail return; } char* value = aDataStr; if (*aDataStr == '"' || *aDataStr == '\'') { // it's a quoted string. snarf everything up to the next quote character const char quotechar = *(aDataStr++); lineLen--; ++value; while ((lineLen > 0) && *aDataStr != quotechar) { ++aDataStr; --lineLen; } if (lineLen > 0) { *aDataStr++ = '\0'; --lineLen; } if (!lineLen) { // invalid format, bail return; } } else { // it's unquoted. snarf until we see whitespace. value = aDataStr; while ((lineLen > 0) && (!nsCRT::IsAsciiSpace(*aDataStr))) { ++aDataStr; --lineLen; } if (lineLen > 0) { *aDataStr++ = '\0'; --lineLen; } // even if we ran out of line length here, there's still a trailing zero // byte afterwards } fieldType t = fieldType(mFormat[i]); switch (t) { case FIELD_FILENAME: { // don't unescape at this point, so that UnEscapeAndConvert() can filename = value; bool success = false; nsAutoString entryuri; if (RefPtr textToSub = gTextToSubURI) { nsAutoString result; if (NS_SUCCEEDED( textToSub->UnEscapeAndConvert(mEncoding, filename, result))) { if (!result.IsEmpty()) { aIdx->SetLocation(filename); if (!mHasDescription) aIdx->SetDescription(result); success = true; } } else { NS_WARNING("UnEscapeAndConvert error"); } } if (!success) { // if unsuccessfully at charset conversion, then // just fallback to unescape'ing in-place // XXX - this shouldn't be using UTF8, should it? // when can we fail to get the service, anyway? - bbaetz aIdx->SetLocation(filename); if (!mHasDescription) { aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); } } } break; case FIELD_DESCRIPTION: nsUnescape(value); aIdx->SetDescription(NS_ConvertUTF8toUTF16(value)); break; case FIELD_CONTENTLENGTH: { int64_t len; int32_t status = PR_sscanf(value, "%lld", &len); if (status == 1) { aIdx->SetSize(len); } else { aIdx->SetSize(UINT64_MAX); // UINT64_MAX means unknown } } break; case FIELD_LASTMODIFIED: { PRTime tm; nsUnescape(value); if (PR_ParseTimeString(value, false, &tm) == PR_SUCCESS) { aIdx->SetLastModified(tm); } } break; case FIELD_CONTENTTYPE: aIdx->SetContentType(nsDependentCString(value)); break; case FIELD_FILETYPE: // unescape in-place nsUnescape(value); if (!nsCRT::strcasecmp(value, "directory")) { aIdx->SetType(nsIDirIndex::TYPE_DIRECTORY); } else if (!nsCRT::strcasecmp(value, "file")) { aIdx->SetType(nsIDirIndex::TYPE_FILE); } else if (!nsCRT::strcasecmp(value, "symbolic-link")) { aIdx->SetType(nsIDirIndex::TYPE_SYMLINK); } else { aIdx->SetType(nsIDirIndex::TYPE_UNKNOWN); } break; case FIELD_UNKNOWN: // ignore break; } } } NS_IMETHODIMP nsDirIndexParser::OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aStream, uint64_t aSourceOffset, uint32_t aCount) { if (aCount < 1) return NS_OK; uint32_t len = mBuf.Length(); // Ensure that our mBuf has capacity to hold the data we're about to // read. // Before adjusting the capacity, guard against any potential overflow // resulting from the addition of aCount with len. See Bug 1823551. NS_ENSURE_TRUE((UINT32_MAX - aCount) >= len, NS_ERROR_FAILURE); if (!mBuf.SetLength(len + aCount, fallible)) return NS_ERROR_OUT_OF_MEMORY; // Now read the data into our buffer. nsresult rv; uint32_t count; rv = aStream->Read(mBuf.BeginWriting() + len, aCount, &count); if (NS_FAILED(rv)) return rv; // Set the string's length according to the amount of data we've read. // Note: we know this to work on nsCString. This isn't guaranteed to // work on other strings. mBuf.SetLength(len + count); return ProcessData(aRequest); } nsresult nsDirIndexParser::ProcessData(nsIRequest* aRequest) { if (!mListener) return NS_ERROR_FAILURE; while (true) { int32_t eol = mBuf.FindCharInSet("\n\r", mLineStart); if (eol < 0) break; mBuf.SetCharAt(char16_t('\0'), eol); const char* line = mBuf.get() + mLineStart; int32_t lineLen = eol - mLineStart; mLineStart = eol + 1; if (lineLen >= 4) { const char* buf = line; if (buf[0] == '1') { if (buf[1] == '0') { if (buf[2] == '0' && buf[3] == ':') { // 100. Human-readable comment line. Ignore } else if (buf[2] == '1' && buf[3] == ':') { // 101. Human-readable information line. mComment.Append(buf + 4); char* value = ((char*)buf) + 4; nsUnescape(value); mListener->OnInformationAvailable(aRequest, NS_ConvertUTF8toUTF16(value)); } else if (buf[2] == '2' && buf[3] == ':') { // 102. Human-readable information line, HTML. mComment.Append(buf + 4); } } } else if (buf[0] == '2') { if (buf[1] == '0') { if (buf[2] == '0' && buf[3] == ':') { // 200. Define field names ParseFormat(buf + 4); } else if (buf[2] == '1' && buf[3] == ':') { // 201. Field data nsCOMPtr idx = new nsDirIndex(); ParseData(idx, ((char*)buf) + 4, lineLen - 4); mListener->OnIndexAvailable(aRequest, idx); } } } else if (buf[0] == '3') { if (buf[1] == '0') { if (buf[2] == '0' && buf[3] == ':') { // 300. Self-referring URL } else if (buf[2] == '1' && buf[3] == ':') { // 301. OUR EXTENSION - encoding int i = 4; while (buf[i] && nsCRT::IsAsciiSpace(buf[i])) ++i; if (buf[i]) SetEncoding(buf + i); } } } } } return NS_OK; }